OpenDAS / mmdetection3d · Commit 27a546e9

[Refactor] Visualization Refactor

Authored Jul 19, 2022 by ZCMax; committed Jul 20, 2022 by ChaimZhu.
Parent: 0e17beab
Showing 16 changed files with 981 additions and 1344 deletions (+981, −1344).
configs/_base_/default_runtime.py                 +5    −0
demo/mono_det_demo.py                             +36   −12
demo/multi_modality_demo.py                       +39   −13
demo/pc_seg_demo.py                               +35   −12
demo/pcd_demo.py                                  +36   −12
mmdet3d/apis/__init__.py                          +1    −2
mmdet3d/apis/inference.py                         +1    −205
mmdet3d/core/__init__.py                          +1    −1
mmdet3d/core/visualization/__init__.py            +11   −0
mmdet3d/core/visualization/local_visualizer.py    +606  −0
mmdet3d/core/visualization/vis_utils.py           +178  −0
mmdet3d/core/visualizer/__init__.py               +0    −5
mmdet3d/core/visualizer/image_vis.py              +0    −206
mmdet3d/core/visualizer/open3d_vis.py             +0    −460
mmdet3d/core/visualizer/show_result.py            +0    −291
tools/misc/browse_dataset.py                      +32   −125
configs/_base_/default_runtime.py

```diff
@@ -14,6 +14,11 @@ env_cfg = dict(
     dist_cfg=dict(backend='nccl'),
 )
 
+vis_backends = [dict(type='LocalVisBackend')]
+visualizer = dict(
+    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
+log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
+
 log_level = 'INFO'
 load_from = None
 resume = False
```
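For orientation, a minimal sketch of how this new config block is consumed. It mirrors the `VISUALIZERS.build(model.cfg.visualizer)` calls in the refactored demo scripts below; the inline dict here stands in for a loaded config file and is only an illustration.

```python
# A minimal sketch: building the visualizer from the config dict added above.
# The registry call mirrors the refactored demo scripts; the inline dict is
# illustrative rather than a config file loaded from disk.
from mmdet3d.registry import VISUALIZERS

visualizer_cfg = dict(
    type='Det3DLocalVisualizer',
    vis_backends=[dict(type='LocalVisBackend')],
    name='visualizer')
visualizer = VISUALIZERS.build(visualizer_cfg)
```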
demo/mono_det_demo.py

```diff
 # Copyright (c) OpenMMLab. All rights reserved.
 from argparse import ArgumentParser
 
-from mmdet3d.apis import (inference_mono_3d_detector, init_model,
-                          show_result_meshlab)
+import mmcv
+
+from mmdet3d.apis import inference_mono_3d_detector, init_model
+from mmdet3d.registry import VISUALIZERS
+from mmdet3d.utils import register_all_modules
 
 
-def main():
+def parse_args():
     parser = ArgumentParser()
     parser.add_argument('image', help='image file')
     parser.add_argument('ann', help='ann file')
@@ -26,21 +29,42 @@ def main():
         action='store_true',
         help='whether to save online visualization results')
     args = parser.parse_args()
+    return args
 
+
+def main(args):
+    # register all modules in mmdet into the registries
+    register_all_modules()
+
     # build the model from a config file and a checkpoint file
     model = init_model(args.config, args.checkpoint, device=args.device)
+
+    # init visualizer
+    visualizer = VISUALIZERS.build(model.cfg.visualizer)
+    visualizer.dataset_meta = {
+        'CLASSES': model.CLASSES,
+        'PALETTE': model.PALETTE
+    }
+
     # test a single image
     result, data = inference_mono_3d_detector(model, args.image, args.ann)
+    img = mmcv.imread(args.image)
+    img = mmcv.imconvert(img, 'bgr', 'rgb')
+    data_input = dict(img=img)
+
     # show the results
-    show_result_meshlab(
-        data,
-        result,
-        args.out_dir,
-        args.score_thr,
-        show=args.show,
-        snapshot=args.snapshot,
-        task='mono-det')
+    visualizer.add_datasample(
+        'result',
+        data_input,
+        pred_sample=result,
+        show=True,
+        wait_time=0,
+        out_file=args.out_file,
+        pred_score_thr=args.score_thr,
+        vis_task='mono-det')
 
 
 if __name__ == '__main__':
-    main()
+    args = parse_args()
+    main(args)
```
demo/multi_modality_demo.py

```diff
 # Copyright (c) OpenMMLab. All rights reserved.
 from argparse import ArgumentParser
 
-from mmdet3d.apis import (inference_multi_modality_detector, init_model,
-                          show_result_meshlab)
+import mmcv
+import numpy as np
+
+from mmdet3d.apis import inference_multi_modality_detector, init_model
+from mmdet3d.registry import VISUALIZERS
+from mmdet3d.utils import register_all_modules
 
 
-def main():
+def parse_args():
     parser = ArgumentParser()
     parser.add_argument('pcd', help='Point cloud file')
     parser.add_argument('image', help='image file')
@@ -27,22 +31,44 @@ def main():
         action='store_true',
         help='whether to save online visualization results')
     args = parser.parse_args()
+    return args
 
+
+def main(args):
+    # register all modules in mmdet into the registries
+    register_all_modules()
+
     # build the model from a config file and a checkpoint file
     model = init_model(args.config, args.checkpoint, device=args.device)
-    # test a single image
+
+    # init visualizer
+    visualizer = VISUALIZERS.build(model.cfg.visualizer)
+    visualizer.dataset_meta = {
+        'CLASSES': model.CLASSES,
+        'PALETTE': model.PALETTE
+    }
+
+    # test a single image and point cloud sample
     result, data = inference_multi_modality_detector(model, args.pcd,
                                                      args.image, args.ann)
+    points = np.fromfile(args.pcd, dtype=np.float32)
+    img = mmcv.imread(args.image)
+    img = mmcv.imconvert(img, 'bgr', 'rgb')
+    data_input = dict(points=points, img=img)
+
     # show the results
-    show_result_meshlab(
-        data,
-        result,
-        args.out_dir,
-        args.score_thr,
-        show=args.show,
-        snapshot=args.snapshot,
-        task='multi_modality-det')
+    visualizer.add_datasample(
+        'result',
+        data_input,
+        pred_sample=result,
+        show=True,
+        wait_time=0,
+        out_file=args.out_file,
+        pred_score_thr=args.score_thr,
+        vis_task='multi_modality-det')
 
 
 if __name__ == '__main__':
-    main()
+    args = parse_args()
+    main(args)
```
demo/pc_seg_demo.py

```diff
 # Copyright (c) OpenMMLab. All rights reserved.
 from argparse import ArgumentParser
 
-from mmdet3d.apis import inference_segmentor, init_model, show_result_meshlab
+import numpy as np
+
+from mmdet3d.apis import inference_segmentor, init_model
+from mmdet3d.registry import VISUALIZERS
+from mmdet3d.utils import register_all_modules
 
 
-def main():
+def parse_args():
     parser = ArgumentParser()
     parser.add_argument('pcd', help='Point cloud file')
     parser.add_argument('config', help='Config file')
@@ -22,21 +26,40 @@ def main():
         action='store_true',
         help='whether to save online visualization results')
     args = parser.parse_args()
+    return args
 
+
+def main(args):
+    # register all modules in mmdet into the registries
+    register_all_modules()
+
     # build the model from a config file and a checkpoint file
     model = init_model(args.config, args.checkpoint, device=args.device)
-    # test a single image
+
+    # init visualizer
+    visualizer = VISUALIZERS.build(model.cfg.visualizer)
+    visualizer.dataset_meta = {
+        'CLASSES': model.CLASSES,
+        'PALETTE': model.PALETTE
+    }
+
+    # test a single point cloud sample
     result, data = inference_segmentor(model, args.pcd)
+    points = np.fromfile(args.pcd, dtype=np.float32)
+    data_input = dict(points=points)
+
     # show the results
-    show_result_meshlab(
-        data,
-        result,
-        args.out_dir,
-        show=args.show,
-        snapshot=args.snapshot,
-        task='seg',
-        palette=model.PALETTE)
+    visualizer.add_datasample(
+        'result',
+        data_input,
+        pred_sample=result,
+        show=True,
+        wait_time=0,
+        out_file=args.out_file,
+        pred_score_thr=args.score_thr,
+        vis_task='seg')
 
 
 if __name__ == '__main__':
-    main()
+    args = parse_args()
+    main(args)
```
demo/pcd_demo.py

```diff
 # Copyright (c) OpenMMLab. All rights reserved.
 from argparse import ArgumentParser
 
-from mmdet3d.apis import inference_detector, init_model, show_result_meshlab
+import numpy as np
+
+from mmdet3d.apis import inference_detector, init_model
+from mmdet3d.registry import VISUALIZERS
+from mmdet3d.utils import register_all_modules
 
 
-def main():
+def parse_args():
     parser = ArgumentParser()
     parser.add_argument('pcd', help='Point cloud file')
     parser.add_argument('config', help='Config file')
@@ -24,21 +28,41 @@ def main():
         action='store_true',
         help='whether to save online visualization results')
     args = parser.parse_args()
+    return args
 
+
+def main(args):
+    # register all modules in mmdet into the registries
+    register_all_modules()
+
+    # TODO: Support inference of point cloud numpy file.
     # build the model from a config file and a checkpoint file
     model = init_model(args.config, args.checkpoint, device=args.device)
-    # test a single image
+
+    # init visualizer
+    visualizer = VISUALIZERS.build(model.cfg.visualizer)
+    visualizer.dataset_meta = {
+        'CLASSES': model.CLASSES,
+        'PALETTE': model.PALETTE
+    }
+
+    # test a single point cloud sample
     result, data = inference_detector(model, args.pcd)
+    points = np.fromfile(args.pcd, dtype=np.float32)
+    data_input = dict(points=points)
+
     # show the results
-    show_result_meshlab(
-        data,
-        result,
-        args.out_dir,
-        args.score_thr,
-        show=args.show,
-        snapshot=args.snapshot,
-        task='det')
+    visualizer.add_datasample(
+        'result',
+        data_input,
+        pred_sample=result,
+        show=True,
+        wait_time=0,
+        out_file=args.out_file,
+        pred_score_thr=args.score_thr,
+        vis_task='det')
 
 
 if __name__ == '__main__':
-    main()
+    args = parse_args()
+    main(args)
```
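One caveat worth noting about the demos above: `np.fromfile` returns a flat 1-D buffer, so a reshape to `(N, load_dim)` is generally needed before anything indexes `points[:, :3]`. A hedged sketch for a KITTI-style file — both the path and the 4-value layout are assumptions for illustration, not part of this commit:

```python
import numpy as np

# Hypothetical example: KITTI-style .bin files store float32
# (x, y, z, intensity) tuples, so the flat buffer is reshaped to (N, 4).
# The path and load_dim=4 are assumptions, not something this commit enforces.
points = np.fromfile('data/kitti/000008.bin', dtype=np.float32)
points = points.reshape(-1, 4)
```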
mmdet3d/apis/__init__.py

```diff
@@ -2,13 +2,12 @@
 from .inference import (convert_SyncBN, inference_detector,
                         inference_mono_3d_detector,
                         inference_multi_modality_detector, inference_segmentor,
-                        init_model, show_result_meshlab)
+                        init_model)
 
 __all__ = [
     'inference_detector', 'init_model', 'inference_mono_3d_detector',
-    'show_result_meshlab', 'convert_SyncBN',
+    'convert_SyncBN',
     'inference_multi_modality_detector', 'inference_segmentor',
```
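Taken together with the demo changes above, the migration for downstream callers looks roughly like the wrapper below — a sketch assembled from the diffs in this commit (it simply restates the new demo flow as a reusable function), not an official migration guide:

```python
# Sketch of the post-refactor flow that replaces show_result_meshlab().
# Assembled from the demo diffs in this commit; paths are caller-supplied.
import numpy as np

from mmdet3d.apis import inference_detector, init_model
from mmdet3d.registry import VISUALIZERS
from mmdet3d.utils import register_all_modules


def visualize_detection(config, checkpoint, pcd_file, device='cuda:0'):
    register_all_modules()
    model = init_model(config, checkpoint, device=device)

    # The visualizer is now built from the model config instead of being a
    # free-standing meshlab helper.
    visualizer = VISUALIZERS.build(model.cfg.visualizer)
    visualizer.dataset_meta = {
        'CLASSES': model.CLASSES,
        'PALETTE': model.PALETTE
    }

    result, data = inference_detector(model, pcd_file)
    points = np.fromfile(pcd_file, dtype=np.float32)
    visualizer.add_datasample(
        'result',
        dict(points=points),
        pred_sample=result,
        show=True,
        vis_task='det')
```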
mmdet3d/apis/inference.py

```diff
@@ -9,10 +9,7 @@ import torch
 from mmcv.parallel import collate, scatter
 from mmcv.runner import load_checkpoint
 
-from mmdet3d.core import (Box3DMode, CameraInstance3DBoxes, Coord3DMode,
-                          DepthInstance3DBoxes, LiDARInstance3DBoxes,
-                          show_multi_modality_result, show_result,
-                          show_seg_result)
+from mmdet3d.core import Box3DMode
 from mmdet3d.core.bbox import get_box_type
 from mmdet3d.datasets.pipelines import Compose
 from mmdet3d.models import build_model
```

```diff
@@ -323,204 +320,3 @@ def inference_segmentor(model, pcd):
     with torch.no_grad():
         result = model(return_loss=False, rescale=True, **data)
     return result, data
```

The remainder of the hunk deletes the four meshlab-based helpers in full:

```python
def show_det_result_meshlab(data,
                            result,
                            out_dir,
                            score_thr=0.0,
                            show=False,
                            snapshot=False):
    """Show 3D detection result by meshlab."""
    points = data['points'][0][0].cpu().numpy()
    pts_filename = data['img_metas'][0][0]['pts_filename']
    file_name = osp.split(pts_filename)[-1].split('.')[0]

    if 'pts_bbox' in result[0].keys():
        pred_bboxes = result[0]['pts_bbox']['boxes_3d'].tensor.numpy()
        pred_scores = result[0]['pts_bbox']['scores_3d'].numpy()
    else:
        pred_bboxes = result[0]['boxes_3d'].tensor.numpy()
        pred_scores = result[0]['scores_3d'].numpy()

    # filter out low score bboxes for visualization
    if score_thr > 0:
        inds = pred_scores > score_thr
        pred_bboxes = pred_bboxes[inds]

    # for now we convert points into depth mode
    box_mode = data['img_metas'][0][0]['box_mode_3d']
    if box_mode != Box3DMode.DEPTH:
        points = Coord3DMode.convert(points, box_mode, Coord3DMode.DEPTH)
        show_bboxes = Box3DMode.convert(pred_bboxes, box_mode, Box3DMode.DEPTH)
    else:
        show_bboxes = deepcopy(pred_bboxes)
    show_result(
        points,
        None,
        show_bboxes,
        out_dir,
        file_name,
        show=show,
        snapshot=snapshot)
    return file_name


def show_seg_result_meshlab(data,
                            result,
                            out_dir,
                            palette,
                            show=False,
                            snapshot=False):
    """Show 3D segmentation result by meshlab."""
    points = data['points'][0][0].cpu().numpy()
    pts_filename = data['img_metas'][0][0]['pts_filename']
    file_name = osp.split(pts_filename)[-1].split('.')[0]

    pred_seg = result[0]['semantic_mask'].numpy()

    if palette is None:
        # generate random color map
        max_idx = pred_seg.max()
        palette = np.random.randint(0, 256, size=(max_idx + 1, 3))
    palette = np.array(palette).astype(np.int)

    show_seg_result(
        points,
        None,
        pred_seg,
        out_dir,
        file_name,
        palette=palette,
        show=show,
        snapshot=snapshot)
    return file_name


def show_proj_det_result_meshlab(data,
                                 result,
                                 out_dir,
                                 score_thr=0.0,
                                 show=False,
                                 snapshot=False):
    """Show result of projecting 3D bbox to 2D image by meshlab."""
    assert 'img' in data.keys(), 'image data is not provided for visualization'

    img_filename = data['img_metas'][0][0]['filename']
    file_name = osp.split(img_filename)[-1].split('.')[0]

    # read from file because img in data_dict has undergone pipeline transform
    img = mmcv.imread(img_filename)

    if 'pts_bbox' in result[0].keys():
        result[0] = result[0]['pts_bbox']
    elif 'img_bbox' in result[0].keys():
        result[0] = result[0]['img_bbox']
    pred_bboxes = result[0]['boxes_3d'].tensor.numpy()
    pred_scores = result[0]['scores_3d'].numpy()

    # filter out low score bboxes for visualization
    if score_thr > 0:
        inds = pred_scores > score_thr
        pred_bboxes = pred_bboxes[inds]

    box_mode = data['img_metas'][0][0]['box_mode_3d']
    if box_mode == Box3DMode.LIDAR:
        if 'lidar2img' not in data['img_metas'][0][0]:
            raise NotImplementedError(
                'LiDAR to image transformation matrix is not provided')

        show_bboxes = LiDARInstance3DBoxes(pred_bboxes, origin=(0.5, 0.5, 0))

        show_multi_modality_result(
            img,
            None,
            show_bboxes,
            data['img_metas'][0][0]['lidar2img'],
            out_dir,
            file_name,
            box_mode='lidar',
            show=show)
    elif box_mode == Box3DMode.DEPTH:
        show_bboxes = DepthInstance3DBoxes(pred_bboxes, origin=(0.5, 0.5, 0))

        show_multi_modality_result(
            img,
            None,
            show_bboxes,
            None,
            out_dir,
            file_name,
            box_mode='depth',
            img_metas=data['img_metas'][0][0],
            show=show)
    elif box_mode == Box3DMode.CAM:
        if 'cam2img' not in data['img_metas'][0][0]:
            raise NotImplementedError(
                'camera intrinsic matrix is not provided')

        show_bboxes = CameraInstance3DBoxes(
            pred_bboxes, box_dim=pred_bboxes.shape[-1], origin=(0.5, 1.0, 0.5))

        show_multi_modality_result(
            img,
            None,
            show_bboxes,
            data['img_metas'][0][0]['cam2img'],
            out_dir,
            file_name,
            box_mode='camera',
            show=show)
    else:
        raise NotImplementedError(
            f'visualization of {box_mode} bbox is not supported')

    return file_name


def show_result_meshlab(data,
                        result,
                        out_dir,
                        score_thr=0.0,
                        show=False,
                        snapshot=False,
                        task='det',
                        palette=None):
    """Show result by meshlab.

    Args:
        data (dict): Contain data from pipeline.
        result (dict): Predicted result from model.
        out_dir (str): Directory to save visualized result.
        score_thr (float, optional): Minimum score of bboxes to be shown.
            Default: 0.0
        show (bool, optional): Visualize the results online. Defaults to False.
        snapshot (bool, optional): Whether to save the online results.
            Defaults to False.
        task (str, optional): Distinguish which task result to visualize.
            Currently we support 3D detection, multi-modality detection and
            3D segmentation. Defaults to 'det'.
        palette (list[list[int]] | np.ndarray, optional): The palette
            of segmentation map. If None is given, random palette will be
            generated. Defaults to None.
    """
    assert task in ['det', 'multi_modality-det', 'seg', 'mono-det'], \
        f'unsupported visualization task {task}'
    assert out_dir is not None, 'Expect out_dir, got none.'

    if task in ['det', 'multi_modality-det']:
        file_name = show_det_result_meshlab(data, result, out_dir, score_thr,
                                            show, snapshot)

    if task in ['seg']:
        file_name = show_seg_result_meshlab(data, result, out_dir, palette,
                                            show, snapshot)

    if task in ['multi_modality-det', 'mono-det']:
        file_name = show_proj_det_result_meshlab(data, result, out_dir,
                                                 score_thr, show, snapshot)

    return out_dir, file_name
```
mmdet3d/core/__init__.py

```diff
@@ -6,5 +6,5 @@ from .evaluation import *  # noqa: F401, F403
 from .points import *  # noqa: F401, F403
 from .post_processing import *  # noqa: F401, F403
 from .utils import *  # noqa: F401, F403
-from .visualizer import *  # noqa: F401, F403
+from .visualization import *  # noqa: F401, F403
 from .voxel import *  # noqa: F401, F403
```
mmdet3d/core/visualization/__init__.py (new file, 0 → 100644)

```python
# Copyright (c) OpenMMLab. All rights reserved.
from .local_visualizer import Det3DLocalVisualizer
from .vis_utils import (proj_camera_bbox3d_to_img, proj_depth_bbox3d_to_img,
                        proj_lidar_bbox3d_to_img, write_obj,
                        write_oriented_bbox)

__all__ = [
    'Det3DLocalVisualizer', 'proj_depth_bbox3d_to_img',
    'proj_camera_bbox3d_to_img', 'proj_lidar_bbox3d_to_img',
    'write_oriented_bbox', 'write_obj'
]
```
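Since `mmdet3d/core/__init__.py` now re-exports this package via `from .visualization import *` (see the diff above), both import paths below should resolve after this commit. A quick smoke test — assuming open3d and trimesh are installed, since the new modules import them at load time:

```python
# Smoke test for the new package layout introduced by this commit.
from mmdet3d.core import Det3DLocalVisualizer  # via the * re-export
from mmdet3d.core.visualization import (proj_lidar_bbox3d_to_img, write_obj,
                                        write_oriented_bbox)

# The class extends the 2D detection visualizer from mmdet.
print(Det3DLocalVisualizer.__mro__[1].__name__)  # DetLocalVisualizer
```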
mmdet3d/core/visualization/local_visualizer.py (new file, 0 → 100644)

```python
# Copyright (c) OpenMMLab. All rights reserved.
import copy
from os import path as osp
from typing import Dict, List, Optional, Tuple, Union

import mmcv
import numpy as np
from mmengine.dist import master_only
from torch import Tensor

try:
    import open3d as o3d
    from open3d import geometry
except ImportError:
    raise ImportError(
        'Please run "pip install open3d" to install open3d first.')

from mmengine.data import InstanceData
from mmengine.visualization.utils import check_type, tensor2ndarray

from mmdet.core.visualization import DetLocalVisualizer

from mmdet3d.core import (BaseInstance3DBoxes, DepthInstance3DBoxes,
                          Det3DDataSample, PixelData)
from mmdet3d.registry import VISUALIZERS
from .vis_utils import (proj_camera_bbox3d_to_img, proj_depth_bbox3d_to_img,
                        proj_lidar_bbox3d_to_img, to_depth_mode, write_obj,
                        write_oriented_bbox)


@VISUALIZERS.register_module()
class Det3DLocalVisualizer(DetLocalVisualizer):
    """MMDetection3D Local Visualizer.

    - 3D detection and segmentation drawing methods

      - draw_bboxes_3d: draw 3D bounding boxes on point clouds
      - draw_proj_bboxes_3d: draw projected 3D bounding boxes on image

    Args:
        name (str): Name of the instance. Defaults to 'visualizer'.
        image (np.ndarray, optional): The origin image to draw. The format
            should be RGB. Defaults to None.
        vis_backends (list, optional): Visual backend config list.
            Defaults to None.
        save_dir (str, optional): Save file dir for all storage backends.
            If it is None, the backend storage will not save any data.
        bbox_color (str, tuple(int), optional): Color of bbox lines.
            The tuple of color should be in BGR order. Defaults to None.
        text_color (str, tuple(int), optional): Color of texts.
            The tuple of color should be in BGR order.
            Defaults to (200, 200, 200).
        mask_color (str, tuple(int), optional): Color of masks.
            The tuple of color should be in BGR order. Defaults to None.
        line_width (int, float): The linewidth of lines. Defaults to 3.
        alpha (int, float): The transparency of bboxes or mask.
            Defaults to 0.8.

    Examples:
        >>> import numpy as np
        >>> import torch
        >>> from mmengine.data import InstanceData
        >>> from mmdet3d.core import Det3DDataSample
        >>> from mmdet3d.core import Det3DLocalVisualizer

        >>> det3d_local_visualizer = Det3DLocalVisualizer()
        >>> image = np.random.randint(0, 256,
        ...                           size=(10, 12, 3)).astype('uint8')
        >>> gt_instances_3d = InstanceData()
        >>> gt_instances_3d.bboxes_3d = BaseInstance3DBoxes(
        ...     torch.rand((5, 7)))
        >>> gt_instances_3d.labels_3d = torch.randint(0, 2, (1,))
        >>> gt_det3d_data_sample = Det3DDataSample()
        >>> gt_det3d_data_sample.gt_instances_3d = gt_instances_3d
        >>> det3d_local_visualizer.add_datasample('image', image,
        ...                                       gt_det3d_data_sample)
        >>> det3d_local_visualizer.add_datasample(
        ...     'image', image, gt_det3d_data_sample,
        ...     out_file='out_file.jpg')
        >>> det3d_local_visualizer.add_datasample(
        ...     'image', image, gt_det3d_data_sample, show=True)
        >>> pred_instances = InstanceData()
        >>> pred_instances.bboxes_3d = torch.Tensor([[2, 4, 4, 8]])
        >>> pred_instances.labels_3d = torch.randint(0, 2, (1,))
        >>> pred_det3d_data_sample = Det3DDataSample()
        >>> pred_det3d_data_sample.pred_instances = pred_instances
        >>> det3d_local_visualizer.add_datasample('image', image,
        ...                                       gt_det3d_data_sample,
        ...                                       pred_det3d_data_sample)
    """

    def __init__(self,
                 name: str = 'visualizer',
                 image: Optional[np.ndarray] = None,
                 vis_backends: Optional[Dict] = None,
                 save_dir: Optional[str] = None,
                 bbox_color: Optional[Union[str, Tuple[int]]] = None,
                 text_color: Optional[Union[str,
                                            Tuple[int]]] = (200, 200, 200),
                 mask_color: Optional[Union[str, Tuple[int]]] = None,
                 line_width: Union[int, float] = 3,
                 vis_cfg: Optional[Dict] = None,
                 alpha: float = 0.8):
        super().__init__(
            name=name,
            image=image,
            vis_backends=vis_backends,
            save_dir=save_dir,
            bbox_color=bbox_color,
            text_color=text_color,
            mask_color=mask_color,
            line_width=line_width,
            alpha=alpha)
        # state shared across drawing calls, initialized here so that the
        # first set_points / draw_seg_mask call finds the attributes
        self.pcd = None
        self.points_colors = None
        self.seg_num = 0
        self.o3d_vis = self._initialize_o3d_vis(vis_cfg)

    def _initialize_o3d_vis(self, vis_cfg) -> o3d.visualization.Visualizer:
        """Build open3d vis according to vis_cfg.

        Args:
            vis_cfg (dict): The config to build open3d vis.

        Returns:
            :obj:`o3d.visualization.Visualizer`: the built open3d vis.
        """
        # init open3d visualizer
        o3d_vis = o3d.visualization.Visualizer()
        o3d_vis.create_window()
        # create coordinate frame; size/origin follow the defaults of the
        # legacy open3d_vis.py helper removed by this commit
        mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
            size=1, origin=[0, 0, 0])
        o3d_vis.add_geometry(mesh_frame)
        return o3d_vis

    @master_only
    def set_points(self,
                   points: np.ndarray,
                   vis_task: str,
                   points_color: Tuple = (0.5, 0.5, 0.5),
                   points_size: int = 2,
                   mode: str = 'xyz') -> None:
        """Set the points to draw.

        Args:
            points (numpy.array, shape=[N, 3+C]): points to visualize.
            vis_task (str): Visualization task, it includes: 'det',
                'multi_modality-det', 'mono-det', 'seg'.
            points_color (tuple[float], optional): the color of points.
                Default: (0.5, 0.5, 0.5).
            points_size (int, optional): the size of points to show on
                visualizer. Default: 2.
            mode (str, optional): indicate type of the input points,
                available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
        """
        assert points is not None
        check_type('points', points, np.ndarray)

        if self.pcd is not None and vis_task != 'seg':
            self.o3d_vis.remove_geometry(self.pcd)

        # set points size in Open3D
        self.o3d_vis.get_render_option().point_size = points_size

        points = points.copy()
        pcd = geometry.PointCloud()
        if mode == 'xyz':
            pcd.points = o3d.utility.Vector3dVector(points[:, :3])
            points_colors = np.tile(
                np.array(points_color), (points.shape[0], 1))
        elif mode == 'xyzrgb':
            pcd.points = o3d.utility.Vector3dVector(points[:, :3])
            points_colors = points[:, 3:6]
            # normalize to [0, 1] for Open3D drawing
            if not ((points_colors >= 0.0) & (points_colors <= 1.0)).all():
                points_colors /= 255.0
        else:
            raise NotImplementedError

        pcd.colors = o3d.utility.Vector3dVector(points_colors)
        self.o3d_vis.add_geometry(pcd)
        self.pcd = pcd
        self.points_colors = points_colors

    # TODO: assign 3D Box color according to pred / GT labels
    # We draw GT / pred bboxes on the same point cloud scenes
    # for better detection performance comparison
    def draw_bboxes_3d(self,
                       bboxes_3d: DepthInstance3DBoxes,
                       bbox_color=(0, 1, 0),
                       points_in_box_color=(1, 0, 0),
                       rot_axis=2,
                       center_mode='lidar_bottom',
                       mode='xyz'):
        """Draw bbox on visualizer and change the color of points inside
        bbox3d.

        Args:
            bboxes_3d (:obj:`DepthInstance3DBoxes`, shape=[M, 7]):
                3d bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
            bbox_color (tuple[float], optional): the color of 3D bboxes.
                Default: (0, 1, 0).
            points_in_box_color (tuple[float], optional): the color of points
                inside 3D bboxes. Default: (1, 0, 0).
            rot_axis (int, optional): rotation axis of 3D bboxes. Default: 2.
            center_mode (str, optional): Indicates whether the center of the
                bbox is the bottom center or the gravity center. Available
                mode ['lidar_bottom', 'camera_bottom'].
                Default: 'lidar_bottom'.
            mode (str, optional): Indicates type of input points, available
                mode ['xyz', 'xyzrgb']. Default: 'xyz'.
        """
        # Before visualizing the 3D Boxes in point cloud scene
        # we need to convert the boxes to Depth mode
        check_type('bboxes', bboxes_3d, DepthInstance3DBoxes)

        # convert bboxes to numpy dtype
        bboxes_3d = tensor2ndarray(bboxes_3d.tensor)

        in_box_color = np.array(points_in_box_color)

        for i in range(len(bboxes_3d)):
            center = bboxes_3d[i, 0:3]
            dim = bboxes_3d[i, 3:6]
            yaw = np.zeros(3)
            yaw[rot_axis] = bboxes_3d[i, 6]
            rot_mat = geometry.get_rotation_matrix_from_xyz(yaw)

            if center_mode == 'lidar_bottom':
                # bottom center to gravity center
                center[rot_axis] += dim[rot_axis] / 2
            elif center_mode == 'camera_bottom':
                # bottom center to gravity center
                center[rot_axis] -= dim[rot_axis] / 2
            box3d = geometry.OrientedBoundingBox(center, rot_mat, dim)

            line_set = geometry.LineSet.create_from_oriented_bounding_box(
                box3d)
            line_set.paint_uniform_color(bbox_color)
            # draw bboxes on visualizer
            self.o3d_vis.add_geometry(line_set)

            # change the color of points which are in box
            if self.pcd is not None and mode == 'xyz':
                indices = box3d.get_point_indices_within_bounding_box(
                    self.pcd.points)
                self.points_colors[indices] = in_box_color

        # update points colors
        if self.pcd is not None:
            self.pcd.colors = o3d.utility.Vector3dVector(self.points_colors)
            self.o3d_vis.update_geometry(self.pcd)

    def draw_proj_bboxes_3d(
            self,
            bboxes_3d: BaseInstance3DBoxes,
            input_meta: dict,
            bbox_color: Tuple[float] = (0, 1, 0),
            line_styles: Union[str, List[str]] = '-',
            line_widths: Union[Union[int, float],
                               List[Union[int, float]]] = 2,
            box_mode: str = 'lidar'):
        """Draw projected 3D boxes on the image.

        Args:
            bboxes_3d (:obj:`BaseInstance3DBoxes`, shape=[M, 7]):
                3d bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
            input_meta (dict): Input meta information.
            bbox_color (tuple[float], optional): the color of bbox.
                Default: (0, 1, 0).
            line_styles (Union[str, List[str]]): The linestyle of lines.
                ``line_styles`` can have the same length with texts or just
                single value. If ``line_styles`` is single value, all the
                lines will have the same linestyle.
            line_widths (Union[Union[int, float], List[Union[int, float]]]):
                The linewidth of lines. ``line_widths`` can have the same
                length with lines or just single value. If ``line_widths``
                is single value, all the lines will have the same linewidth.
                Defaults to 2.
            box_mode (str): Indicate the coordinates of bbox.
        """
        check_type('bboxes', bboxes_3d, BaseInstance3DBoxes)

        if box_mode == 'depth':
            proj_bbox3d_to_img = proj_depth_bbox3d_to_img
        elif box_mode == 'lidar':
            proj_bbox3d_to_img = proj_lidar_bbox3d_to_img
        elif box_mode == 'camera':
            proj_bbox3d_to_img = proj_camera_bbox3d_to_img
        else:
            raise NotImplementedError(f'unsupported box mode {box_mode}')

        # (num_bboxes_3d, 8, 2)
        proj_bboxes_3d = proj_bbox3d_to_img(bboxes_3d, input_meta)
        num_bboxes_3d = proj_bboxes_3d.shape[0]

        line_indices = ((0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (3, 2),
                        (3, 7), (4, 5), (4, 7), (2, 6), (5, 6), (6, 7))

        # TODO: assign each projected 3d bboxes color
        # according to pred / gt class.
        for i in range(num_bboxes_3d):
            x_datas = []
            y_datas = []
            corners = proj_bboxes_3d[i].astype(np.int)  # (8, 2)
            for start, end in line_indices:
                x_datas.append([corners[start][0], corners[end][0]])
                y_datas.append([corners[start][1], corners[end][1]])
            x_datas = np.array(x_datas)
            y_datas = np.array(y_datas)
            self.draw_lines(x_datas, y_datas, bbox_color, line_styles,
                            line_widths)

    def draw_seg_mask(self, seg_mask_colors: np.array, vis_task: str):
        """Add segmentation mask to visualizer via per-point colorization.

        Args:
            seg_mask_colors (numpy.array, shape=[N, 6]):
                The segmentation mask whose first 3 dims are point
                coordinates and last 3 dims are converted colors.
        """
        # we can't draw the colors on existing points
        # in case gt and pred mask would overlap
        # instead we set a large offset along x-axis for each seg mask
        self.seg_num += 1
        offset = (np.array(self.pcd.points).max(0) -
                  np.array(self.pcd.points).min(0))[0] * 1.2 * self.seg_num
        mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
            size=1, origin=[offset, 0, 0])
        # create coordinate frame for seg
        self.o3d_vis.add_geometry(mesh_frame)
        seg_points = copy.deepcopy(seg_mask_colors)
        seg_points[:, 0] += offset
        self.set_points(seg_points, vis_task, mode='xyzrgb')

    def _draw_instances_3d(self, data_input: dict, instances: InstanceData,
                           input_meta: dict, vis_task: str,
                           palette: Optional[List[tuple]]):
        """Draw 3D instances of GT or prediction.

        Args:
            data_input (dict): The input dict to draw.
            instances (:obj:`InstanceData`): Data structure for
                instance-level annotations or predictions.
            input_meta (dict): Meta information.
            vis_task (str): Visualization task, it includes: 'det',
                'multi_modality-det', 'mono-det'.

        Returns:
            dict: the drawn points, image and boxes.
        """
        bboxes_3d = instances.bboxes_3d  # BaseInstance3DBoxes

        drawn_img = None
        drawn_points = None
        drawn_bboxes_3d = None

        if vis_task in ['det', 'multi_modality-det']:
            assert 'points' in data_input
            points = data_input['points']
            check_type('points', points, (np.ndarray, Tensor))
            points = tensor2ndarray(points)

            if not isinstance(bboxes_3d, DepthInstance3DBoxes):
                points, bboxes_3d_depth = to_depth_mode(points, bboxes_3d)
            else:
                bboxes_3d_depth = bboxes_3d.clone()

            self.set_points(points, vis_task)
            self.draw_bboxes_3d(bboxes_3d_depth)

            drawn_bboxes_3d = tensor2ndarray(bboxes_3d_depth.tensor)
            drawn_points = points

        if vis_task in ['mono-det', 'multi_modality-det']:
            assert 'img' in data_input
            image = data_input['img']
            self.set_image(image)
            self.draw_proj_bboxes_3d(bboxes_3d, input_meta)
            drawn_img = self.get_image()

        data_3d = dict(
            points=drawn_points, img=drawn_img, bboxes_3d=drawn_bboxes_3d)

        return data_3d

    def _draw_pts_sem_seg(self,
                          points: Tensor,
                          pts_seg: PixelData,
                          vis_task: str,
                          palette: Optional[List[tuple]] = None,
                          ignore_index: Optional[int] = None):
        """Draw 3D semantic mask of GT or prediction."""
        check_type('points', points, (np.ndarray, Tensor))

        points = tensor2ndarray(points)
        pts_sem_seg = tensor2ndarray(pts_seg.pts_semantic_mask)

        if ignore_index is not None:
            points = points[pts_sem_seg != ignore_index]
            pts_sem_seg = pts_sem_seg[pts_sem_seg != ignore_index]

        pts_color = palette[pts_sem_seg]
        seg_color = np.concatenate([points[:, :3], pts_color], axis=1)

        self.set_points(points, vis_task)
        self.draw_seg_mask(seg_color, vis_task)

        seg_data_3d = dict(points=points, seg_color=seg_color)

        return seg_data_3d

    @master_only
    def show(self,
             vis_task: str = None,
             out_file: str = None,
             drawn_img_3d: Optional[np.ndarray] = None,
             drawn_img: Optional[np.ndarray] = None,
             win_name: str = 'image',
             wait_time: int = 0,
             continue_key=' ') -> None:
        """Show the drawn point cloud / image.

        Args:
            vis_task (str): Visualization task, it includes: 'det',
                'multi_modality-det', 'mono-det'.
            out_file (str): Output file path.
            drawn_img_3d (np.ndarray, optional): The image with projected 3D
                boxes to show. Defaults to None.
            drawn_img (np.ndarray, optional): The image to show. If drawn_img
                is None, it will show the image got by Visualizer. Defaults
                to None.
            win_name (str): The image title. Defaults to 'image'.
            wait_time (int): Delay in milliseconds. 0 is the special value
                that means "forever". Defaults to 0.
            continue_key (str): The key for users to continue. Defaults to
                the space key.
        """
        if vis_task in ['det', 'multi_modality-det']:
            self.o3d_vis.run()
            if out_file is not None:
                self.o3d_vis.capture_screen_image(out_file)
            self.o3d_vis.destroy_window()

        if vis_task in ['mono-det', 'multi_modality-det']:
            super().show(drawn_img_3d, win_name, wait_time, continue_key)

        if drawn_img is not None:
            super().show(drawn_img, win_name, wait_time, continue_key)

    @master_only
    def add_datasample(self,
                       name: str,
                       data_input: dict,
                       gt_sample: Optional['Det3DDataSample'] = None,
                       pred_sample: Optional['Det3DDataSample'] = None,
                       draw_gt: bool = True,
                       draw_pred: bool = True,
                       show: bool = False,
                       wait_time: float = 0,
                       out_file: Optional[str] = None,
                       vis_task: str = 'mono-det',
                       pred_score_thr: float = 0.3,
                       step: int = 0) -> None:
        """Draw datasample and save to all backends.

        - If GT and prediction are plotted at the same time, they are
          displayed in a stitched image where the left image is the
          ground truth and the right image is the prediction.
        - If ``show`` is True, all storage backends are ignored, and
          the images will be displayed in a local window.
        - If ``out_file`` is specified, the drawn point cloud or image
          will be saved to ``out_file``. It is usually used when the
          display is not available.

        Args:
            name (str): The image identifier.
            data_input (dict): It should include the point clouds or image
                to draw.
            gt_sample (:obj:`Det3DDataSample`, optional): GT Det3DDataSample.
                Defaults to None.
            pred_sample (:obj:`Det3DDataSample`, optional): Prediction
                Det3DDataSample. Defaults to None.
            draw_gt (bool): Whether to draw GT Det3DDataSample.
                Defaults to True.
            draw_pred (bool): Whether to draw Prediction Det3DDataSample.
                Defaults to True.
            show (bool): Whether to display the drawn point clouds and
                image. Defaults to False.
            wait_time (float): The interval of show (s). Defaults to 0.
            out_file (str): Path to output file. Defaults to None.
            vis_task (str): Visualization task. Defaults to 'mono-det'.
            pred_score_thr (float): The threshold to visualize the bboxes
                and masks. Defaults to 0.3.
            step (int): Global step value to record. Defaults to 0.
        """
        classes = self.dataset_meta.get('CLASSES', None)
        # For object detection datasets, no PALETTE is saved
        palette = self.dataset_meta.get('PALETTE', None)
        ignore_index = self.dataset_meta.get('ignore_index', None)

        gt_data_3d = None
        pred_data_3d = None
        gt_img_data = None
        pred_img_data = None
        gt_seg_data_3d = None
        pred_seg_data_3d = None

        if draw_gt and gt_sample is not None:
            if 'gt_instances_3d' in gt_sample:
                gt_data_3d = self._draw_instances_3d(
                    data_input, gt_sample.gt_instances_3d,
                    gt_sample.metainfo, vis_task, palette)
            if 'gt_instances' in gt_sample:
                assert 'img' in data_input
                gt_img_data = self._draw_instances(data_input['img'],
                                                   gt_sample.gt_instances,
                                                   classes, palette)
            if 'gt_pts_sem_seg' in gt_sample:
                assert classes is not None, 'class information is ' \
                    'not provided when ' \
                    'visualizing panoptic ' \
                    'segmentation results.'
                assert 'points' in data_input
                gt_seg_data_3d = self._draw_pts_sem_seg(
                    data_input['points'], gt_sample.gt_pts_seg, vis_task,
                    palette, ignore_index)

        if draw_pred and pred_sample is not None:
            if 'pred_instances_3d' in pred_sample:
                pred_instances_3d = pred_sample.pred_instances_3d
                # filter out low score instances
                pred_instances_3d = pred_instances_3d[
                    pred_instances_3d.scores_3d > pred_score_thr].cpu()
                pred_data_3d = self._draw_instances_3d(
                    data_input, pred_instances_3d, pred_sample.metainfo,
                    vis_task, palette)
            if 'pred_instances' in pred_sample:
                assert 'img' in data_input
                pred_instances = pred_sample.pred_instances
                pred_instances = pred_instances[
                    pred_instances.scores > pred_score_thr].cpu()
                pred_img_data = self._draw_instances(data_input['img'],
                                                     pred_instances, classes,
                                                     palette)
            if 'pred_pts_sem_seg' in pred_sample:
                assert classes is not None, 'class information is ' \
                    'not provided when ' \
                    'visualizing panoptic ' \
                    'segmentation results.'
                assert 'points' in data_input
                pred_seg_data_3d = self._draw_pts_sem_seg(
                    data_input['points'], pred_sample.pred_pts_seg, vis_task,
                    palette, ignore_index)

        # monocular 3d object detection image
        if gt_data_3d is not None and pred_data_3d is not None:
            drawn_img_3d = np.concatenate(
                (gt_data_3d['img'], pred_data_3d['img']), axis=1)
        elif gt_data_3d is not None:
            drawn_img_3d = gt_data_3d['img']
        elif pred_data_3d is not None:
            drawn_img_3d = pred_data_3d['img']
        else:
            drawn_img_3d = None

        # 2d object detection image
        if gt_img_data is not None and pred_img_data is not None:
            drawn_img = np.concatenate((gt_img_data, pred_img_data), axis=1)
        elif gt_img_data is not None:
            drawn_img = gt_img_data
        elif pred_img_data is not None:
            drawn_img = pred_img_data
        else:
            drawn_img = None

        if show:
            self.show(
                vis_task,
                out_file,
                drawn_img_3d,
                drawn_img,
                win_name=name,
                wait_time=wait_time)

        if out_file is not None:
            if drawn_img_3d is not None:
                mmcv.imwrite(drawn_img_3d[..., ::-1], out_file)
            if drawn_img is not None:
                mmcv.imwrite(drawn_img[..., ::-1], out_file)
            if gt_data_3d is not None:
                write_obj(gt_data_3d['points'],
                          osp.join(out_file, 'points.obj'))
                write_oriented_bbox(gt_data_3d['bboxes_3d'],
                                    osp.join(out_file, 'gt_bbox.obj'))
            if pred_data_3d is not None:
                write_obj(pred_data_3d['points'],
                          osp.join(out_file, 'points.obj'))
                write_oriented_bbox(pred_data_3d['bboxes_3d'],
                                    osp.join(out_file, 'pred_bbox.obj'))
            if gt_seg_data_3d is not None:
                write_obj(gt_seg_data_3d['points'],
                          osp.join(out_file, 'points.obj'))
                write_obj(gt_seg_data_3d['seg_color'],
                          osp.join(out_file, 'gt_seg.obj'))
            if pred_seg_data_3d is not None:
                write_obj(pred_seg_data_3d['points'],
                          osp.join(out_file, 'points.obj'))
                write_obj(pred_seg_data_3d['seg_color'],
                          osp.join(out_file, 'pred_seg.obj'))
        else:
            self.add_image(name, drawn_img_3d, step)
```
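A minimal end-to-end sketch of the new class, adapted from the docstring example above. It assumes open3d is installed and a display is available for the point-cloud window; the data is random and purely illustrative.

```python
# Sketch adapted from the Det3DLocalVisualizer docstring; random data,
# requires open3d and a display for the Open3D window.
import numpy as np
import torch
from mmengine.data import InstanceData

from mmdet3d.core import Det3DDataSample, DepthInstance3DBoxes
from mmdet3d.core.visualization import Det3DLocalVisualizer

# Random points plus one depth-mode (x, y, z, dx, dy, dz, yaw) box.
points = np.random.rand(1000, 3).astype(np.float32)
gt_instances_3d = InstanceData()
gt_instances_3d.bboxes_3d = DepthInstance3DBoxes(torch.rand((1, 7)))
gt_instances_3d.labels_3d = torch.randint(0, 2, (1,))

gt_sample = Det3DDataSample()
gt_sample.gt_instances_3d = gt_instances_3d

visualizer = Det3DLocalVisualizer()
visualizer.add_datasample(
    'demo',
    dict(points=points),
    gt_sample=gt_sample,
    vis_task='det',
    show=True)
```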
mmdet3d/core/visualization/vis_utils.py (new file, 0 → 100644)

```python
# Copyright (c) OpenMMLab. All rights reserved.
import copy

import numpy as np
import torch
import trimesh

from mmdet3d.core.bbox import Box3DMode, Coord3DMode
from mmdet3d.core.bbox.structures.cam_box3d import CameraInstance3DBoxes
from mmdet3d.core.bbox.structures.depth_box3d import DepthInstance3DBoxes
from mmdet3d.core.bbox.structures.lidar_box3d import LiDARInstance3DBoxes


def write_obj(points, out_filename):
    """Write points into ``obj`` format for meshlab visualization.

    Args:
        points (np.ndarray): Points in shape (N, dim).
        out_filename (str): Filename to be saved.
    """
    N = points.shape[0]
    fout = open(out_filename, 'w')
    for i in range(N):
        if points.shape[1] == 6:
            c = points[i, 3:].astype(int)
            fout.write(
                'v %f %f %f %d %d %d\n' %
                (points[i, 0], points[i, 1], points[i, 2], c[0], c[1], c[2]))
        else:
            fout.write('v %f %f %f\n' %
                       (points[i, 0], points[i, 1], points[i, 2]))
    fout.close()


def write_oriented_bbox(scene_bbox, out_filename):
    """Export oriented (around Z axis) scene bbox to meshes.

    Args:
        scene_bbox (list[ndarray] or ndarray): xyz pos of center and
            3 lengths (x_size, y_size, z_size) and heading angle around Z
            axis. Y forward, X right, Z upward. Heading angle of positive X
            is 0, heading angle of positive Y is 90 degrees.
        out_filename (str): Filename.
    """

    def heading2rotmat(heading_angle):
        rotmat = np.zeros((3, 3))
        rotmat[2, 2] = 1
        cosval = np.cos(heading_angle)
        sinval = np.sin(heading_angle)
        rotmat[0:2, 0:2] = np.array([[cosval, -sinval], [sinval, cosval]])
        return rotmat

    def convert_oriented_box_to_trimesh_fmt(box):
        ctr = box[:3]
        lengths = box[3:6]
        trns = np.eye(4)
        trns[0:3, 3] = ctr
        trns[3, 3] = 1.0
        trns[0:3, 0:3] = heading2rotmat(box[6])
        box_trimesh_fmt = trimesh.creation.box(lengths, trns)
        return box_trimesh_fmt

    if len(scene_bbox) == 0:
        scene_bbox = np.zeros((1, 7))
    scene = trimesh.scene.Scene()
    for box in scene_bbox:
        scene.add_geometry(convert_oriented_box_to_trimesh_fmt(box))

    mesh_list = trimesh.util.concatenate(scene.dump())
    # save to obj file
    trimesh.io.export.export_mesh(mesh_list, out_filename, file_type='obj')

    return


def to_depth_mode(points, bboxes):
    """Convert points and bboxes to Depth Coord and Depth Box mode."""
    if points is not None:
        points = Coord3DMode.convert_point(points.copy(), Coord3DMode.LIDAR,
                                           Coord3DMode.DEPTH)
    if bboxes is not None:
        bboxes = Box3DMode.convert(bboxes.clone(), Box3DMode.LIDAR,
                                   Box3DMode.DEPTH)
    return points, bboxes


# TODO: refactor lidar2img to img_meta
def proj_lidar_bbox3d_to_img(bboxes_3d: LiDARInstance3DBoxes,
                             input_meta: dict) -> np.array:
    """Project the 3D bbox on 2D plane.

    Args:
        bboxes_3d (:obj:`LiDARInstance3DBoxes`): 3d bbox in lidar coordinate
            system to visualize.
        input_meta (dict): Meta information that contains the ``lidar2img``
            projection matrix (shape=[4, 4]) built from the camera intrinsic
            parameters.
    """
    corners_3d = bboxes_3d.corners
    num_bbox = corners_3d.shape[0]
    pts_4d = np.concatenate(
        [corners_3d.reshape(-1, 3),
         np.ones((num_bbox * 8, 1))], axis=-1)
    lidar2img = copy.deepcopy(input_meta['lidar2img']).reshape(4, 4)
    if isinstance(lidar2img, torch.Tensor):
        lidar2img = lidar2img.cpu().numpy()
    pts_2d = pts_4d @ lidar2img.T

    pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=1e5)
    pts_2d[:, 0] /= pts_2d[:, 2]
    pts_2d[:, 1] /= pts_2d[:, 2]
    imgfov_pts_2d = pts_2d[..., :2].reshape(num_bbox, 8, 2)

    return imgfov_pts_2d


# TODO: remove third parameter in all functions here in favour of img_metas
def proj_depth_bbox3d_to_img(bboxes_3d: DepthInstance3DBoxes,
                             input_meta: dict) -> np.array:
    """Project the 3D bbox on 2D plane and draw on input image.

    Args:
        bboxes_3d (:obj:`DepthInstance3DBoxes`, shape=[M, 7]): 3d bbox in
            depth coordinate system to visualize.
        input_meta (dict): Used in coordinates transformation.
    """
    from mmdet3d.core.bbox import points_cam2img
    from mmdet3d.models import apply_3d_transformation

    input_meta = copy.deepcopy(input_meta)
    corners_3d = bboxes_3d.corners
    num_bbox = corners_3d.shape[0]
    points_3d = corners_3d.reshape(-1, 3)

    # first reverse the data transformations
    xyz_depth = apply_3d_transformation(
        points_3d, 'DEPTH', input_meta, reverse=True)

    # project to 2d to get image coords (uv)
    uv_origin = points_cam2img(xyz_depth,
                               xyz_depth.new_tensor(input_meta['depth2img']))
    uv_origin = (uv_origin - 1).round()
    imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()

    return imgfov_pts_2d


# project the camera bboxes 3d to image
def proj_camera_bbox3d_to_img(bboxes_3d: CameraInstance3DBoxes,
                              input_meta: dict) -> np.array:
    """Project the 3D bbox on 2D plane and draw on input image.

    Args:
        bboxes_3d (:obj:`CameraInstance3DBoxes`, shape=[M, 7]): 3d bbox in
            camera coordinate system to visualize.
        input_meta (dict): Meta information that contains the camera
            intrinsic matrix ``cam2img``, denoted as `K` in depth bbox
            coordinate system.
    """
    from mmdet3d.core.bbox import points_cam2img

    cam2img = copy.deepcopy(input_meta['cam2img'])
    corners_3d = bboxes_3d.corners
    num_bbox = corners_3d.shape[0]
    points_3d = corners_3d.reshape(-1, 3)
    if not isinstance(cam2img, torch.Tensor):
        cam2img = torch.from_numpy(np.array(cam2img))

    assert (cam2img.shape == torch.Size([3, 3])
            or cam2img.shape == torch.Size([4, 4]))
    cam2img = cam2img.float().cpu()

    # project to 2d to get image coords (uv)
    uv_origin = points_cam2img(points_3d, cam2img)
    uv_origin = (uv_origin - 1).round()
    imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()

    return imgfov_pts_2d
```
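A small sketch of the exporters above (requires trimesh; the output paths are illustrative):

```python
# Sketch: export boxes and points with the new helpers for meshlab inspection.
# Output file names are illustrative, not mandated by the API.
import numpy as np

from mmdet3d.core.visualization import write_obj, write_oriented_bbox

# Two (x, y, z, x_size, y_size, z_size, yaw) boxes, per the docstring above.
boxes = np.array([
    [0.0, 0.0, 1.0, 4.0, 2.0, 1.5, 0.0],
    [5.0, 2.0, 1.0, 3.5, 1.8, 1.4, np.pi / 4],
])
write_oriented_bbox(boxes, 'boxes.obj')

# Dump a random point cloud alongside it.
write_obj(np.random.rand(100, 3), 'points.obj')
```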
mmdet3d/core/visualizer/__init__.py (deleted, 100644 → 0)

```python
# Copyright (c) OpenMMLab. All rights reserved.
from .show_result import (show_multi_modality_result, show_result,
                          show_seg_result)

__all__ = ['show_result', 'show_seg_result', 'show_multi_modality_result']
```
mmdet3d/core/visualizer/image_vis.py (deleted, 100644 → 0)

```python
# Copyright (c) OpenMMLab. All rights reserved.
import copy

import cv2
import numpy as np
import torch
from matplotlib import pyplot as plt


def project_pts_on_img(points,
                       raw_img,
                       lidar2img_rt,
                       max_distance=70,
                       thickness=-1):
    """Project the 3D points cloud on 2D image.

    Args:
        points (numpy.array): 3D points cloud (x, y, z) to visualize.
        raw_img (numpy.array): The numpy array of image.
        lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix
            according to the camera intrinsic parameters.
        max_distance (float, optional): the max distance of the points cloud.
            Default: 70.
        thickness (int, optional): The thickness of 2D points. Default: -1.
    """
    img = raw_img.copy()
    num_points = points.shape[0]
    pts_4d = np.concatenate([points[:, :3], np.ones((num_points, 1))], axis=-1)
    pts_2d = pts_4d @ lidar2img_rt.T

    # cam_points is Tensor of Nx4 whose last column is 1
    # transform camera coordinate to image coordinate
    pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=99999)
    pts_2d[:, 0] /= pts_2d[:, 2]
    pts_2d[:, 1] /= pts_2d[:, 2]

    fov_inds = ((pts_2d[:, 0] < img.shape[1])
                & (pts_2d[:, 0] >= 0)
                & (pts_2d[:, 1] < img.shape[0])
                & (pts_2d[:, 1] >= 0))

    imgfov_pts_2d = pts_2d[fov_inds, :3]  # u, v, d

    cmap = plt.cm.get_cmap('hsv', 256)
    cmap = np.array([cmap(i) for i in range(256)])[:, :3] * 255

    for i in range(imgfov_pts_2d.shape[0]):
        depth = imgfov_pts_2d[i, 2]
        color = cmap[np.clip(int(max_distance * 10 / depth), 0, 255), :]
        cv2.circle(
            img,
            center=(int(np.round(imgfov_pts_2d[i, 0])),
                    int(np.round(imgfov_pts_2d[i, 1]))),
            radius=1,
            color=tuple(color),
            thickness=thickness,
        )
    cv2.imshow('project_pts_img', img.astype(np.uint8))
    cv2.waitKey(100)


def plot_rect3d_on_img(img,
                       num_rects,
                       rect_corners,
                       color=(0, 255, 0),
                       thickness=1):
    """Plot the boundary lines of 3D rectangular on 2D images.

    Args:
        img (numpy.array): The numpy array of image.
        num_rects (int): Number of 3D rectangulars.
        rect_corners (numpy.array): Coordinates of the corners of 3D
            rectangulars. Should be in the shape of [num_rect, 8, 2].
        color (tuple[int], optional): The color to draw bboxes.
            Default: (0, 255, 0).
        thickness (int, optional): The thickness of bboxes. Default: 1.
    """
    line_indices = ((0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (3, 2), (3, 7),
                    (4, 5), (4, 7), (2, 6), (5, 6), (6, 7))
    for i in range(num_rects):
        corners = rect_corners[i].astype(np.int)
        for start, end in line_indices:
            cv2.line(img, (corners[start, 0], corners[start, 1]),
                     (corners[end, 0], corners[end, 1]), color, thickness,
                     cv2.LINE_AA)

    return img.astype(np.uint8)


def draw_lidar_bbox3d_on_img(bboxes3d,
                             raw_img,
                             lidar2img_rt,
                             img_metas,
                             color=(0, 255, 0),
                             thickness=1):
    """Project the 3D bbox on 2D plane and draw on input image.

    Args:
        bboxes3d (:obj:`LiDARInstance3DBoxes`):
            3d bbox in lidar coordinate system to visualize.
        raw_img (numpy.array): The numpy array of image.
        lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix
            according to the camera intrinsic parameters.
        img_metas (dict): Useless here.
        color (tuple[int], optional): The color to draw bboxes.
            Default: (0, 255, 0).
        thickness (int, optional): The thickness of bboxes. Default: 1.
    """
    img = raw_img.copy()
    corners_3d = bboxes3d.corners
    num_bbox = corners_3d.shape[0]
    pts_4d = np.concatenate(
        [corners_3d.reshape(-1, 3),
         np.ones((num_bbox * 8, 1))], axis=-1)
    lidar2img_rt = copy.deepcopy(lidar2img_rt).reshape(4, 4)
    if isinstance(lidar2img_rt, torch.Tensor):
        lidar2img_rt = lidar2img_rt.cpu().numpy()
    pts_2d = pts_4d @ lidar2img_rt.T

    pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=1e5)
    pts_2d[:, 0] /= pts_2d[:, 2]
    pts_2d[:, 1] /= pts_2d[:, 2]
    imgfov_pts_2d = pts_2d[..., :2].reshape(num_bbox, 8, 2)

    return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)


# TODO: remove third parameter in all functions here in favour of img_metas
def draw_depth_bbox3d_on_img(bboxes3d,
                             raw_img,
                             calibs,
                             img_metas,
                             color=(0, 255, 0),
                             thickness=1):
    """Project the 3D bbox on 2D plane and draw on input image.

    Args:
        bboxes3d (:obj:`DepthInstance3DBoxes`, shape=[M, 7]):
            3d bbox in depth coordinate system to visualize.
        raw_img (numpy.array): The numpy array of image.
        calibs (dict): Camera calibration information, Rt and K.
        img_metas (dict): Used in coordinates transformation.
        color (tuple[int], optional): The color to draw bboxes.
            Default: (0, 255, 0).
        thickness (int, optional): The thickness of bboxes. Default: 1.
    """
    from mmdet3d.core.bbox import points_cam2img
    from mmdet3d.models import apply_3d_transformation

    img = raw_img.copy()
    img_metas = copy.deepcopy(img_metas)
    corners_3d = bboxes3d.corners
    num_bbox = corners_3d.shape[0]
    points_3d = corners_3d.reshape(-1, 3)

    # first reverse the data transformations
    xyz_depth = apply_3d_transformation(
        points_3d, 'DEPTH', img_metas, reverse=True)

    # project to 2d to get image coords (uv)
    uv_origin = points_cam2img(xyz_depth,
                               xyz_depth.new_tensor(img_metas['depth2img']))
    uv_origin = (uv_origin - 1).round()
    imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()

    return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)


def draw_camera_bbox3d_on_img(bboxes3d,
                              raw_img,
                              cam2img,
                              img_metas,
                              color=(0, 255, 0),
                              thickness=1):
    """Project the 3D bbox on 2D plane and draw on input image.

    Args:
        bboxes3d (:obj:`CameraInstance3DBoxes`, shape=[M, 7]):
            3d bbox in camera coordinate system to visualize.
        raw_img (numpy.array): The numpy array of image.
        cam2img (dict): Camera intrinsic matrix,
            denoted as `K` in depth bbox coordinate system.
        img_metas (dict): Useless here.
        color (tuple[int], optional): The color to draw bboxes.
            Default: (0, 255, 0).
        thickness (int, optional): The thickness of bboxes. Default: 1.
    """
    from mmdet3d.core.bbox import points_cam2img

    img = raw_img.copy()
    cam2img = copy.deepcopy(cam2img)
    corners_3d = bboxes3d.corners
    num_bbox = corners_3d.shape[0]
    points_3d = corners_3d.reshape(-1, 3)
    if not isinstance(cam2img, torch.Tensor):
        cam2img = torch.from_numpy(np.array(cam2img))

    assert (cam2img.shape == torch.Size([3, 3])
            or cam2img.shape == torch.Size([4, 4]))
    cam2img = cam2img.float().cpu()

    # project to 2d to get image coords (uv)
    uv_origin = points_cam2img(points_3d, cam2img)
    uv_origin = (uv_origin - 1).round()
    imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()

    return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)
```
mmdet3d/core/visualizer/open3d_vis.py
deleted
100644 → 0
View file @
0e17beab
# Copyright (c) OpenMMLab. All rights reserved.
import
copy
import
numpy
as
np
import
torch
try
:
import
open3d
as
o3d
from
open3d
import
geometry
except
ImportError
:
raise
ImportError
(
'Please run "pip install open3d" to install open3d first.'
)
def
_draw_points
(
points
,
vis
,
points_size
=
2
,
point_color
=
(
0.5
,
0.5
,
0.5
),
mode
=
'xyz'
):
"""Draw points on visualizer.
Args:
points (numpy.array | torch.tensor, shape=[N, 3+C]):
points to visualize.
vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.
points_size (int, optional): the size of points to show on visualizer.
Default: 2.
point_color (tuple[float], optional): the color of points.
Default: (0.5, 0.5, 0.5).
mode (str, optional): indicate type of the input points,
available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
Returns:
tuple: points, color of each point.
"""
vis
.
get_render_option
().
point_size
=
points_size
# set points size
if
isinstance
(
points
,
torch
.
Tensor
):
points
=
points
.
cpu
().
numpy
()
points
=
points
.
copy
()
pcd
=
geometry
.
PointCloud
()
if
mode
==
'xyz'
:
pcd
.
points
=
o3d
.
utility
.
Vector3dVector
(
points
[:,
:
3
])
points_colors
=
np
.
tile
(
np
.
array
(
point_color
),
(
points
.
shape
[
0
],
1
))
elif
mode
==
'xyzrgb'
:
pcd
.
points
=
o3d
.
utility
.
Vector3dVector
(
points
[:,
:
3
])
points_colors
=
points
[:,
3
:
6
]
# normalize to [0, 1] for open3d drawing
if
not
((
points_colors
>=
0.0
)
&
(
points_colors
<=
1.0
)).
all
():
points_colors
/=
255.0
else
:
raise
NotImplementedError
pcd
.
colors
=
o3d
.
utility
.
Vector3dVector
(
points_colors
)
vis
.
add_geometry
(
pcd
)
return
pcd
,
points_colors
def _draw_bboxes(bbox3d,
                 vis,
                 points_colors,
                 pcd=None,
                 bbox_color=(0, 1, 0),
                 points_in_box_color=(1, 0, 0),
                 rot_axis=2,
                 center_mode='lidar_bottom',
                 mode='xyz'):
    """Draw bbox on visualizer and change the color of points inside bbox3d.

    Args:
        bbox3d (numpy.array | torch.tensor, shape=[M, 7]):
            3d bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
        vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.
        points_colors (numpy.array): color of each points.
        pcd (:obj:`open3d.geometry.PointCloud`, optional): point cloud.
            Default: None.
        bbox_color (tuple[float], optional): the color of bbox.
            Default: (0, 1, 0).
        points_in_box_color (tuple[float], optional):
            the color of points inside bbox3d. Default: (1, 0, 0).
        rot_axis (int, optional): rotation axis of bbox. Default: 2.
        center_mode (str, optional): indicate the center of bbox is
            bottom center or gravity center. available mode
            ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
        mode (str, optional): indicate type of the input points,
            available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
    """
    if isinstance(bbox3d, torch.Tensor):
        bbox3d = bbox3d.cpu().numpy()

    bbox3d = bbox3d.copy()
    in_box_color = np.array(points_in_box_color)
    for i in range(len(bbox3d)):
        center = bbox3d[i, 0:3]
        dim = bbox3d[i, 3:6]
        yaw = np.zeros(3)
        yaw[rot_axis] = bbox3d[i, 6]
        rot_mat = geometry.get_rotation_matrix_from_xyz(yaw)

        if center_mode == 'lidar_bottom':
            # bottom center to gravity center
            center[rot_axis] += dim[rot_axis] / 2
        elif center_mode == 'camera_bottom':
            # bottom center to gravity center
            center[rot_axis] -= dim[rot_axis] / 2
        box3d = geometry.OrientedBoundingBox(center, rot_mat, dim)

        line_set = geometry.LineSet.create_from_oriented_bounding_box(box3d)
        line_set.paint_uniform_color(bbox_color)
        # draw bboxes on visualizer
        vis.add_geometry(line_set)

        # change the color of points which are in box
        if pcd is not None and mode == 'xyz':
            indices = box3d.get_point_indices_within_bounding_box(pcd.points)
            points_colors[indices] = in_box_color

    # update points colors
    if pcd is not None:
        pcd.colors = o3d.utility.Vector3dVector(points_colors)
        vis.update_geometry(pcd)
def show_pts_boxes(points,
                   bbox3d=None,
                   show=True,
                   save_path=None,
                   points_size=2,
                   point_color=(0.5, 0.5, 0.5),
                   bbox_color=(0, 1, 0),
                   points_in_box_color=(1, 0, 0),
                   rot_axis=2,
                   center_mode='lidar_bottom',
                   mode='xyz'):
    """Draw bbox and points on visualizer.

    Args:
        points (numpy.array | torch.tensor, shape=[N, 3+C]):
            points to visualize.
        bbox3d (numpy.array | torch.tensor, shape=[M, 7], optional):
            3D bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
            Defaults to None.
        show (bool, optional): whether to show the visualization results.
            Default: True.
        save_path (str, optional): path to save visualized results.
            Default: None.
        points_size (int, optional): the size of points to show on visualizer.
            Default: 2.
        point_color (tuple[float], optional): the color of points.
            Default: (0.5, 0.5, 0.5).
        bbox_color (tuple[float], optional): the color of bbox.
            Default: (0, 1, 0).
        points_in_box_color (tuple[float], optional):
            the color of points which are in bbox3d. Default: (1, 0, 0).
        rot_axis (int, optional): rotation axis of bbox. Default: 2.
        center_mode (str, optional): indicate the center of bbox is bottom
            center or gravity center. available mode
            ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
        mode (str, optional): indicate type of the input points, available
            mode ['xyz', 'xyzrgb']. Default: 'xyz'.
    """
    # TODO: support score and class info
    assert 0 <= rot_axis <= 2

    # init visualizer
    vis = o3d.visualization.Visualizer()
    vis.create_window()
    mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
        size=1, origin=[0, 0, 0])  # create coordinate frame
    vis.add_geometry(mesh_frame)

    # draw points
    pcd, points_colors = _draw_points(points, vis, points_size, point_color,
                                      mode)

    # draw boxes
    if bbox3d is not None:
        _draw_bboxes(bbox3d, vis, points_colors, pcd, bbox_color,
                     points_in_box_color, rot_axis, center_mode, mode)

    if show:
        vis.run()

    if save_path is not None:
        vis.capture_screen_image(save_path)

    vis.destroy_window()
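A quick way to exercise show_pts_boxes on a pre-refactor checkout is the sketch below; the random cloud, box values, and demo_scene.png path are illustrative only:

import numpy as np

from mmdet3d.core.visualizer.open3d_vis import show_pts_boxes

points = np.random.rand(1000, 3) * 10.0  # (N, 3) cloud in depth coordinates
boxes = np.array([[5., 5., 1., 2., 2., 2., 0.]])  # one (x, y, z, dims, yaw) box
# opens an Open3D window and saves a screenshot on close
show_pts_boxes(points, bbox3d=boxes, show=True, save_path='demo_scene.png')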
def _draw_bboxes_ind(bbox3d,
                     vis,
                     indices,
                     points_colors,
                     pcd=None,
                     bbox_color=(0, 1, 0),
                     points_in_box_color=(1, 0, 0),
                     rot_axis=2,
                     center_mode='lidar_bottom',
                     mode='xyz'):
    """Draw bbox on visualizer and change the color of points inside bbox3d
    with indices.

    Args:
        bbox3d (numpy.array | torch.tensor, shape=[M, 7]):
            3d bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
        vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.
        indices (numpy.array | torch.tensor, shape=[N, M]):
            indicate which bbox3d that each point lies in.
        points_colors (numpy.array): color of each points.
        pcd (:obj:`open3d.geometry.PointCloud`, optional): point cloud.
            Default: None.
        bbox_color (tuple[float], optional): the color of bbox.
            Default: (0, 1, 0).
        points_in_box_color (tuple[float], optional):
            the color of points which are in bbox3d. Default: (1, 0, 0).
        rot_axis (int, optional): rotation axis of bbox. Default: 2.
        center_mode (str, optional): indicate the center of bbox is
            bottom center or gravity center. available mode
            ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
        mode (str, optional): indicate type of the input points,
            available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
    """
    if isinstance(bbox3d, torch.Tensor):
        bbox3d = bbox3d.cpu().numpy()
    if isinstance(indices, torch.Tensor):
        indices = indices.cpu().numpy()
    bbox3d = bbox3d.copy()

    in_box_color = np.array(points_in_box_color)
    for i in range(len(bbox3d)):
        center = bbox3d[i, 0:3]
        dim = bbox3d[i, 3:6]
        yaw = np.zeros(3)
        # TODO: fix problem of current coordinate system
        # dim[0], dim[1] = dim[1], dim[0]  # for current coordinate
        # yaw[rot_axis] = -(bbox3d[i, 6] - 0.5 * np.pi)
        yaw[rot_axis] = -bbox3d[i, 6]
        rot_mat = geometry.get_rotation_matrix_from_xyz(yaw)
        if center_mode == 'lidar_bottom':
            # bottom center to gravity center
            center[rot_axis] += dim[rot_axis] / 2
        elif center_mode == 'camera_bottom':
            # bottom center to gravity center
            center[rot_axis] -= dim[rot_axis] / 2
        box3d = geometry.OrientedBoundingBox(center, rot_mat, dim)

        line_set = geometry.LineSet.create_from_oriented_bounding_box(box3d)
        line_set.paint_uniform_color(bbox_color)
        # draw bboxes on visualizer
        vis.add_geometry(line_set)

        # change the color of points which are in box
        # (builtin bool here; the deprecated np.bool alias was removed
        # from recent NumPy releases)
        if pcd is not None and mode == 'xyz':
            points_colors[indices[:, i].astype(bool)] = in_box_color

    # update points colors
    if pcd is not None:
        pcd.colors = o3d.utility.Vector3dVector(points_colors)
        vis.update_geometry(pcd)
def show_pts_index_boxes(points,
                         bbox3d=None,
                         show=True,
                         indices=None,
                         save_path=None,
                         points_size=2,
                         point_color=(0.5, 0.5, 0.5),
                         bbox_color=(0, 1, 0),
                         points_in_box_color=(1, 0, 0),
                         rot_axis=2,
                         center_mode='lidar_bottom',
                         mode='xyz'):
    """Draw bbox and points on visualizer with indices that indicate which
    bbox3d that each point lies in.

    Args:
        points (numpy.array | torch.tensor, shape=[N, 3+C]):
            points to visualize.
        bbox3d (numpy.array | torch.tensor, shape=[M, 7], optional):
            3D bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
            Defaults to None.
        show (bool, optional): whether to show the visualization results.
            Default: True.
        indices (numpy.array | torch.tensor, shape=[N, M], optional):
            indicate which bbox3d that each point lies in. Default: None.
        save_path (str, optional): path to save visualized results.
            Default: None.
        points_size (int, optional): the size of points to show on visualizer.
            Default: 2.
        point_color (tuple[float], optional): the color of points.
            Default: (0.5, 0.5, 0.5).
        bbox_color (tuple[float], optional): the color of bbox.
            Default: (0, 1, 0).
        points_in_box_color (tuple[float], optional):
            the color of points which are in bbox3d. Default: (1, 0, 0).
        rot_axis (int, optional): rotation axis of bbox. Default: 2.
        center_mode (str, optional): indicate the center of bbox is
            bottom center or gravity center. available mode
            ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
        mode (str, optional): indicate type of the input points,
            available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
    """
    # TODO: support score and class info
    assert 0 <= rot_axis <= 2

    # init visualizer
    vis = o3d.visualization.Visualizer()
    vis.create_window()
    mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
        size=1, origin=[0, 0, 0])  # create coordinate frame
    vis.add_geometry(mesh_frame)

    # draw points
    pcd, points_colors = _draw_points(points, vis, points_size, point_color,
                                      mode)

    # draw boxes
    if bbox3d is not None:
        _draw_bboxes_ind(bbox3d, vis, indices, points_colors, pcd, bbox_color,
                         points_in_box_color, rot_axis, center_mode, mode)

    if show:
        vis.run()

    if save_path is not None:
        vis.capture_screen_image(save_path)

    vis.destroy_window()
class Visualizer(object):
    r"""Online visualizer implemented with Open3d.

    Args:
        points (numpy.array, shape=[N, 3+C]): Points to visualize. The Points
            cloud is in mode of Coord3DMode.DEPTH (please refer to
            core.structures.coord_3d_mode).
        bbox3d (numpy.array, shape=[M, 7], optional): 3D bbox
            (x, y, z, x_size, y_size, z_size, yaw) to visualize.
            The 3D bbox is in mode of Box3DMode.DEPTH with
            gravity_center (please refer to core.structures.box_3d_mode).
            Default: None.
        save_path (str, optional): path to save visualized results.
            Default: None.
        points_size (int, optional): the size of points to show on visualizer.
            Default: 2.
        point_color (tuple[float], optional): the color of points.
            Default: (0.5, 0.5, 0.5).
        bbox_color (tuple[float], optional): the color of bbox.
            Default: (0, 1, 0).
        points_in_box_color (tuple[float], optional):
            the color of points which are in bbox3d. Default: (1, 0, 0).
        rot_axis (int, optional): rotation axis of bbox. Default: 2.
        center_mode (str, optional): indicate the center of bbox is
            bottom center or gravity center. available mode
            ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
        mode (str, optional): indicate type of the input points,
            available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
    """

    def __init__(self,
                 points,
                 bbox3d=None,
                 save_path=None,
                 points_size=2,
                 point_color=(0.5, 0.5, 0.5),
                 bbox_color=(0, 1, 0),
                 points_in_box_color=(1, 0, 0),
                 rot_axis=2,
                 center_mode='lidar_bottom',
                 mode='xyz'):
        super(Visualizer, self).__init__()
        assert 0 <= rot_axis <= 2

        # init visualizer
        self.o3d_visualizer = o3d.visualization.Visualizer()
        self.o3d_visualizer.create_window()
        mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
            size=1, origin=[0, 0, 0])  # create coordinate frame
        self.o3d_visualizer.add_geometry(mesh_frame)

        self.points_size = points_size
        self.point_color = point_color
        self.bbox_color = bbox_color
        self.points_in_box_color = points_in_box_color
        self.rot_axis = rot_axis
        self.center_mode = center_mode
        self.mode = mode
        self.seg_num = 0

        # draw points
        if points is not None:
            self.pcd, self.points_colors = _draw_points(
                points, self.o3d_visualizer, points_size, point_color, mode)

        # draw boxes
        if bbox3d is not None:
            _draw_bboxes(bbox3d, self.o3d_visualizer, self.points_colors,
                         self.pcd, bbox_color, points_in_box_color, rot_axis,
                         center_mode, mode)

    def add_bboxes(self, bbox3d, bbox_color=None, points_in_box_color=None):
        """Add bounding box to visualizer.

        Args:
            bbox3d (numpy.array, shape=[M, 7]):
                3D bbox (x, y, z, x_size, y_size, z_size, yaw)
                to be visualized. The 3d bbox is in mode of
                Box3DMode.DEPTH with gravity_center (please refer to
                core.structures.box_3d_mode).
            bbox_color (tuple[float], optional): the color of bbox.
                Default: None.
            points_in_box_color (tuple[float], optional): the color of points
                which are in bbox3d. Default: None.
        """
        if bbox_color is None:
            bbox_color = self.bbox_color
        if points_in_box_color is None:
            points_in_box_color = self.points_in_box_color
        _draw_bboxes(bbox3d, self.o3d_visualizer, self.points_colors,
                     self.pcd, bbox_color, points_in_box_color, self.rot_axis,
                     self.center_mode, self.mode)

    def add_seg_mask(self, seg_mask_colors):
        """Add segmentation mask to visualizer via per-point colorization.

        Args:
            seg_mask_colors (numpy.array, shape=[N, 6]):
                The segmentation mask whose first 3 dims are point coordinates
                and last 3 dims are converted colors.
        """
        # we can't draw the colors on existing points
        # in case gt and pred mask would overlap
        # instead we set a large offset along x-axis for each seg mask
        self.seg_num += 1
        offset = (np.array(self.pcd.points).max(0) -
                  np.array(self.pcd.points).min(0))[0] * 1.2 * self.seg_num
        mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
            size=1, origin=[offset, 0, 0])  # create coordinate frame for seg
        self.o3d_visualizer.add_geometry(mesh_frame)
        seg_points = copy.deepcopy(seg_mask_colors)
        seg_points[:, 0] += offset
        _draw_points(
            seg_points, self.o3d_visualizer, self.points_size, mode='xyzrgb')

    def show(self, save_path=None):
        """Visualize the points cloud.

        Args:
            save_path (str, optional): path to save image. Default: None.
        """
        self.o3d_visualizer.run()

        if save_path is not None:
            self.o3d_visualizer.capture_screen_image(save_path)

        self.o3d_visualizer.destroy_window()
        return
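A hedged sketch of the online flow this class supports, assuming depth-mode inputs as the docstring requires (all point and box values below are toy data): draw a cloud, overlay prediction and ground-truth boxes in different colors, then render and optionally snapshot.

import numpy as np

from mmdet3d.core.visualizer.open3d_vis import Visualizer

points = np.random.rand(2000, 3) * 8.0  # toy depth-mode cloud
vis = Visualizer(points)
vis.add_bboxes(np.array([[4., 4., 1., 2., 2., 2., 0.]]))  # default green box
vis.add_bboxes(
    np.array([[2., 6., 1., 1., 1., 1., 0.]]), bbox_color=(0, 0, 1))  # blue GT
vis.show(save_path='scene_snapshot.png')  # blocks until the window closes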
mmdet3d/core/visualizer/show_result.py
deleted
100644 → 0
View file @
0e17beab
# Copyright (c) OpenMMLab. All rights reserved.
from os import path as osp

import mmcv
import numpy as np
import trimesh

from .image_vis import (draw_camera_bbox3d_on_img, draw_depth_bbox3d_on_img,
                        draw_lidar_bbox3d_on_img)


def _write_obj(points, out_filename):
    """Write points into ``obj`` format for meshlab visualization.

    Args:
        points (np.ndarray): Points in shape (N, dim).
        out_filename (str): Filename to be saved.
    """
    N = points.shape[0]
    fout = open(out_filename, 'w')
    for i in range(N):
        if points.shape[1] == 6:
            c = points[i, 3:].astype(int)
            fout.write(
                'v %f %f %f %d %d %d\n' %
                (points[i, 0], points[i, 1], points[i, 2], c[0], c[1], c[2]))
        else:
            fout.write('v %f %f %f\n' %
                       (points[i, 0], points[i, 1], points[i, 2]))
    fout.close()
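A tiny round-trip sketch of the OBJ vertex format _write_obj emits (the /tmp path is arbitrary; colored points get `v x y z r g b` lines, plain points just `v x y z`):

import numpy as np

from mmdet3d.core.visualizer.show_result import _write_obj

pts = np.array([[1.0, 2.0, 3.0, 255, 0, 0]])  # xyz + RGB color
_write_obj(pts, '/tmp/demo_points.obj')
# prints: v 1.000000 2.000000 3.000000 255 0 0
print(open('/tmp/demo_points.obj').read())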
def _write_oriented_bbox(scene_bbox, out_filename):
    """Export oriented (around Z axis) scene bbox to meshes.

    Args:
        scene_bbox (list[ndarray] or ndarray): xyz pos of center and
            3 lengths (x_size, y_size, z_size) and heading angle around Z
            axis. Y forward, X right, Z upward. heading angle of positive X
            is 0, heading angle of positive Y is 90 degrees.
        out_filename (str): Filename.
    """

    def heading2rotmat(heading_angle):
        rotmat = np.zeros((3, 3))
        rotmat[2, 2] = 1
        cosval = np.cos(heading_angle)
        sinval = np.sin(heading_angle)
        rotmat[0:2, 0:2] = np.array([[cosval, -sinval], [sinval, cosval]])
        return rotmat

    def convert_oriented_box_to_trimesh_fmt(box):
        ctr = box[:3]
        lengths = box[3:6]
        trns = np.eye(4)
        trns[0:3, 3] = ctr
        trns[3, 3] = 1.0
        trns[0:3, 0:3] = heading2rotmat(box[6])
        box_trimesh_fmt = trimesh.creation.box(lengths, trns)
        return box_trimesh_fmt

    if len(scene_bbox) == 0:
        scene_bbox = np.zeros((1, 7))
    scene = trimesh.scene.Scene()
    for box in scene_bbox:
        scene.add_geometry(convert_oriented_box_to_trimesh_fmt(box))

    mesh_list = trimesh.util.concatenate(scene.dump())
    # save to obj file
    trimesh.io.export.export_mesh(mesh_list, out_filename, file_type='obj')

    return
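The export can be sanity-checked by reloading the mesh with trimesh, as in the sketch below. Note the module above relies on the old trimesh.io export API, so this assumes a trimesh version old enough to provide it; the file path and box values are made up:

import numpy as np
import trimesh

from mmdet3d.core.visualizer.show_result import _write_oriented_bbox

box = np.array([[0., 0., 0., 2., 1., 1., np.pi / 4]])  # center, size, yaw
_write_oriented_bbox(box, '/tmp/demo_box.obj')
mesh = trimesh.load('/tmp/demo_box.obj')
print(mesh.vertices.shape)  # a single box mesh: 8 corner vertices expected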
def show_result(points,
                gt_bboxes,
                pred_bboxes,
                out_dir,
                filename,
                show=False,
                snapshot=False,
                pred_labels=None):
    """Convert results into format that is directly readable for meshlab.

    Args:
        points (np.ndarray): Points.
        gt_bboxes (np.ndarray): Ground truth boxes.
        pred_bboxes (np.ndarray): Predicted boxes.
        out_dir (str): Path of output directory.
        filename (str): Filename of the current frame.
        show (bool, optional): Visualize the results online.
            Defaults to False.
        snapshot (bool, optional): Whether to save the online results.
            Defaults to False.
        pred_labels (np.ndarray, optional): Predicted labels of boxes.
            Defaults to None.
    """
    result_path = osp.join(out_dir, filename)
    mmcv.mkdir_or_exist(result_path)

    if show:
        from .open3d_vis import Visualizer

        vis = Visualizer(points)
        if pred_bboxes is not None:
            if pred_labels is None:
                vis.add_bboxes(bbox3d=pred_bboxes)
            else:
                palette = np.random.randint(
                    0, 255, size=(pred_labels.max() + 1, 3)) / 256
                labelDict = {}
                for j in range(len(pred_labels)):
                    i = int(pred_labels[j].numpy())
                    if labelDict.get(i) is None:
                        labelDict[i] = []
                    labelDict[i].append(pred_bboxes[j])
                for i in labelDict:
                    vis.add_bboxes(
                        bbox3d=np.array(labelDict[i]),
                        bbox_color=palette[i],
                        points_in_box_color=palette[i])

        if gt_bboxes is not None:
            vis.add_bboxes(bbox3d=gt_bboxes, bbox_color=(0, 0, 1))
        show_path = osp.join(result_path,
                             f'{filename}_online.png') if snapshot else None
        vis.show(show_path)

    if points is not None:
        _write_obj(points, osp.join(result_path, f'{filename}_points.obj'))

    if gt_bboxes is not None:
        # bottom center to gravity center
        gt_bboxes[..., 2] += gt_bboxes[..., 5] / 2

        _write_oriented_bbox(gt_bboxes,
                             osp.join(result_path, f'{filename}_gt.obj'))

    if pred_bboxes is not None:
        # bottom center to gravity center
        pred_bboxes[..., 2] += pred_bboxes[..., 5] / 2

        _write_oriented_bbox(pred_bboxes,
                             osp.join(result_path, f'{filename}_pred.obj'))
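A minimal sketch of dumping one frame for MeshLab with show_result, assuming depth-mode bottom-center boxes and the pre-refactor module path (all values below are toy data). Note the function shifts the box centers from bottom to gravity center in place before export:

import numpy as np

from mmdet3d.core.visualizer.show_result import show_result

points = np.random.rand(500, 3) * 6.0
gt = np.array([[3., 3., 0.5, 1., 1., 1., 0.]])       # bottom-center boxes
pred = np.array([[3.1, 2.9, 0.5, 1., 1., 1., 0.1]])
show_result(points, gt, pred, out_dir='./vis', filename='frame_000')
# -> ./vis/frame_000/frame_000_{points,gt,pred}.obj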
def show_seg_result(points,
                    gt_seg,
                    pred_seg,
                    out_dir,
                    filename,
                    palette,
                    ignore_index=None,
                    show=False,
                    snapshot=False):
    """Convert results into format that is directly readable for meshlab.

    Args:
        points (np.ndarray): Points.
        gt_seg (np.ndarray): Ground truth segmentation mask.
        pred_seg (np.ndarray): Predicted segmentation mask.
        out_dir (str): Path of output directory.
        filename (str): Filename of the current frame.
        palette (np.ndarray): Mapping between class labels and colors.
        ignore_index (int, optional): The label index to be ignored, e.g.
            unannotated points. Defaults to None.
        show (bool, optional): Visualize the results online.
            Defaults to False.
        snapshot (bool, optional): Whether to save the online results.
            Defaults to False.
    """
    # we need 3D coordinates to visualize segmentation mask
    if gt_seg is not None or pred_seg is not None:
        assert points is not None, \
            '3D coordinates are required for segmentation visualization'

    # filter out ignored points
    if gt_seg is not None and ignore_index is not None:
        if points is not None:
            points = points[gt_seg != ignore_index]
        if pred_seg is not None:
            pred_seg = pred_seg[gt_seg != ignore_index]
        gt_seg = gt_seg[gt_seg != ignore_index]

    if gt_seg is not None:
        gt_seg_color = palette[gt_seg]
        gt_seg_color = np.concatenate([points[:, :3], gt_seg_color], axis=1)
    if pred_seg is not None:
        pred_seg_color = palette[pred_seg]
        pred_seg_color = np.concatenate([points[:, :3], pred_seg_color],
                                        axis=1)

    result_path = osp.join(out_dir, filename)
    mmcv.mkdir_or_exist(result_path)

    # online visualization of segmentation mask
    # we show three masks in a row, scene_points, gt_mask, pred_mask
    if show:
        from .open3d_vis import Visualizer

        mode = 'xyzrgb' if points.shape[1] == 6 else 'xyz'
        vis = Visualizer(points, mode=mode)
        if gt_seg is not None:
            vis.add_seg_mask(gt_seg_color)
        if pred_seg is not None:
            vis.add_seg_mask(pred_seg_color)
        show_path = osp.join(result_path,
                             f'{filename}_online.png') if snapshot else None
        vis.show(show_path)

    if points is not None:
        _write_obj(points, osp.join(result_path, f'{filename}_points.obj'))

    if gt_seg is not None:
        _write_obj(gt_seg_color, osp.join(result_path, f'{filename}_gt.obj'))

    if pred_seg is not None:
        _write_obj(pred_seg_color,
                   osp.join(result_path, f'{filename}_pred.obj'))
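A hedged sketch for show_seg_result on a toy 3-class cloud; the palette, labels, and output paths are illustrative only:

import numpy as np

from mmdet3d.core.visualizer.show_result import show_seg_result

points = np.random.rand(300, 3)
gt_seg = np.random.randint(0, 3, 300)
pred_seg = np.random.randint(0, 3, 300)
palette = np.array([[255, 0, 0], [0, 255, 0], [0, 0, 255]])  # 3 classes
# exports scene0000_{points,gt,pred}.obj with per-point colors
show_seg_result(points, gt_seg, pred_seg, './vis', 'scene0000', palette)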
def show_multi_modality_result(img,
                               gt_bboxes,
                               pred_bboxes,
                               proj_mat,
                               out_dir,
                               filename,
                               box_mode='lidar',
                               img_metas=None,
                               show=False,
                               gt_bbox_color=(61, 102, 255),
                               pred_bbox_color=(241, 101, 72)):
    """Convert multi-modality detection results into 2D results.

    Project the predicted 3D bbox to 2D image plane and visualize them.

    Args:
        img (np.ndarray): The numpy array of image in cv2 fashion.
        gt_bboxes (:obj:`BaseInstance3DBoxes`): Ground truth boxes.
        pred_bboxes (:obj:`BaseInstance3DBoxes`): Predicted boxes.
        proj_mat (numpy.array, shape=[4, 4]): The projection matrix
            according to the camera intrinsic parameters.
        out_dir (str): Path of output directory.
        filename (str): Filename of the current frame.
        box_mode (str, optional): Coordinate system the boxes are in.
            Should be one of 'depth', 'lidar' and 'camera'.
            Defaults to 'lidar'.
        img_metas (dict, optional): Used in projecting depth bbox.
            Defaults to None.
        show (bool, optional): Visualize the results online.
            Defaults to False.
        gt_bbox_color (str or tuple(int), optional): Color of bbox lines.
            The tuple of color should be in BGR order.
            Default: (61, 102, 255).
        pred_bbox_color (str or tuple(int), optional): Color of bbox lines.
            The tuple of color should be in BGR order.
            Default: (241, 101, 72).
    """
    if box_mode == 'depth':
        draw_bbox = draw_depth_bbox3d_on_img
    elif box_mode == 'lidar':
        draw_bbox = draw_lidar_bbox3d_on_img
    elif box_mode == 'camera':
        draw_bbox = draw_camera_bbox3d_on_img
    else:
        raise NotImplementedError(f'unsupported box mode {box_mode}')

    result_path = osp.join(out_dir, filename)
    mmcv.mkdir_or_exist(result_path)

    if show:
        show_img = img.copy()
        if gt_bboxes is not None:
            show_img = draw_bbox(
                gt_bboxes, show_img, proj_mat, img_metas, color=gt_bbox_color)
        if pred_bboxes is not None:
            show_img = draw_bbox(
                pred_bboxes,
                show_img,
                proj_mat,
                img_metas,
                color=pred_bbox_color)
        mmcv.imshow(show_img, win_name='project_bbox3d_img', wait_time=0)

    if img is not None:
        mmcv.imwrite(img, osp.join(result_path, f'{filename}_img.png'))

    if gt_bboxes is not None:
        gt_img = draw_bbox(
            gt_bboxes, img, proj_mat, img_metas, color=gt_bbox_color)
        mmcv.imwrite(gt_img, osp.join(result_path, f'{filename}_gt.png'))

    if pred_bboxes is not None:
        pred_img = draw_bbox(
            pred_bboxes, img, proj_mat, img_metas, color=pred_bbox_color)
        mmcv.imwrite(pred_img, osp.join(result_path, f'{filename}_pred.png'))
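A sketch of projecting LiDAR-frame boxes into an image with show_multi_modality_result; the identity lidar2img below is a placeholder, not a real calibration matrix, and the image and box values are toy data:

import numpy as np

from mmdet3d.core.bbox import LiDARInstance3DBoxes
from mmdet3d.core.visualizer.show_result import show_multi_modality_result

img = np.zeros((375, 1242, 3), dtype=np.uint8)  # dummy KITTI-sized image
boxes = LiDARInstance3DBoxes(np.array([[10., 0., 1., 4., 1.6, 1.5, 0.]]))
lidar2img = np.eye(4)  # placeholder 4x4 projection, not real calibration
# writes frame_000_img.png and frame_000_gt.png under ./vis/frame_000
show_multi_modality_result(
    img, boxes, None, lidar2img, './vis', 'frame_000', box_mode='lidar')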
tools/misc/browse_dataset.py
View file @
27a546e9
 # Copyright (c) OpenMMLab. All rights reserved.
 import argparse
-import warnings
 from os import path as osp
-from pathlib import Path

 import mmcv
-import numpy as np
 from mmcv import Config, DictAction, mkdir_or_exist

-from mmdet3d.core.bbox import (Box3DMode, CameraInstance3DBoxes, Coord3DMode,
-                               DepthInstance3DBoxes, LiDARInstance3DBoxes)
-from mmdet3d.core.visualizer import (show_multi_modality_result, show_result,
-                                     show_seg_result)
 from mmdet3d.datasets import build_dataset
+from mmdet3d.registry import VISUALIZERS
+from mmdet3d.utils import register_all_modules


 def parse_args():
...
@@ -80,8 +75,8 @@ def build_data_cfg(config_path, skip_type, aug, cfg_options):
         if cfg.train_pipeline[i]['type'] == 'LoadAnnotations3D':
             show_pipeline.insert(i, cfg.train_pipeline[i])
         # Collect points as well as labels
-        if cfg.train_pipeline[i]['type'] == 'Collect3D':
-            if show_pipeline[-1]['type'] == 'Collect3D':
+        if cfg.train_pipeline[i]['type'] == 'Pack3DInputs':
+            if show_pipeline[-1]['type'] == 'Pack3DInputs':
                 show_pipeline[-1] = cfg.train_pipeline[i]
             else:
                 show_pipeline.append(cfg.train_pipeline[i])
...
@@ -93,105 +88,6 @@ def build_data_cfg(config_path, skip_type, aug, cfg_options):
     return cfg


-def to_depth_mode(points, bboxes):
-    """Convert points and bboxes to Depth Coord and Depth Box mode."""
-    if points is not None:
-        points = Coord3DMode.convert_point(points.copy(), Coord3DMode.LIDAR,
-                                           Coord3DMode.DEPTH)
-    if bboxes is not None:
-        bboxes = Box3DMode.convert(bboxes.clone(), Box3DMode.LIDAR,
-                                   Box3DMode.DEPTH)
-    return points, bboxes
-
-
-def show_det_data(input, out_dir, show=False):
-    """Visualize 3D point cloud and 3D bboxes."""
-    img_metas = input['img_metas']._data
-    points = input['points']._data.numpy()
-    gt_bboxes = input['gt_bboxes_3d']._data.tensor
-    if img_metas['box_mode_3d'] != Box3DMode.DEPTH:
-        points, gt_bboxes = to_depth_mode(points, gt_bboxes)
-    filename = osp.splitext(osp.basename(img_metas['pts_filename']))[0]
-    show_result(
-        points,
-        gt_bboxes.clone(),
-        None,
-        out_dir,
-        filename,
-        show=show,
-        snapshot=True)
-
-
-def show_seg_data(input, out_dir, show=False):
-    """Visualize 3D point cloud and segmentation mask."""
-    img_metas = input['img_metas']._data
-    points = input['points']._data.numpy()
-    gt_seg = input['pts_semantic_mask']._data.numpy()
-    filename = osp.splitext(osp.basename(img_metas['pts_filename']))[0]
-    show_seg_result(
-        points,
-        gt_seg.copy(),
-        None,
-        out_dir,
-        filename,
-        np.array(img_metas['PALETTE']),
-        img_metas['ignore_index'],
-        show=show,
-        snapshot=True)
-
-
-def show_proj_bbox_img(input, out_dir, show=False, is_nus_mono=False):
-    """Visualize 3D bboxes on 2D image by projection."""
-    gt_bboxes = input['gt_bboxes_3d']._data
-    img_metas = input['img_metas']._data
-    img = input['img']._data.numpy()
-    # need to transpose channel to first dim
-    img = img.transpose(1, 2, 0)
-    # no 3D gt bboxes, just show img
-    if gt_bboxes.tensor.shape[0] == 0:
-        gt_bboxes = None
-    filename = Path(img_metas['filename']).name
-    if isinstance(gt_bboxes, DepthInstance3DBoxes):
-        show_multi_modality_result(
-            img,
-            gt_bboxes,
-            None,
-            None,
-            out_dir,
-            filename,
-            box_mode='depth',
-            img_metas=img_metas,
-            show=show)
-    elif isinstance(gt_bboxes, LiDARInstance3DBoxes):
-        show_multi_modality_result(
-            img,
-            gt_bboxes,
-            None,
-            img_metas['lidar2img'],
-            out_dir,
-            filename,
-            box_mode='lidar',
-            img_metas=img_metas,
-            show=show)
-    elif isinstance(gt_bboxes, CameraInstance3DBoxes):
-        show_multi_modality_result(
-            img,
-            gt_bboxes,
-            None,
-            img_metas['cam2img'],
-            out_dir,
-            filename,
-            box_mode='camera',
-            img_metas=img_metas,
-            show=show)
-    else:
-        # can't project, just show img
-        warnings.warn(
-            f'unrecognized gt box type {type(gt_bboxes)}, only show image')
-        show_multi_modality_result(
-            img, None, None, None, out_dir, filename, show=show)
-
-
 def main():
     args = parse_args()
...
@@ -200,31 +96,42 @@ def main():
     cfg = build_data_cfg(args.config, args.skip_type, args.aug,
                          args.cfg_options)

+    # register all modules in mmdet3d into the registries
+    register_all_modules()
+
     try:
         dataset = build_dataset(
-            cfg.data.train, default_args=dict(filter_empty_gt=False))
+            cfg.train_dataloader.dataset, default_args=dict(filter_empty_gt=False))
     except TypeError:  # seg dataset doesn't have `filter_empty_gt` key
-        dataset = build_dataset(cfg.data.train)
-    dataset_type = cfg.dataset_type
+        dataset = build_dataset(cfg.train_dataloader.dataset)

     # configure visualization mode
     vis_task = args.task  # 'det', 'seg', 'multi_modality-det', 'mono-det'
+    visualizer = VISUALIZERS.build(cfg.visualizer)
+    visualizer.dataset_meta = dataset.metainfo

     progress_bar = mmcv.ProgressBar(len(dataset))

-    for input in dataset:
-        if vis_task in ['det', 'multi_modality-det']:
-            # show 3D bboxes on 3D point clouds
-            show_det_data(input, args.output_dir, show=args.online)
-        if vis_task in ['multi_modality-det', 'mono-det']:
-            # project 3D bboxes to 2D image
-            show_proj_bbox_img(
-                input,
-                args.output_dir,
-                show=args.online,
-                is_nus_mono=(dataset_type == 'NuScenesMonoDataset'))
-        elif vis_task in ['seg']:
-            # show 3D segmentation mask on 3D point clouds
-            show_seg_data(input, args.output_dir, show=args.online)
+    for item in dataset:
+        # the 3D Boxes in input could be in any of three coordinates
+        data_input = item['inputs']
+        data_sample = item['data_sample'].numpy()
+        out_file = osp.join(
+            args.output_dir) if args.output_dir is not None else None
+        visualizer.add_datasample(
+            '3d visualzier',
+            data_input,
+            data_sample,
+            show=not args.not_show,
+            wait_time=args.show_interval,
+            out_file=out_file,
+            vis_task=vis_task)
         progress_bar.update()
...
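A condensed, hypothetical sketch of the browsing flow this diff switches to: build the dataset and the visualizer from the config, then hand every sample to add_datasample. The names follow the diff above; my_config.py stands in for any config that provides train_dataloader and visualizer entries.

import mmcv
from mmcv import Config

from mmdet3d.datasets import build_dataset
from mmdet3d.registry import VISUALIZERS
from mmdet3d.utils import register_all_modules

register_all_modules()
cfg = Config.fromfile('my_config.py')  # hypothetical full config
dataset = build_dataset(cfg.train_dataloader.dataset)
visualizer = VISUALIZERS.build(cfg.visualizer)
visualizer.dataset_meta = dataset.metainfo

progress_bar = mmcv.ProgressBar(len(dataset))
for item in dataset:
    visualizer.add_datasample(
        '3d visualzier',  # run name used by the diff above
        item['inputs'],
        item['data_sample'].numpy(),
        show=True,
        vis_task='det')
    progress_bar.update()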