"git@developer.sourcefind.cn:OpenDAS/mmcv.git" did not exist on "41ca64cb4b975a83df082e5d8be7da85b1a15ba1"
Unverified Commit 3bac800e authored by Ziyi Wu, committed by GitHub

[Feature] Support 3D semantic segmentation demo (#532)

* compress kitti unit test imgs

* add unit test for inference_multi_modality_detector

* fix typos

* rename init_detector to init_model

* show_result_meshlab support seg task

* add unit test for seg show_result_meshlab

* support inference_segmentor

* support pc seg demo

* add docs

* minor fix

* change function name

* compress demo data size

* update docs
parent b84111d8
 from argparse import ArgumentParser
-from mmdet3d.apis import (inference_multi_modality_detector, init_detector,
+from mmdet3d.apis import (inference_multi_modality_detector, init_model,
                           show_result_meshlab)
@@ -26,7 +26,7 @@ def main():
     args = parser.parse_args()
     # build the model from a config file and a checkpoint file
-    model = init_detector(args.config, args.checkpoint, device=args.device)
+    model = init_model(args.config, args.checkpoint, device=args.device)
     # test a single image
     result, data = inference_multi_modality_detector(model, args.pcd,
                                                      args.image, args.ann)
@@ -37,7 +37,8 @@ def main():
         args.out_dir,
         args.score_thr,
         show=args.show,
-        snapshot=args.snapshot)
+        snapshot=args.snapshot,
+        task='multi_modality-det')
 if __name__ == '__main__':
...
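For reference, the renamed API can also be driven directly from Python. A minimal sketch using the SUN RGB-D sample and the ImVoteNet config that appear later in this diff; the local checkpoint path is hypothetical:

```python
from mmdet3d.apis import (inference_multi_modality_detector, init_model,
                          show_result_meshlab)

config = 'configs/imvotenet/imvotenet_stage2_16x8_sunrgbd-3d-10class.py'
checkpoint = 'checkpoints/imvotenet_sunrgbd.pth'  # hypothetical local path
model = init_model(config, checkpoint, device='cuda:0')
result, data = inference_multi_modality_detector(
    model, 'demo/data/sunrgbd/sunrgbd_000017.bin',
    'demo/data/sunrgbd/sunrgbd_000017.jpg',
    'demo/data/sunrgbd/sunrgbd_000017_infos.pkl')
# the new task argument routes visualization to the projected-bbox branch
show_result_meshlab(data, result, 'demo', 0.3, task='multi_modality-det')
```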
from argparse import ArgumentParser

from mmdet3d.apis import inference_segmentor, init_model, show_result_meshlab


def main():
    parser = ArgumentParser()
    parser.add_argument('pcd', help='Point cloud file')
    parser.add_argument('config', help='Config file')
    parser.add_argument('checkpoint', help='Checkpoint file')
    parser.add_argument(
        '--device', default='cuda:0', help='Device used for inference')
    parser.add_argument(
        '--out-dir', type=str, default='demo', help='dir to save results')
    parser.add_argument(
        '--show',
        action='store_true',
        help='show online visualization results')
    parser.add_argument(
        '--snapshot',
        action='store_true',
        help='whether to save online visualization results')
    args = parser.parse_args()

    # build the model from a config file and a checkpoint file
    model = init_model(args.config, args.checkpoint, device=args.device)
    # test a single point cloud
    result, data = inference_segmentor(model, args.pcd)
    # show the results
    show_result_meshlab(
        data,
        result,
        args.out_dir,
        show=args.show,
        snapshot=args.snapshot,
        task='seg',
        palette=model.PALETTE)


if __name__ == '__main__':
    main()
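For interactive use, the same calls the new `demo/pc_seg_demo.py` script wraps can be made directly. A minimal sketch, assuming the ScanNet sample and PointNet++ config referenced in the docs below; the checkpoint path is hypothetical:

```python
from mmdet3d.apis import inference_segmentor, init_model, show_result_meshlab

config = 'configs/pointnet2/pointnet2_ssg_16x2_cosine_200e_scannet_seg-3d-20class.py'
checkpoint = 'checkpoints/pointnet2_ssg_scannet.pth'  # hypothetical local path
model = init_model(config, checkpoint, device='cuda:0')
# run per-point semantic segmentation on a single point cloud file
result, data = inference_segmentor(model, 'demo/data/scannet/scene0000_00.bin')
# dump <out_dir>/<file_name>_points.obj and _pred.obj for MeshLab
show_result_meshlab(data, result, 'demo', task='seg', palette=model.PALETTE)
```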
 from argparse import ArgumentParser
-from mmdet3d.apis import inference_detector, init_detector, show_result_meshlab
+from mmdet3d.apis import inference_detector, init_model, show_result_meshlab
 def main():
@@ -23,7 +23,7 @@ def main():
     args = parser.parse_args()
     # build the model from a config file and a checkpoint file
-    model = init_detector(args.config, args.checkpoint, device=args.device)
+    model = init_model(args.config, args.checkpoint, device=args.device)
     # test a single image
     result, data = inference_detector(model, args.pcd)
     # show the results
@@ -33,7 +33,8 @@ def main():
         args.out_dir,
         args.score_thr,
         show=args.show,
-        snapshot=args.snapshot)
+        snapshot=args.snapshot,
+        task='det')
 if __name__ == '__main__':
...
@@ -2,19 +2,21 @@
 ## Introduction

-We provide scipts for multi-modality/single-modality and indoor/outdoor 3D detection demos. The pre-trained models can be downloaded from [model zoo](https://github.com/open-mmlab/mmdetection3d/blob/master/docs/model_zoo.md/). We provide pre-processed sample data from KITTI and SUN RGB-D dataset. You can use any other data following our pre-processing steps.
+We provide scripts for multi-modality/single-modality, indoor/outdoor 3D detection and 3D semantic segmentation demos. The pre-trained models can be downloaded from [model zoo](https://github.com/open-mmlab/mmdetection3d/blob/master/docs/model_zoo.md/). We provide pre-processed sample data from the KITTI, SUN RGB-D and ScanNet datasets. You can use any other data following our pre-processing steps.

 ## Testing

-### Single-modality demo
+### 3D Detection
+
+#### Single-modality demo

 To test a 3D detector on point cloud data, simply run:

 ```shell
-python demo/pcd_demo.py ${PCD_FILE} ${CONFIG_FILE} ${CHECKPOINT_FILE} [--device ${GPU_ID}] [--score-thr ${SCORE_THR}] [--out-dir ${OUT_DIR}]
+python demo/pcd_demo.py ${PCD_FILE} ${CONFIG_FILE} ${CHECKPOINT_FILE} [--device ${GPU_ID}] [--score-thr ${SCORE_THR}] [--out-dir ${OUT_DIR}] [--show]
 ```

-The visualization results including a point cloud and predicted 3D bounding boxes will be saved in ```demo/PCD_NAME```, which you can open using [MeshLab](http://www.meshlab.net/).
+The visualization results, including a point cloud and the predicted 3D bounding boxes, will be saved in `${OUT_DIR}/PCD_NAME`, which you can open using [MeshLab](http://www.meshlab.net/). Note that if you set the flag `--show`, the prediction result will be displayed online using [Open3D](http://www.open3d.org/).

 Example on KITTI data using [SECOND](https://github.com/open-mmlab/mmdetection3d/tree/master/configs/second) model:
@@ -30,15 +32,15 @@
 Remember to convert the VoteNet checkpoint if you are using mmdetection3d version >= 0.6.0. See its [README](https://github.com/open-mmlab/mmdetection3d/blob/master/configs/votenet/README.md/) for detailed instructions on how to convert the checkpoint.

-### Multi-modality demo
+#### Multi-modality demo

 To test a 3D detector on multi-modality data (typically point cloud and image), simply run:

 ```shell
-python demo/multi_modality_demo.py ${PCD_FILE} ${IMAGE_FILE} ${ANNOTATION_FILE} ${CONFIG_FILE} ${CHECKPOINT_FILE} [--device ${GPU_ID}] [--score-thr ${SCORE_THR}] [--out-dir ${OUT_DIR}]
+python demo/multi_modality_demo.py ${PCD_FILE} ${IMAGE_FILE} ${ANNOTATION_FILE} ${CONFIG_FILE} ${CHECKPOINT_FILE} [--device ${GPU_ID}] [--score-thr ${SCORE_THR}] [--out-dir ${OUT_DIR}] [--show]
 ```

-where the ```ANNOTATION_FILE``` should provide the 3D to 2D projection matrix. The visualization results including a point cloud, an image, predicted 3D bounding boxes and their projection on the image will be saved in ```demo/PCD_NAME```.
+where the `ANNOTATION_FILE` should provide the 3D-to-2D projection matrix. The visualization results, including a point cloud, an image, the predicted 3D bounding boxes and their projection onto the image, will be saved in `${OUT_DIR}/PCD_NAME`.

 Example on KITTI data using [MVX-Net](https://github.com/open-mmlab/mmdetection3d/tree/master/configs/mvxnet) model:
@@ -51,3 +53,19 @@
 ```shell
 python demo/multi_modality_demo.py demo/data/sunrgbd/sunrgbd_000017.bin demo/data/sunrgbd/sunrgbd_000017.jpg demo/data/sunrgbd/sunrgbd_000017_infos.pkl configs/imvotenet/imvotenet_stage2_16x8_sunrgbd-3d-10class.py checkpoints/imvotenet_stage2_16x8_sunrgbd-3d-10class_20210323_184021-d44dcb66.pth
 ```
+
+### 3D Segmentation
+
+To test a 3D segmentor on point cloud data, simply run:
+
+```shell
+python demo/pc_seg_demo.py ${PCD_FILE} ${CONFIG_FILE} ${CHECKPOINT_FILE} [--device ${GPU_ID}] [--out-dir ${OUT_DIR}] [--show]
+```
+
+The visualization results, including a point cloud and its predicted 3D segmentation mask, will be saved in `${OUT_DIR}/PCD_NAME`.
+
+Example on ScanNet data using [PointNet++ (SSG)](https://github.com/open-mmlab/mmdetection3d/tree/master/configs/pointnet2) model:
+
+```shell
+python demo/pc_seg_demo.py demo/data/scannet/scene0000_00.bin configs/pointnet2/pointnet2_ssg_16x2_cosine_200e_scannet_seg-3d-20class.py checkpoints/pointnet2_ssg_16x2_cosine_200e_scannet_seg-3d-20class_20210514_143644-ee73704a.pth
+```
@@ -141,7 +141,7 @@
 python tools/train.py ${CONFIG_FILE} [optional arguments]
 ```

-If you want to specify the working directory in the command, you can add an argument `--work_dir ${YOUR_WORK_DIR}`.
+If you want to specify the working directory in the command, you can add an argument `--work-dir ${YOUR_WORK_DIR}`.

 ### Train with multiple GPUs
...
@@ -279,13 +279,13 @@
 Here is an example of building the model and test given point clouds.

 ```python
-from mmdet3d.apis import init_detector, inference_detector
+from mmdet3d.apis import init_model, inference_detector

 config_file = 'configs/votenet/votenet_8x8_scannet-3d-18class.py'
 checkpoint_file = 'checkpoints/votenet_8x8_scannet-3d-18class_20200620_230238-2cea9c3a.pth'

 # build the model from a config file and a checkpoint file
-model = init_detector(config_file, checkpoint_file, device='cuda:0')
+model = init_model(config_file, checkpoint_file, device='cuda:0')

 # test a single image and show the results
 point_cloud = 'test.bin'
...
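The docs snippet is collapsed after `point_cloud = 'test.bin'`; a plausible continuation under the renamed API, mirroring `demo/pcd_demo.py` above (the `out_dir` and `score_thr` values are illustrative):

```python
from mmdet3d.apis import inference_detector, init_model, show_result_meshlab

model = init_model('configs/votenet/votenet_8x8_scannet-3d-18class.py',
                   'checkpoints/votenet_8x8_scannet-3d-18class_20200620_230238-2cea9c3a.pth',
                   device='cuda:0')
result, data = inference_detector(model, 'test.bin')
# save MeshLab-compatible .obj files under demo/<file_name>/
show_result_meshlab(data, result, 'demo', score_thr=0.3, task='det')
```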
 from .inference import (convert_SyncBN, inference_detector,
-                        inference_multi_modality_detector, init_detector,
-                        show_result_meshlab)
+                        inference_multi_modality_detector, inference_segmentor,
+                        init_model, show_result_meshlab)
 from .test import single_gpu_test
 from .train import train_model
 __all__ = [
-    'inference_detector', 'init_detector', 'single_gpu_test',
+    'inference_detector', 'init_model', 'single_gpu_test',
     'show_result_meshlab', 'convert_SyncBN', 'train_model',
-    'inference_multi_modality_detector'
+    'inference_multi_modality_detector', 'inference_segmentor'
 ]
@@ -9,7 +9,7 @@ from os import path as osp
 from mmdet3d.core import (Box3DMode, DepthInstance3DBoxes,
                           LiDARInstance3DBoxes, show_multi_modality_result,
-                          show_result)
+                          show_result, show_seg_result)
 from mmdet3d.core.bbox import get_box_type
 from mmdet3d.datasets.pipelines import Compose
 from mmdet3d.models import build_model
@@ -31,8 +31,9 @@ def convert_SyncBN(config):
             convert_SyncBN(config[item])
-def init_detector(config, checkpoint=None, device='cuda:0'):
-    """Initialize a detector from config file.
+def init_model(config, checkpoint=None, device='cuda:0'):
+    """Initialize a model from config file, which could be a 3D detector or a
+    3D segmentor.
     Args:
         config (str or :obj:`mmcv.Config`): Config file path or the config
@@ -59,6 +60,8 @@ def init_detector(config, checkpoint=None, device='cuda:0'):
             model.CLASSES = checkpoint['meta']['CLASSES']
         else:
             model.CLASSES = config.class_names
+        if 'PALETTE' in checkpoint['meta']:  # 3D Segmentor
+            model.PALETTE = checkpoint['meta']['PALETTE']
     model.cfg = config  # save the config in the model for convenience
     model.to(device)
     model.eval()
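With this hunk, a segmentor restores its class-color palette from the checkpoint metadata, while detector checkpoints simply never set the attribute. A small sketch of the resulting behavior; the paths are hypothetical:

```python
from mmdet3d.apis import init_model

model = init_model(
    'configs/pointnet2/pointnet2_ssg_16x2_cosine_200e_scannet_seg-3d-20class.py',
    'checkpoints/pointnet2_ssg_scannet.pth',  # hypothetical local path
    device='cuda:0')
# segmentor checkpoints carry meta['PALETTE']; detector checkpoints do not
palette = getattr(model, 'PALETTE', None)  # None triggers a random palette later
```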
@@ -177,7 +180,8 @@ def inference_multi_modality_detector(model, pcd, image, ann_file):
         data['points'] = data['points'][0].data
         data['img'] = data['img'][0].data
         if box_mode_3d == Box3DMode.DEPTH:
-            data['calib'] = data['calib'][0].data
+            data['calib'][0]['Rt'] = data['calib'][0]['Rt'][0].data
+            data['calib'][0]['K'] = data['calib'][0]['K'][0].data
     # forward the model
     with torch.no_grad():
@@ -185,28 +189,56 @@ def inference_multi_modality_detector(model, pcd, image, ann_file):
     return result, data
-def show_result_meshlab(data,
-                        result,
-                        out_dir,
-                        score_thr=0.0,
-                        show=False,
-                        snapshot=False):
-    """Show result by meshlab.
+def inference_segmentor(model, pcd):
+    """Inference point cloud with the segmentor.
     Args:
-        data (dict): Contain data from pipeline.
-        result (dict): Predicted result from model.
-        out_dir (str): Directory to save visualized result.
-        score_thr (float): Minimum score of bboxes to be shown. Default: 0.0
-        show (bool): Visualize the results online. Defaults to False.
-        snapshot (bool): Whether to save the online results. Defaults to False.
+        model (nn.Module): The loaded segmentor.
+        pcd (str): Point cloud files.
+    Returns:
+        tuple: Predicted results and data from pipeline.
     """
+    cfg = model.cfg
+    device = next(model.parameters()).device  # model device
+    # build the data pipeline
+    test_pipeline = deepcopy(cfg.data.test.pipeline)
+    test_pipeline = Compose(test_pipeline)
+    data = dict(
+        pts_filename=pcd,
+        img_fields=[],
+        bbox3d_fields=[],
+        pts_mask_fields=[],
+        pts_seg_fields=[],
+        bbox_fields=[],
+        mask_fields=[],
+        seg_fields=[])
+    data = test_pipeline(data)
+    data = collate([data], samples_per_gpu=1)
+    if next(model.parameters()).is_cuda:
+        # scatter to specified GPU
+        data = scatter(data, [device.index])[0]
+    else:
+        # this is a workaround to avoid the bug of MMDataParallel
+        data['img_metas'] = data['img_metas'][0].data
+        data['points'] = data['points'][0].data
+    # forward the model
+    with torch.no_grad():
+        result = model(return_loss=False, rescale=True, **data)
+    return result, data
+
+
+def show_det_result_meshlab(data,
+                            result,
+                            out_dir,
+                            score_thr=0.0,
+                            show=False,
+                            snapshot=False):
+    """Show 3D detection result by meshlab."""
     points = data['points'][0][0].cpu().numpy()
     pts_filename = data['img_metas'][0][0]['pts_filename']
     file_name = osp.split(pts_filename)[-1].split('.')[0]
-    assert out_dir is not None, 'Expect out_dir, got none.'
     if 'pts_bbox' in result[0].keys():
         pred_bboxes = result[0]['pts_bbox']['boxes_3d'].tensor.numpy()
         pred_scores = result[0]['pts_bbox']['scores_3d'].numpy()
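A quick sanity check of what `inference_segmentor` returns, following the shapes asserted in this PR's unit tests (reusing `model` from the earlier sketch; the file is the demo sample):

```python
result, data = inference_segmentor(model, 'demo/data/scannet/scene0000_00.bin')
mask = result[0]['semantic_mask']  # 1-D tensor of per-point class indices
assert mask.ndim == 1 and mask.min() >= 0  # indices into the model's palette
```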
@@ -227,6 +259,7 @@ def show_result_meshlab(data,
         show_bboxes = Box3DMode.convert(pred_bboxes, box_mode, Box3DMode.DEPTH)
     else:
         show_bboxes = deepcopy(pred_bboxes)
+
     show_result(
         points,
         None,
@@ -236,18 +269,76 @@ def show_result_meshlab(data,
         show=show,
         snapshot=snapshot)
-    if 'img' not in data.keys():
-        return out_dir, file_name
+    return file_name
+
+
+def show_seg_result_meshlab(data,
+                            result,
+                            out_dir,
+                            palette,
+                            show=False,
+                            snapshot=False):
+    """Show 3D segmentation result by meshlab."""
+    points = data['points'][0][0].cpu().numpy()
+    pts_filename = data['img_metas'][0][0]['pts_filename']
+    file_name = osp.split(pts_filename)[-1].split('.')[0]
+    pred_seg = result[0]['semantic_mask'].numpy()
+    if palette is None:
+        # generate random color map
+        max_idx = pred_seg.max()
+        palette = np.random.randint(0, 256, size=(max_idx + 1, 3))
+    palette = np.array(palette).astype(np.int)
+    show_seg_result(
+        points,
+        None,
+        pred_seg,
+        out_dir,
+        file_name,
+        palette=palette,
+        show=show,
+        snapshot=snapshot)
+    return file_name
-    # multi-modality visualization
-    # project 3D bbox to 2D image plane
+
+
+def show_proj_det_result_meshlab(data,
+                                 result,
+                                 out_dir,
+                                 score_thr=0.0,
+                                 show=False,
+                                 snapshot=False):
+    """Show result of projecting 3D bbox to 2D image by meshlab."""
+    assert 'img' in data.keys(), 'image data is not provided for visualization'
+    img_filename = data['img_metas'][0][0]['filename']
+    file_name = osp.split(img_filename)[-1].split('.')[0]
+    # read from file because img in data_dict has undergone pipeline transform
+    img = mmcv.imread(img_filename)
+    # TODO: use 'img_bbox' for Mono3D visualization
+    if 'pts_bbox' in result[0].keys():
+        pred_bboxes = result[0]['pts_bbox']['boxes_3d'].tensor.numpy()
+        pred_scores = result[0]['pts_bbox']['scores_3d'].numpy()
+    else:
+        pred_bboxes = result[0]['boxes_3d'].tensor.numpy()
+        pred_scores = result[0]['scores_3d'].numpy()
+    # filter out low score bboxes for visualization
+    if score_thr > 0:
+        inds = pred_scores > score_thr
+        pred_bboxes = pred_bboxes[inds]
+    box_mode = data['img_metas'][0][0]['box_mode_3d']
     if box_mode == Box3DMode.LIDAR:
         if 'lidar2img' not in data['img_metas'][0][0]:
             raise NotImplementedError(
                 'LiDAR to image transformation matrix is not provided')
         show_bboxes = LiDARInstance3DBoxes(pred_bboxes, origin=(0.5, 0.5, 0))
-        img = mmcv.imread(data['img_metas'][0][0]['filename'])
         show_multi_modality_result(
             img,
@@ -263,7 +354,6 @@ def show_result_meshlab(data,
                 'camera calibration information is not provided')
         show_bboxes = DepthInstance3DBoxes(pred_bboxes, origin=(0.5, 0.5, 0))
-        img = mmcv.imread(data['img_metas'][0][0]['filename'])
         show_multi_modality_result(
             img,
@@ -279,4 +369,47 @@ def show_result_meshlab(data,
         raise NotImplementedError(
             f'visualization of {box_mode} bbox is not supported')
+    return file_name
+
+
+def show_result_meshlab(data,
+                        result,
+                        out_dir,
+                        score_thr=0.0,
+                        show=False,
+                        snapshot=False,
+                        task='det',
+                        palette=None):
+    """Show result by meshlab.
+    Args:
+        data (dict): Contain data from pipeline.
+        result (dict): Predicted result from model.
+        out_dir (str): Directory to save visualized result.
+        score_thr (float): Minimum score of bboxes to be shown. Default: 0.0
+        show (bool): Visualize the results online. Defaults to False.
+        snapshot (bool): Whether to save the online results. Defaults to False.
+        task (str): Distinguish which task result to visualize. Currently we
+            support 3D detection, multi-modality detection and 3D segmentation.
+            Defaults to 'det'.
+        palette (list[list[int]] | np.ndarray | None): The palette of
+            segmentation map. If None is given, random palette will be
+            generated. Defaults to None.
+    """
+    assert task in ['det', 'multi_modality-det', 'seg'], \
+        f'unsupported visualization task {task}'
+    assert out_dir is not None, 'Expect out_dir, got none.'
+    if 'det' in task:
+        file_name = show_det_result_meshlab(data, result, out_dir, score_thr,
+                                            show, snapshot)
+    if 'seg' in task:
+        file_name = show_seg_result_meshlab(data, result, out_dir, palette,
+                                            show, snapshot)
+    if task == 'multi_modality-det':
+        file_name = show_proj_det_result_meshlab(data, result, out_dir,
+                                                 score_thr, show, snapshot)
     return out_dir, file_name
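Since `task` defaults to `'det'`, existing call sites keep working unchanged; only segmentation and multi-modality callers need the new arguments. A brief sketch of the dispatch, reusing `data` and `result` from an earlier inference call:

```python
# old-style call, equivalent to task='det'
out_dir, file_name = show_result_meshlab(data, result, 'demo', 0.3)
# segmentation needs the task flag; palette=None falls back to a random one
out_dir, file_name = show_result_meshlab(
    data, result, 'demo', task='seg', palette=None)
# any other task name, e.g. task='foo', trips the assert in the wrapper
```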
tests/data/kitti/training/image_2/000000.png
(binary image recompressed: 873 KB → 134 KB)
@@ -6,12 +6,14 @@ import torch
 from mmcv.parallel import MMDataParallel
 from os.path import dirname, exists, join
-from mmdet3d.apis import (convert_SyncBN, inference_detector, init_detector,
-                          show_result_meshlab, single_gpu_test)
+from mmdet3d.apis import (convert_SyncBN, inference_detector,
+                          inference_multi_modality_detector,
+                          inference_segmentor, init_model, show_result_meshlab,
+                          single_gpu_test)
 from mmdet3d.core import Box3DMode
 from mmdet3d.core.bbox import DepthInstance3DBoxes, LiDARInstance3DBoxes
 from mmdet3d.datasets import build_dataloader, build_dataset
-from mmdet3d.models import build_detector
+from mmdet3d.models import build_model
 def _get_config_directory():
@@ -79,7 +81,7 @@ def test_show_result_meshlab():
     tmp_dir.cleanup()
     # test multi-modality show
-    # Indoor scene
+    # indoor scene
     pcd = 'tests/data/sunrgbd/points/000001.bin'
     filename = 'tests/data/sunrgbd/sunrgbd_trainval/image/000001.jpg'
     box_3d = DepthInstance3DBoxes(
@@ -106,14 +108,11 @@ def test_show_result_meshlab():
         img_metas=[[img_meta]],
         img=[img],
         calib=[calib])
-    result = [
-        dict(
-            pts_bbox=dict(
-                boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d))
-    ]
+    result = [dict(boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d)]
     tmp_dir = tempfile.TemporaryDirectory()
     temp_out_dir = tmp_dir.name
-    out_dir, file_name = show_result_meshlab(data, result, temp_out_dir, 0.3)
+    out_dir, file_name = show_result_meshlab(
+        data, result, temp_out_dir, 0.3, task='multi_modality-det')
     expected_outfile_pred = file_name + '_pred.obj'
     expected_outfile_pts = file_name + '_points.obj'
     expected_outfile_png = file_name + '_img.png'
@@ -160,10 +159,10 @@ def test_show_result_meshlab():
         pts_bbox=dict(
             boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d))
     ]
-    out_dir, file_name = show_result_meshlab(data, result, temp_out_dir, 0.1)
+    out_dir, file_name = show_result_meshlab(
+        data, result, temp_out_dir, 0.1, task='multi_modality-det')
     tmp_dir = tempfile.TemporaryDirectory()
     temp_out_dir = tmp_dir.name
-    out_dir, file_name = show_result_meshlab(data, result, temp_out_dir, 0.3)
     expected_outfile_pred = file_name + '_pred.obj'
     expected_outfile_pts = file_name + '_points.obj'
     expected_outfile_png = file_name + '_img.png'
@@ -182,12 +181,33 @@ def test_show_result_meshlab():
     assert os.path.exists(expected_outfile_proj_path)
     tmp_dir.cleanup()
+    # test seg show
+    pcd = 'tests/data/scannet/points/scene0000_00.bin'
+    points = np.random.rand(100, 6)
+    img_meta = dict(pts_filename=pcd)
+    data = dict(points=[[torch.tensor(points)]], img_metas=[[img_meta]])
+    pred_seg = torch.randint(0, 20, (100, ))
+    result = [dict(semantic_mask=pred_seg)]
+    tmp_dir = tempfile.TemporaryDirectory()
+    temp_out_dir = tmp_dir.name
+    out_dir, file_name = show_result_meshlab(
+        data, result, temp_out_dir, task='seg')
+    expected_outfile_pred = file_name + '_pred.obj'
+    expected_outfile_pts = file_name + '_points.obj'
+    expected_outfile_pred_path = os.path.join(out_dir, file_name,
+                                              expected_outfile_pred)
+    expected_outfile_pts_path = os.path.join(out_dir, file_name,
+                                             expected_outfile_pts)
+    assert os.path.exists(expected_outfile_pred_path)
+    assert os.path.exists(expected_outfile_pts_path)
+    tmp_dir.cleanup()
 def test_inference_detector():
     pcd = 'tests/data/kitti/training/velodyne_reduced/000000.bin'
     detector_cfg = 'configs/pointpillars/hv_pointpillars_secfpn_' \
                    '6x8_160e_kitti-3d-3class.py'
-    detector = init_detector(detector_cfg, device='cpu')
+    detector = init_model(detector_cfg, device='cpu')
     results = inference_detector(detector, pcd)
     bboxes_3d = results[0][0]['boxes_3d']
     scores_3d = results[0][0]['scores_3d']
@@ -198,12 +218,64 @@ def test_inference_detector():
     assert labels_3d.shape[0] >= 0
+
+
+def test_inference_multi_modality_detector():
+    # these two multi-modality models both only have GPU implementations
+    if not torch.cuda.is_available():
+        pytest.skip('test requires GPU and torch+cuda')
+    # indoor scene
+    pcd = 'tests/data/sunrgbd/points/000001.bin'
+    img = 'tests/data/sunrgbd/sunrgbd_trainval/image/000001.jpg'
+    ann_file = 'tests/data/sunrgbd/sunrgbd_infos.pkl'
+    detector_cfg = 'configs/imvotenet/imvotenet_stage2_'\
+        '16x8_sunrgbd-3d-10class.py'
+    detector = init_model(detector_cfg, device='cuda:0')
+    results = inference_multi_modality_detector(detector, pcd, img, ann_file)
+    bboxes_3d = results[0][0]['boxes_3d']
+    scores_3d = results[0][0]['scores_3d']
+    labels_3d = results[0][0]['labels_3d']
+    assert bboxes_3d.tensor.shape[0] >= 0
+    assert bboxes_3d.tensor.shape[1] == 7
+    assert scores_3d.shape[0] >= 0
+    assert labels_3d.shape[0] >= 0
+    # outdoor scene
+    pcd = 'tests/data/kitti/training/velodyne_reduced/000000.bin'
+    img = 'tests/data/kitti/training/image_2/000000.png'
+    ann_file = 'tests/data/kitti/kitti_infos_train.pkl'
+    detector_cfg = 'configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_' \
+        '2x8_80e_kitti-3d-3class.py'
+    detector = init_model(detector_cfg, device='cuda:0')
+    results = inference_multi_modality_detector(detector, pcd, img, ann_file)
+    bboxes_3d = results[0][0]['pts_bbox']['boxes_3d']
+    scores_3d = results[0][0]['pts_bbox']['scores_3d']
+    labels_3d = results[0][0]['pts_bbox']['labels_3d']
+    assert bboxes_3d.tensor.shape[0] >= 0
+    assert bboxes_3d.tensor.shape[1] == 7
+    assert scores_3d.shape[0] >= 0
+    assert labels_3d.shape[0] >= 0
+
+
+def test_inference_segmentor():
+    # PN2 only has GPU implementations
+    if not torch.cuda.is_available():
+        pytest.skip('test requires GPU and torch+cuda')
+    pcd = 'tests/data/scannet/points/scene0000_00.bin'
+    segmentor_cfg = 'configs/pointnet2/pointnet2_ssg_' \
+        '16x2_scannet-3d-20class.py'
+    segmentor = init_model(segmentor_cfg, device='cuda:0')
+    results = inference_segmentor(segmentor, pcd)
+    seg_3d = results[0][0]['semantic_mask']
+    assert seg_3d.shape == torch.Size([100])
+    assert seg_3d.min() >= 0
+    assert seg_3d.max() <= 19
 def test_single_gpu_test():
     if not torch.cuda.is_available():
         pytest.skip('test requires GPU and torch+cuda')
     cfg = _get_config_module('votenet/votenet_16x8_sunrgbd-3d-10class.py')
     cfg.model.train_cfg = None
-    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
+    model = build_model(cfg.model, test_cfg=cfg.get('test_cfg'))
     dataset_cfg = cfg.data.test
     dataset_cfg.data_root = './tests/data/sunrgbd'
     dataset_cfg.ann_file = 'tests/data/sunrgbd/sunrgbd_infos.pkl'
...
@@ -3,7 +3,7 @@ import torch
 from mmcv.runner import save_checkpoint
 from torch import nn as nn
-from mmdet.apis import init_detector
+from mmdet.apis import init_model
 def fuse_conv_bn(conv, bn):
@@ -56,7 +56,7 @@ def parse_args():
 def main():
     args = parse_args()
     # build the model from a config file and a checkpoint file
-    model = init_detector(args.config, args.checkpoint)
+    model = init_model(args.config, args.checkpoint)
     # fuse conv and bn layers of the model
     fused_model = fuse_module(model)
     save_checkpoint(fused_model, args.out)
...
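For context on what this tool does: `fuse_conv_bn` folds a BatchNorm's affine transform into the preceding convolution, so inference skips the BN op entirely. A standard sketch of the folding math, not the repo's exact implementation (`fuse_conv_bn_sketch` is a hypothetical name):

```python
import torch
from torch import nn


def fuse_conv_bn_sketch(conv: nn.Conv2d, bn: nn.BatchNorm2d) -> nn.Conv2d:
    """Fold BN into conv: w' = w * gamma / std, b' = (b - mean) * gamma / std + beta."""
    fused = nn.Conv2d(
        conv.in_channels,
        conv.out_channels,
        conv.kernel_size,
        stride=conv.stride,
        padding=conv.padding,
        dilation=conv.dilation,
        groups=conv.groups,
        bias=True)
    std = torch.sqrt(bn.running_var + bn.eps)
    scale = bn.weight.data / std
    # scale each output channel of the conv kernel by gamma / std
    fused.weight.data = conv.weight.data * scale.reshape(-1, 1, 1, 1)
    conv_bias = conv.bias.data if conv.bias is not None else torch.zeros_like(std)
    fused.bias.data = (conv_bias - bn.running_mean) * scale + bn.bias.data
    return fused
```

After fusing, the BN module is typically replaced with `nn.Identity()` so the rest of the module graph stays intact.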