Unverified commit c81426a3, authored by Ziyi Wu, committed by GitHub

[Feature] Support visualization and browse_dataset on nuScenes Mono-3D dataset (#542)

* fix comment errors

* add eval_pipeline in mono-nuscene cfg

* add vis function in nuscene-mono dataset

* refactor vis function to support projecting boxes of all three modes to img

* add unit test for nuscene-mono show func

* browse_dataset support nuScenes_mono

* add show_results() to SingleStageMono3DDetector

* support mono-3d dataset browse

* support nus_mono_dataset and single_stage_mono_detector show function

* update useful_tools.md docs

* support mono-3d demo

* add unit test

* update demo docs

* fix typos & remove unused comments

* polish docs
parent d37c3305
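
Taken together, these changes let the existing demo and visualization tooling handle monocular 3D data, e.g. `python tools/browse_dataset.py CONFIG --task mono-det` with the new `--task` flag added below. As a rough sketch of the resulting demo flow (the checkpoint path is a placeholder, not part of this PR; the config, image and annotation paths are the test fixtures used below):

# Hedged sketch of the mono-3D demo flow this PR enables; the checkpoint
# path is a placeholder, config/image/ann paths come from the tests below.
from mmdet3d.apis import (inference_mono_3d_detector, init_model,
                          show_result_meshlab)

config = 'configs/fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_' \
         '2x8_1x_nus-mono3d.py'
checkpoint = 'checkpoints/fcos3d.pth'  # placeholder path
img = 'tests/data/nuscenes/samples/CAM_BACK_LEFT/' \
      'n015-2018-07-18-11-07-57+0800__CAM_BACK_LEFT__1531883530447423.jpg'
ann_file = 'tests/data/nuscenes/nus_infos_mono3d.coco.json'

model = init_model(config, checkpoint, device='cuda:0')
result, data = inference_mono_3d_detector(model, img, ann_file)
# project the predicted boxes onto the image and save
# <out_dir>/<name>/<name>_img.png and <name>_pred.png
show_result_meshlab(data, result, 'demo_out', 0.1, task='mono-det')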
resources/browse_dataset_seg.png (image updated: 326 KB → 96.8 KB)
 import mmcv
 import numpy as np
 import pytest
+import tempfile
 import torch
+from os import path as osp
 from mmdet3d.datasets import NuScenesMonoDataset
@@ -149,3 +151,42 @@ def test_format_results():
             torch.tensor(det['rotation']), expected_rotation, 1e-5)
         assert det['detection_name'] == expected_detname
         assert det['attribute_name'] == expected_attr
+
+
+def test_show():
+    root_path = 'tests/data/nuscenes/'
+    ann_file = 'tests/data/nuscenes/nus_infos_mono3d.coco.json'
+    class_names = [
+        'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
+        'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
+    ]
+    eval_pipeline = [
+        dict(type='LoadImageFromFileMono3D'),
+        dict(
+            type='DefaultFormatBundle3D',
+            class_names=class_names,
+            with_label=False),
+        dict(type='Collect3D', keys=['img'])
+    ]
+    nus_dataset = NuScenesMonoDataset(
+        data_root=root_path,
+        ann_file=ann_file,
+        img_prefix='tests/data/nuscenes/',
+        test_mode=True,
+        pipeline=eval_pipeline)
+    results = mmcv.load('tests/data/nuscenes/mono3d_sample_results.pkl')
+    results = [results[0]]
+    # show with eval_pipeline
+    tmp_dir = tempfile.TemporaryDirectory()
+    temp_dir = tmp_dir.name
+    nus_dataset.show(results, temp_dir, show=False)
+    file_name = 'n015-2018-07-18-11-07-57+0800__' \
+                'CAM_BACK_LEFT__1531883530447423'
+    img_file_path = osp.join(temp_dir, file_name, f'{file_name}_img.png')
+    gt_file_path = osp.join(temp_dir, file_name, f'{file_name}_gt.png')
+    pred_file_path = osp.join(temp_dir, file_name, f'{file_name}_pred.png')
+    mmcv.check_file_exist(img_file_path)
+    mmcv.check_file_exist(gt_file_path)
+    mmcv.check_file_exist(pred_file_path)
+    tmp_dir.cleanup()
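
For reference, each entry of the `results` list loaded from `mono3d_sample_results.pkl` above follows the same layout the API test below builds by hand: a dict with an `img_bbox` sub-dict holding `boxes_3d`, `scores_3d` and `labels_3d`. A minimal hand-rolled entry (box numbers invented purely for illustration) would look like:

# Hedged sketch of one entry in the results list consumed by
# NuScenesMonoDataset.show(); the box values below are made up.
import torch
from mmdet3d.core.bbox import CameraInstance3DBoxes

sample_result = dict(
    img_bbox=dict(
        # (x, y, z, x_size, y_size, z_size, yaw) in camera coordinates
        boxes_3d=CameraInstance3DBoxes(
            torch.tensor([[1.0, 1.5, 10.0, 1.6, 1.5, 3.9, 0.0]])),
        scores_3d=torch.tensor([0.9]),
        labels_3d=torch.tensor([0])))
# nus_dataset.show([sample_result], out_dir, show=False) would then write
# out_dir/<name>/<name>_img.png, _gt.png and _pred.png, as asserted above.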
@@ -7,11 +7,13 @@ from mmcv.parallel import MMDataParallel
 from os.path import dirname, exists, join
 from mmdet3d.apis import (convert_SyncBN, inference_detector,
+                          inference_mono_3d_detector,
                           inference_multi_modality_detector,
                           inference_segmentor, init_model, show_result_meshlab,
                           single_gpu_test)
 from mmdet3d.core import Box3DMode
-from mmdet3d.core.bbox import DepthInstance3DBoxes, LiDARInstance3DBoxes
+from mmdet3d.core.bbox import (CameraInstance3DBoxes, DepthInstance3DBoxes,
+                               LiDARInstance3DBoxes)
 from mmdet3d.datasets import build_dataloader, build_dataset
 from mmdet3d.models import build_model
@@ -180,6 +182,44 @@ def test_show_result_meshlab():
     assert os.path.exists(expected_outfile_png_path)
     assert os.path.exists(expected_outfile_proj_path)
     tmp_dir.cleanup()
+
+    # test mono-3d show
+    filename = 'tests/data/nuscenes/samples/CAM_BACK_LEFT/n015-2018-' \
+               '07-18-11-07-57+0800__CAM_BACK_LEFT__1531883530447423.jpg'
+    box_3d = CameraInstance3DBoxes(
+        torch.tensor(
+            [[6.4495, -3.9097, -1.7409, 1.5063, 3.1819, 1.4716, 1.8782]]))
+    img = np.random.randn(1, 3, 384, 1280)
+    cam_intrinsic = np.array([[100.0, 0.0, 50.0], [0.0, 100.0, 50.0],
+                              [0.0, 0.0, 1.0]])
+    img_meta = dict(
+        filename=filename,
+        pcd_horizontal_flip=False,
+        pcd_vertical_flip=False,
+        box_mode_3d=Box3DMode.CAM,
+        box_type_3d=CameraInstance3DBoxes,
+        pcd_trans=np.array([0., 0., 0.]),
+        pcd_scale_factor=1.0,
+        cam_intrinsic=cam_intrinsic)
+    data = dict(
+        points=[[torch.tensor(points)]], img_metas=[[img_meta]], img=[img])
+    result = [
+        dict(
+            img_bbox=dict(
+                boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d))
+    ]
+    tmp_dir = tempfile.TemporaryDirectory()
+    temp_out_dir = tmp_dir.name
+    out_dir, file_name = show_result_meshlab(
+        data, result, temp_out_dir, 0.1, task='mono-det')
+    expected_outfile_png = file_name + '_img.png'
+    expected_outfile_proj = file_name + '_pred.png'
+    expected_outfile_png_path = os.path.join(out_dir, file_name,
+                                             expected_outfile_png)
+    expected_outfile_proj_path = os.path.join(out_dir, file_name,
+                                              expected_outfile_proj)
+    assert os.path.exists(expected_outfile_png_path)
+    assert os.path.exists(expected_outfile_proj_path)
+    tmp_dir.cleanup()
+
     # test seg show
     pcd = 'tests/data/scannet/points/scene0000_00.bin'
@@ -255,6 +295,26 @@ def test_inference_multi_modality_detector():
     assert labels_3d.shape[0] >= 0
+
+
+def test_inference_mono_3d_detector():
+    # FCOS3D only has GPU implementations
+    if not torch.cuda.is_available():
+        pytest.skip('test requires GPU and torch+cuda')
+    img = 'tests/data/nuscenes/samples/CAM_BACK_LEFT/' \
+          'n015-2018-07-18-11-07-57+0800__CAM_BACK_LEFT__1531883530447423.jpg'
+    ann_file = 'tests/data/nuscenes/nus_infos_mono3d.coco.json'
+    detector_cfg = 'configs/fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_' \
+                   '2x8_1x_nus-mono3d.py'
+    detector = init_model(detector_cfg, device='cuda:0')
+    results = inference_mono_3d_detector(detector, img, ann_file)
+    bboxes_3d = results[0][0]['img_bbox']['boxes_3d']
+    scores_3d = results[0][0]['img_bbox']['scores_3d']
+    labels_3d = results[0][0]['img_bbox']['labels_3d']
+    assert bboxes_3d.tensor.shape[0] >= 0
+    assert bboxes_3d.tensor.shape[1] == 9
+    assert scores_3d.shape[0] >= 0
+    assert labels_3d.shape[0] >= 0

 def test_inference_segmentor():
     # PN2 only has GPU implementations
     if not torch.cuda.is_available():
...
@@ -4,8 +4,8 @@ import warnings
 from mmcv import Config, DictAction, mkdir_or_exist, track_iter_progress
 from os import path as osp
-from mmdet3d.core.bbox import (Box3DMode, Coord3DMode, DepthInstance3DBoxes,
-                               LiDARInstance3DBoxes)
+from mmdet3d.core.bbox import (Box3DMode, CameraInstance3DBoxes, Coord3DMode,
+                               DepthInstance3DBoxes, LiDARInstance3DBoxes)
 from mmdet3d.core.visualizer import (show_multi_modality_result, show_result,
                                      show_seg_result)
 from mmdet3d.datasets import build_dataset
@@ -26,11 +26,10 @@ def parse_args():
         type=str,
         help='If there is no display interface, you can save it')
     parser.add_argument(
-        '--multi-modality',
-        action='store_true',
-        help='Whether to visualize multi-modality data. If True, we will show '
-        'both 3D point clouds with 3D bounding boxes and 2D images with '
-        'projected bounding boxes.')
+        '--task',
+        type=str,
+        choices=['det', 'seg', 'multi_modality-det', 'mono-det'],
+        help='Determine the visualization method depending on the task.')
     parser.add_argument(
         '--online',
         action='store_true',
@@ -118,9 +117,17 @@ def show_seg_data(idx, dataset, out_dir, filename, show=False):
         snapshot=True)


-def show_proj_bbox_img(idx, dataset, out_dir, filename, show=False):
+def show_proj_bbox_img(idx,
+                       dataset,
+                       out_dir,
+                       filename,
+                       show=False,
+                       is_nus_mono=False):
     """Visualize 3D bboxes on 2D image by projection."""
-    example = dataset.prepare_train_data(idx)
+    try:
+        example = dataset.prepare_train_data(idx)
+    except AttributeError:  # for Mono-3D datasets
+        example = dataset.prepare_train_img(idx)
     gt_bboxes = dataset.get_ann_info(idx)['gt_bboxes_3d']
     img_metas = example['img_metas']._data
     img = example['img']._data.numpy()
@@ -137,7 +144,7 @@ def show_proj_bbox_img(idx, dataset, out_dir, filename, show=False):
             example['calib'],
             out_dir,
             filename,
-            depth_bbox=True,
+            box_mode='depth',
             img_metas=img_metas,
             show=show)
     elif isinstance(gt_bboxes, LiDARInstance3DBoxes):
@@ -148,28 +155,32 @@
             img_metas['lidar2img'],
             out_dir,
             filename,
-            depth_bbox=False,
+            box_mode='lidar',
+            img_metas=img_metas,
+            show=show)
+    elif isinstance(gt_bboxes, CameraInstance3DBoxes):
+        # TODO: remove the hack of box from NuScenesMonoDataset
+        if is_nus_mono:
+            from mmdet3d.core.bbox import mono_cam_box2vis
+            gt_bboxes = mono_cam_box2vis(gt_bboxes)
+        show_multi_modality_result(
+            img,
+            gt_bboxes,
+            None,
+            img_metas['cam_intrinsic'],
+            out_dir,
+            filename,
+            box_mode='camera',
             img_metas=img_metas,
             show=show)
     else:
         # can't project, just show img
+        warnings.warn(
+            f'unrecognized gt box type {type(gt_bboxes)}, only show image')
         show_multi_modality_result(
             img, None, None, None, out_dir, filename, show=show)


-def is_multi_modality(dataset):
-    """Judge whether a dataset loads multi-modality data (points+img)."""
-    if not hasattr(dataset, 'modality') or dataset.modality is None:
-        return False
-    if dataset.modality['use_camera']:
-        # even dataset with `use_camera=True` may not load img
-        # should check its loaded data
-        example = dataset.prepare_train_data(0)
-        if 'img' in example.keys():
-            return True
-    return False
-
-
 def main():
     args = parse_args()
@@ -186,43 +197,40 @@ def main():
     dataset_type = cfg.dataset_type

     # configure visualization mode
-    vis_type = 'det'  # single-modality detection
-    if dataset_type in ['ScanNetSegDataset', 'S3DISSegDataset']:
-        vis_type = 'seg'  # segmentation
-    multi_modality = args.multi_modality
-    if multi_modality:
-        # check whether dataset really supports multi-modality input
-        if not is_multi_modality(dataset):
-            warnings.warn(
-                f'{dataset_type} with current config does not support multi-'
-                'modality data loading, only show point clouds here')
-            multi_modality = False
+    vis_task = args.task  # 'det', 'seg', 'multi_modality-det', 'mono-det'

     for idx, data_info in enumerate(track_iter_progress(data_infos)):
         if dataset_type in ['KittiDataset', 'WaymoDataset']:
-            pts_path = data_info['point_cloud']['velodyne_path']
+            data_path = data_info['point_cloud']['velodyne_path']
         elif dataset_type in [
                 'ScanNetDataset', 'SUNRGBDDataset', 'ScanNetSegDataset',
                 'S3DISSegDataset'
         ]:
-            pts_path = data_info['pts_path']
+            data_path = data_info['pts_path']
         elif dataset_type in ['NuScenesDataset', 'LyftDataset']:
-            pts_path = data_info['lidar_path']
+            data_path = data_info['lidar_path']
+        elif dataset_type in ['NuScenesMonoDataset']:
+            data_path = data_info['file_name']
         else:
             raise NotImplementedError(
                 f'unsupported dataset type {dataset_type}')

-        file_name = osp.splitext(osp.basename(pts_path))[0]
+        file_name = osp.splitext(osp.basename(data_path))[0]

-        if vis_type == 'det':
+        if vis_task in ['det', 'multi_modality-det']:
             # show 3D bboxes on 3D point clouds
             show_det_data(
                 idx, dataset, args.output_dir, file_name, show=args.online)
-            if multi_modality:
-                # project 3D bboxes to 2D image
-                show_proj_bbox_img(
-                    idx, dataset, args.output_dir, file_name, show=args.online)
-        elif vis_type == 'seg':
+        if vis_task in ['multi_modality-det', 'mono-det']:
+            # project 3D bboxes to 2D image
+            show_proj_bbox_img(
+                idx,
+                dataset,
+                args.output_dir,
+                file_name,
+                show=args.online,
+                is_nus_mono=(dataset_type == 'NuScenesMonoDataset'))
+        elif vis_task in ['seg']:
             # show 3D segmentation mask on 3D point clouds
             show_seg_data(
                 idx, dataset, args.output_dir, file_name, show=args.online)
...
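
The new `CameraInstance3DBoxes` branch in `show_proj_bbox_img` above boils down to a pinhole projection of box corners through `cam_intrinsic`. A rough illustration of that math (not the library's actual drawing code; the box and intrinsics are invented):

# Hedged illustration of projecting camera-frame 3D box corners to pixels,
# i.e. the geometry behind box_mode='camera'; all values are made up.
import numpy as np
import torch
from mmdet3d.core.bbox import CameraInstance3DBoxes

box = CameraInstance3DBoxes(
    torch.tensor([[1.0, 1.5, 10.0, 1.6, 1.5, 3.9, 0.0]]))  # z=10, in front
K = np.array([[100., 0., 50.], [0., 100., 50.], [0., 0., 1.]])
corners = box.corners.numpy().reshape(-1, 3)  # (8, 3) corners, camera frame
uvw = corners @ K.T                           # pinhole projection
uv = uvw[:, :2] / uvw[:, 2:3]                 # divide by depth -> pixels
print(uv.round(1))                            # pixel coords of the 8 corners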