Unverified Commit 59164170 authored by Ziyi Wu, committed by GitHub

[Enhance] Dataset browse for multiple dataset types (#467)

* fix small bug in get_loading_pipeline()

* adopt eval_pipeline & support seg visualization

* support multi-modality vis

* fix small bugs

* add multi-modality args & support S3DIS vis

* update docs with imgs

* fix typos
parent 8e1f4b8d
@@ -86,21 +86,29 @@ Or you can use 3D visualization software such as the [MeshLab](http://www.meshla
## Dataset
We also provide scripts to visualize the dataset without inference. You can use `tools/misc/browse_dataset.py` to show the loaded data and ground truth online and save them to disk. Currently we support single-modality 3D detection and 3D segmentation on all datasets, as well as multi-modality 3D detection on KITTI and SUN RGB-D. To browse the KITTI dataset, you can run the following command
```shell
python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py --output-dir ${OUTPUT_DIR} --online
```
**Notice**: Once `--output-dir` is specified, the images of the views chosen by the user will be saved when pressing _ESC_ in the Open3D window. If you don't have a monitor, you can remove the `--online` flag to only save the visualization results and browse them offline.
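For example, on a headless machine you can simply drop the `--online` flag so that results are only written to `${OUTPUT_DIR}` for offline browsing (a usage sketch based on the KITTI command above)
```shell
python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py --output-dir ${OUTPUT_DIR}
```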
If you also want to show 2D images with 3D bounding boxes projected onto them, you need to find a config that supports multi-modality data loading, and then add the `--multi-modality` flag to the command. An example is shown below
```shell
python tools/misc/browse_dataset.py configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py --output-dir ${OUTPUT_DIR} --online --multi-modality
```
![Open3D_visualization](../resources/browse_dataset_multi_modality.png)
You can simply browse different datasets using different configs, e.g. visualizing the ScanNet dataset for the 3D semantic segmentation task
```shell
python tools/misc/browse_dataset.py configs/_base_/datasets/scannet_seg-3d-20class.py --output-dir ${OUTPUT_DIR} --online
```
![Open3D_visualization](../resources/browse_dataset_seg.png)
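S3DIS segmentation visualization works the same way. A minimal sketch, assuming the base config lives at `configs/_base_/datasets/s3dis_seg-3d-13class.py` (this file name is an assumption; check the `configs/_base_/datasets/` folder for the exact path)
```shell
# NOTE: the config path is an assumption, adjust it to the actual S3DIS segmentation config
python tools/misc/browse_dataset.py configs/_base_/datasets/s3dis_seg-3d-13class.py --output-dir ${OUTPUT_DIR} --online
```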
@@ -15,6 +15,9 @@ from mmdet.datasets.pipelines import LoadImageFromFile
def is_loading_function(transform):
"""Judge whether a transform function is a loading function.
Note: `MultiScaleFlipAug3D` is a wrapper for multiple pipeline functions,
so we need to search if its inner transforms contain any loading function.
Args:
transform (dict | :obj:`Pipeline`): A transform config or a function.
@@ -34,7 +37,7 @@ def is_loading_function(transform):
return False
if obj_cls in loading_functions:
return True
if obj_cls in (MultiScaleFlipAug3D, ):
return None
elif callable(transform):
if isinstance(transform, loading_functions):
import argparse
import numpy as np
import warnings
from mmcv import Config, DictAction, mkdir_or_exist, track_iter_progress
from os import path as osp
from mmdet3d.core.bbox import (Box3DMode, Coord3DMode, DepthInstance3DBoxes,
LiDARInstance3DBoxes)
from mmdet3d.core.visualizer import (show_multi_modality_result, show_result,
show_seg_result)
from mmdet3d.datasets import build_dataset
def parse_args():
@@ -21,6 +25,17 @@ def parse_args():
default=None,
type=str,
help='If there is no display interface, you can save it')
parser.add_argument(
'--multi-modality',
action='store_true',
help='Whether to visualize multi-modality data. If True, we will show '
'both 3D point clouds with 3D bounding boxes and 2D images with '
'projected bounding boxes.')
parser.add_argument(
'--online',
action='store_true',
help='Whether to perform online visualization. Note that you often '
'need a monitor to do so.')
parser.add_argument(
'--cfg-options',
nargs='+',
@@ -35,7 +50,8 @@ def parse_args():
return args
def build_data_cfg(config_path, skip_type, cfg_options):
"""Build data config for loading visualization data."""
cfg = Config.fromfile(config_path)
if cfg_options is not None:
cfg.merge_from_dict(cfg_options)
@@ -43,64 +59,173 @@ def retrieve_data_cfg(config_path, skip_type, cfg_options):
if cfg.get('custom_imports', None):
from mmcv.utils import import_modules_from_strings
import_modules_from_strings(**cfg['custom_imports'])
# extract inner dataset of `RepeatDataset` as `cfg.data.train`
# so we don't need to worry about it later
if cfg.data.train['type'] == 'RepeatDataset':
cfg.data.train = cfg.data.train.dataset
train_data_cfg = cfg.data.train
# eval_pipeline purely consists of loading functions
# use eval_pipeline for data loading
train_data_cfg['pipeline'] = [
x for x in cfg.eval_pipeline if x['type'] not in skip_type
]
return cfg
def to_depth_mode(points, bboxes):
"""Convert points and bboxes to Depth Coord and Depth Box mode."""
if points is not None:
points = Coord3DMode.convert_point(points.copy(), Coord3DMode.LIDAR,
Coord3DMode.DEPTH)
if bboxes is not None:
bboxes = Box3DMode.convert(bboxes.clone(), Box3DMode.LIDAR,
Box3DMode.DEPTH)
return points, bboxes
def show_det_data(idx, dataset, out_dir, filename, show=False):
"""Visualize 3D point cloud and 3D bboxes."""
example = dataset.prepare_train_data(idx)
points = example['points']._data.numpy()
gt_bboxes = dataset.get_ann_info(idx)['gt_bboxes_3d'].tensor
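# the visualization assumes depth-mode coordinates, so convert points & boxes from LiDAR mode if needed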
if dataset.box_mode_3d != Box3DMode.DEPTH:
points, gt_bboxes = to_depth_mode(points, gt_bboxes)
show_result(
points,
gt_bboxes.clone(),
None,
out_dir,
filename,
show=show,
snapshot=True)
def show_seg_data(idx, dataset, out_dir, filename, show=False):
"""Visualize 3D point cloud and segmentation mask."""
example = dataset.prepare_train_data(idx)
points = example['points']._data.numpy()
gt_seg = example['pts_semantic_mask']._data.numpy()
show_seg_result(
points,
gt_seg.copy(),
None,
out_dir,
filename,
np.array(dataset.PALETTE),
dataset.ignore_index,
show=show,
snapshot=True)
def show_proj_bbox_img(idx, dataset, out_dir, filename, show=False):
"""Visualize 3D bboxes on 2D image by projection."""
example = dataset.prepare_train_data(idx)
gt_bboxes = dataset.get_ann_info(idx)['gt_bboxes_3d']
img_metas = example['img_metas']._data
img = example['img']._data.numpy()
# need to transpose channel to first dim
img = img.transpose(1, 2, 0)
# no 3D gt bboxes, just show img
if gt_bboxes.tensor.shape[0] == 0:
gt_bboxes = None
if isinstance(gt_bboxes, DepthInstance3DBoxes):
show_multi_modality_result(
img,
gt_bboxes,
None,
example['calib'],
out_dir,
filename,
depth_bbox=True,
img_metas=img_metas,
show=show)
elif isinstance(gt_bboxes, LiDARInstance3DBoxes):
show_multi_modality_result(
img,
gt_bboxes,
None,
img_metas['lidar2img'],
out_dir,
filename,
depth_bbox=False,
img_metas=img_metas,
show=show)
else:
# can't project, just show img
show_multi_modality_result(
img, None, None, None, out_dir, filename, show=show)
def is_multi_modality(dataset):
"""Judge whether a dataset loads multi-modality data (points+img)."""
if not hasattr(dataset, 'modality') or dataset.modality is None:
return False
if dataset.modality['use_camera']:
# even dataset with `use_camera=True` may not load img
# should check its loaded data
example = dataset.prepare_train_data(0)
if 'img' in example.keys():
return True
return False
def main():
args = parse_args()
if args.output_dir is not None:
mkdir_or_exist(args.output_dir)
cfg = build_data_cfg(args.config, args.skip_type, args.cfg_options)
try:
dataset = build_dataset(
cfg.data.train, default_args=dict(filter_empty_gt=False))
except TypeError: # seg dataset doesn't have `filter_empty_gt` key
dataset = build_dataset(cfg.data.train)
data_infos = dataset.data_infos
dataset_type = cfg.dataset_type
# configure visualization mode
vis_type = 'det' # single-modality detection
if dataset_type in ['ScanNetSegDataset', 'S3DISSegDataset']:
vis_type = 'seg' # segmentation
multi_modality = args.multi_modality
if multi_modality:
# check whether dataset really supports multi-modality input
if not is_multi_modality(dataset):
warnings.warn(
f'{dataset_type} with current config does not support multi-'
'modality data loading, only show point clouds here')
multi_modality = False
for idx, data_info in enumerate(track_iter_progress(data_infos)):
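# the point cloud file path is stored under a different info key for each dataset type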
if dataset_type in ['KittiDataset', 'WaymoDataset']:
pts_path = data_info['point_cloud']['velodyne_path']
elif dataset_type in [
'ScanNetDataset', 'SUNRGBDDataset', 'ScanNetSegDataset',
'S3DISSegDataset'
]:
pts_path = data_info['pts_path']
elif dataset_type in ['NuScenesDataset', 'LyftDataset']:
pts_path = data_info['lidar_path']
else:
raise NotImplementedError(
f'unsupported dataset type {dataset_type}')
file_name = osp.splitext(osp.basename(pts_path))[0]
if vis_type == 'det':
# show 3D bboxes on 3D point clouds
show_det_data(
idx, dataset, args.output_dir, file_name, show=args.online)
if multi_modality:
# project 3D bboxes to 2D image
show_proj_bbox_img(
idx, dataset, args.output_dir, file_name, show=args.online)
elif vis_type == 'seg':
# show 3D segmentation mask on 3D point clouds
show_seg_data(
idx, dataset, args.output_dir, file_name, show=args.online)
if __name__ == '__main__':
main()