Unverified Commit c81426a3 authored by Ziyi Wu, committed by GitHub

[Feature] Support visualization and browse_dataset on nuScenes Mono-3D dataset (#542)

* fix comment errors

* add eval_pipeline in mono-nuscene cfg

* add vis function in nuscene-mono dataset

* refactor vis function to support all three mode boxes proj to img

* add unit test for nuscene-mono show func

* browse_dataset support nuScenes_mono

* add show_results() to SingleStageMono3DDetector

* support mono-3d dataset browse

* support nus_mono_dataset and single_stage_mono_detector show function

* update useful_tools.md docs

* support mono-3d demo

* add unit test

* update demo docs

* fix typos & remove unused comments

* polish docs
parent d37c3305
...@@ -53,6 +53,17 @@ test_pipeline = [
            dict(type='Collect3D', keys=['img']),
        ])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(type='LoadImageFromFileMono3D'),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['img'])
]
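# note: presumably this pipeline is consumed as the `pipeline` argument of the
# dataset's evaluate()/show(), so that raw images are loaded for visualization
# instead of the augmented test inputs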
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
......
{"images": [{"file_name": "samples/CAM_BACK/n015-2018-07-24-11-22-45+0800__CAM_BACK__1532402927637525.jpg", "cam_intrinsic": [[809.2209905677063, 0.0, 829.2196003259838], [0.0, 809.2209905677063, 481.77842384512485], [0.0, 0.0, 1.0]]}]}
from argparse import ArgumentParser
from mmdet3d.apis import (inference_mono_3d_detector, init_model,
show_result_meshlab)
def main():
parser = ArgumentParser()
parser.add_argument('image', help='image file')
parser.add_argument('ann', help='ann file')
parser.add_argument('config', help='Config file')
parser.add_argument('checkpoint', help='Checkpoint file')
parser.add_argument(
'--device', default='cuda:0', help='Device used for inference')
parser.add_argument(
'--score-thr', type=float, default=0.15, help='bbox score threshold')
parser.add_argument(
'--out-dir', type=str, default='demo', help='dir to save results')
parser.add_argument(
        '--show', action='store_true', help='show online visualization results')
parser.add_argument(
'--snapshot',
action='store_true',
        help='whether to save online visualization results')
args = parser.parse_args()
# build the model from a config file and a checkpoint file
model = init_model(args.config, args.checkpoint, device=args.device)
# test a single image
result, data = inference_mono_3d_detector(model, args.image, args.ann)
# show the results
show_result_meshlab(
data,
result,
args.out_dir,
args.score_thr,
show=args.show,
snapshot=args.snapshot,
task='mono-det')
if __name__ == '__main__':
main()
...@@ -2,7 +2,7 @@
## Introduction
We provide scripts for multi-modality/single-modality (LiDAR-based/vision-based), indoor/outdoor 3D detection and 3D semantic segmentation demos. The pre-trained models can be downloaded from [model zoo](https://github.com/open-mmlab/mmdetection3d/blob/master/docs/model_zoo.md/). We provide pre-processed sample data from the KITTI, SUN RGB-D, nuScenes and ScanNet datasets. You can use any other data following our pre-processing steps.
## Testing
...@@ -54,6 +54,22 @@ Example on SUN RGB-D data using [ImVoteNet](https://github.com/open-mmlab/mmdete
python demo/multi_modality_demo.py demo/data/sunrgbd/sunrgbd_000017.bin demo/data/sunrgbd/sunrgbd_000017.jpg demo/data/sunrgbd/sunrgbd_000017_infos.pkl configs/imvotenet/imvotenet_stage2_16x8_sunrgbd-3d-10class.py checkpoints/imvotenet_stage2_16x8_sunrgbd-3d-10class_20210323_184021-d44dcb66.pth
```
### Monocular 3D Detection
To test a monocular 3D detector on image data, simply run:
```shell
python demo/mono_det_demo.py ${IMAGE_FILE} ${ANNOTATION_FILE} ${CONFIG_FILE} ${CHECKPOINT_FILE} [--device ${GPU_ID}] [--out-dir ${OUT_DIR}] [--show]
```
where the `ANNOTATION_FILE` should provide the 3D-to-2D projection matrix (camera intrinsic matrix). The visualization results, including the image and its predicted 3D bounding boxes projected onto it, will be saved in `${OUT_DIR}/PCD_NAME`.
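For reference, below is a minimal sketch of such an annotation file (it mirrors the pre-processed sample under `demo/data/nuscenes`); only the `images` entries with `file_name` and the 3x3 `cam_intrinsic` matrix are needed for inference, and the values here are placeholders:
```json
{
  "images": [
    {
      "file_name": "samples/CAM_BACK/xxxx.jpg",
      "cam_intrinsic": [[809.2, 0.0, 829.2],
                        [0.0, 809.2, 481.8],
                        [0.0, 0.0, 1.0]]
    }
  ]
}
```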
Example on nuScenes data using [FCOS3D](https://github.com/open-mmlab/mmdetection3d/tree/master/configs/fcos3d) model:
```shell
python demo/mono_det_demo.py demo/data/nuscenes/n015-2018-07-24-11-22-45+0800__CAM_BACK__1532402927637525.jpg demo/data/nuscenes/n015-2018-07-24-11-22-45+0800__CAM_BACK__1532402927637525_mono3d.coco.json configs/fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d_finetune.py checkpoints/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d_finetune_20210427_091419-35aaaad0.pth
```
### 3D Segmentation
To test a 3D segmentor on point cloud data, simply run:
......
...@@ -10,6 +10,8 @@ You can plot loss/mAP curves given a training log file. Run `pip install seaborn
python tools/analysis_tools/analyze_logs.py plot_curve [--keys ${KEYS}] [--title ${TITLE}] [--legend ${LEGEND}] [--backend ${BACKEND}] [--style ${STYLE}] [--out ${OUT_FILE}] [--mode ${MODE}] [--interval ${INTERVAL}]
```
**Notice**: If the metric you want to plot is calculated in the eval stage, you need to add the flag `--mode eval`. If you perform evaluation with an interval of `${INTERVAL}`, you need to add the argument `--interval ${INTERVAL}`.
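For instance, assuming evaluation was performed every 2 epochs and produced a metric named `mAP` in the log (the exact key depends on the dataset and evaluation metric), the command would look like:
```shell
python tools/analysis_tools/analyze_logs.py plot_curve path/to/some_log.json --keys mAP --mode eval --interval 2
```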
Examples:
- Plot the classification loss of some run.
...@@ -55,27 +57,27 @@ average iter time: 1.1959 s/iter
## Results
To see the prediction results of trained models, you can run the following command
```bash
python tools/test.py ${CONFIG_FILE} ${CKPT_PATH} --show --show-dir ${SHOW_DIR}
```
After running this command, plotted results including input data and the output of networks visualized on the input (e.g. `***_points.obj` and `***_pred.obj` in the single-modality 3D detection task) will be saved in `${SHOW_DIR}`.
To see the prediction results during evaluation time, you can run the following command
```bash
python tools/test.py ${CONFIG_FILE} ${CKPT_PATH} --eval 'mAP' --options 'show=True' 'out_dir=${SHOW_DIR}'
```
After running this command, you will obtain the input data, the output of networks and ground-truth labels visualized on the input (e.g. `***_points.obj`, `***_pred.obj`, `***_gt.obj`, `***_img.png` and `***_pred.png` in the multi-modality detection task) in `${SHOW_DIR}`. When `show` is enabled, [Open3D](http://www.open3d.org/) will be used to visualize the results online. You need to set `show=False` while running the test on a remote server without GUI.
As for offline visualization, you will have two options.
To visualize the results with the `Open3D` backend, you can run the following command
```bash
python tools/misc/visualize_results.py ${CONFIG_FILE} --result ${RESULTS_PATH} --show-dir ${SHOW_DIR}
```
![Open3D_visualization](../resources/open3d_visual.gif)
...@@ -86,18 +88,18 @@ Or you can use 3D visualization software such as the [MeshLab](http://www.meshla
## Dataset
We also provide scripts to visualize the dataset without inference. You can use `tools/misc/browse_dataset.py` to show loaded data and ground-truth online and save them on the disk. Currently we support single-modality 3D detection and 3D segmentation on all the datasets, multi-modality 3D detection on KITTI and SUN RGB-D, as well as monocular 3D detection on nuScenes. To browse the KITTI dataset, you can run the following command
```shell
python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py --task det --output-dir ${OUTPUT_DIR} --online
```
**Notice**: Once specifying `--output-dir`, the images of the views specified by users will be saved when pressing `_ESC_` in the open3d window. If you don't have a monitor, you can remove the `--online` flag to only save the visualization results and browse them offline.
If you also want to show 2D images with 3D bounding boxes projected onto them, you need to find a config that supports multi-modality data loading, and then change the `--task` argument to `multi_modality-det`. An example is shown below
```shell
python tools/misc/browse_dataset.py configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py --task multi_modality-det --output-dir ${OUTPUT_DIR} --online
```
![Open3D_visualization](../resources/browse_dataset_multi_modality.png)
...@@ -105,11 +107,19 @@ python tools/misc/browse_dataset.py configs/mvxnet/dv_mvx-fpn_second_secfpn_adam
You can simply browse different datasets using different configs, e.g. visualizing the ScanNet dataset in the 3D semantic segmentation task
```shell
python tools/misc/browse_dataset.py configs/_base_/datasets/scannet_seg-3d-20class.py --task seg --output-dir ${OUTPUT_DIR} --online
```
![Open3D_visualization](../resources/browse_dataset_seg.png)
And browsing the nuScenes dataset in the monocular 3D detection task
```shell
python tools/misc/browse_dataset.py configs/_base_/datasets/nus-mono3d.py --task mono-det --output-dir ${OUTPUT_DIR} --online
```
![Open3D_visualization](../resources/browse_dataset_mono.png)
   
# Model Complexity
......
from .inference import (convert_SyncBN, inference_detector,
                        inference_mono_3d_detector,
                        inference_multi_modality_detector, inference_segmentor,
                        init_model, show_result_meshlab)
from .test import single_gpu_test
...@@ -6,6 +7,6 @@ from .train import train_model
__all__ = [
    'inference_detector', 'init_model', 'single_gpu_test',
    'inference_mono_3d_detector', 'show_result_meshlab', 'convert_SyncBN',
    'train_model', 'inference_multi_modality_detector', 'inference_segmentor'
]
...@@ -11,6 +11,7 @@ from mmdet3d.core import (Box3DMode, DepthInstance3DBoxes,
                          LiDARInstance3DBoxes, show_multi_modality_result,
                          show_result, show_seg_result)
from mmdet3d.core.bbox import get_box_type
from mmdet3d.core.bbox.structures.cam_box3d import CameraInstance3DBoxes
from mmdet3d.datasets.pipelines import Compose
from mmdet3d.models import build_model
...@@ -114,7 +115,7 @@ def inference_detector(model, pcd):
def inference_multi_modality_detector(model, pcd, image, ann_file):
    """Inference point cloud with the multi-modality detector.
    Args:
        model (nn.Module): The loaded detector.
...@@ -189,6 +190,65 @@ def inference_multi_modality_detector(model, pcd, image, ann_file):
    return result, data
def inference_mono_3d_detector(model, image, ann_file):
"""Inference image with the monocular 3D detector.
Args:
model (nn.Module): The loaded detector.
        image (str): Image file.
        ann_file (str): Annotation file.
Returns:
tuple: Predicted results and data from pipeline.
"""
cfg = model.cfg
device = next(model.parameters()).device # model device
# build the data pipeline
test_pipeline = deepcopy(cfg.data.test.pipeline)
test_pipeline = Compose(test_pipeline)
box_type_3d, box_mode_3d = get_box_type(cfg.data.test.box_type_3d)
# get data info containing calib
data_infos = mmcv.load(ann_file)
# find the info corresponding to this image
for x in data_infos['images']:
if osp.basename(x['file_name']) != osp.basename(image):
continue
img_info = x
break
data = dict(
img_prefix=osp.dirname(image),
img_info=dict(filename=osp.basename(image)),
box_type_3d=box_type_3d,
box_mode_3d=box_mode_3d,
img_fields=[],
bbox3d_fields=[],
pts_mask_fields=[],
pts_seg_fields=[],
bbox_fields=[],
mask_fields=[],
seg_fields=[])
# camera points to image conversion
if box_mode_3d == Box3DMode.CAM:
data['img_info'].update(dict(cam_intrinsic=img_info['cam_intrinsic']))
data = test_pipeline(data)
data = collate([data], samples_per_gpu=1)
if next(model.parameters()).is_cuda:
# scatter to specified GPU
data = scatter(data, [device.index])[0]
else:
# this is a workaround to avoid the bug of MMDataParallel
data['img_metas'] = data['img_metas'][0].data
data['img'] = data['img'][0].data
# forward the model
with torch.no_grad():
result = model(return_loss=False, rescale=True, **data)
return result, data
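# Example usage of inference_mono_3d_detector (a minimal sketch; the config,
# checkpoint and data paths are placeholders, see demo/mono_det_demo.py for
# the full demo script):
#   model = init_model(config_file, checkpoint_file, device='cuda:0')
#   result, data = inference_mono_3d_detector(model, image_file, ann_file)
#   show_result_meshlab(data, result, out_dir, 0.15, task='mono-det')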
def inference_segmentor(model, pcd):
    """Inference point cloud with the segmentor.
...@@ -319,13 +379,12 @@ def show_proj_det_result_meshlab(data,
    # read from file because img in data_dict has undergone pipeline transform
    img = mmcv.imread(img_filename)
    if 'pts_bbox' in result[0].keys():
        result[0] = result[0]['pts_bbox']
    elif 'img_bbox' in result[0].keys():
        result[0] = result[0]['img_bbox']
    pred_bboxes = result[0]['boxes_3d'].tensor.numpy()
    pred_scores = result[0]['scores_3d'].numpy()
    # filter out low score bboxes for visualization
    if score_thr > 0:
...@@ -347,6 +406,7 @@ def show_proj_det_result_meshlab(data,
            data['img_metas'][0][0]['lidar2img'],
            out_dir,
            file_name,
            box_mode='lidar',
            show=show)
    elif box_mode == Box3DMode.DEPTH:
        if 'calib' not in data.keys():
...@@ -362,9 +422,29 @@ def show_proj_det_result_meshlab(data,
            data['calib'][0],
            out_dir,
            file_name,
            box_mode='depth',
            img_metas=data['img_metas'][0][0],
            show=show)
elif box_mode == Box3DMode.CAM:
if 'cam_intrinsic' not in data['img_metas'][0][0]:
raise NotImplementedError(
'camera intrinsic matrix is not provided')
from mmdet3d.core.bbox import mono_cam_box2vis
show_bboxes = CameraInstance3DBoxes(
pred_bboxes, box_dim=pred_bboxes.shape[-1], origin=(0.5, 1.0, 0.5))
# TODO: remove the hack of box from NuScenesMonoDataset
show_bboxes = mono_cam_box2vis(show_bboxes)
show_multi_modality_result(
img,
None,
show_bboxes,
data['img_metas'][0][0]['cam_intrinsic'],
out_dir,
file_name,
box_mode='camera',
show=show)
    else:
        raise NotImplementedError(
            f'visualization of {box_mode} bbox is not supported')
...@@ -396,19 +476,19 @@ def show_result_meshlab(data,
            segmentation map. If None is given, random palette will be
            generated. Defaults to None.
    """
    assert task in ['det', 'multi_modality-det', 'seg', 'mono-det'], \
        f'unsupported visualization task {task}'
    assert out_dir is not None, 'Expect out_dir, got none.'
    if task in ['det', 'multi_modality-det']:
        file_name = show_det_result_meshlab(data, result, out_dir, score_thr,
                                            show, snapshot)
    if task in ['seg']:
        file_name = show_seg_result_meshlab(data, result, out_dir, palette,
                                            show, snapshot)
    if task in ['multi_modality-det', 'mono-det']:
        file_name = show_proj_det_result_meshlab(data, result, out_dir,
                                                 score_thr, show, snapshot)
......
...@@ -3,7 +3,8 @@ import torch
from mmcv.image import tensor2imgs
from os import path as osp
from mmdet3d.models import (Base3DDetector, Base3DSegmentor,
                            SingleStageMono3DDetector)
def single_gpu_test(model,
...@@ -39,7 +40,9 @@ def single_gpu_test(model,
        if show:
            # Visualize the results of MMDetection3D model
            # 'show_results' is MMdetection3D visualization API
            models_3d = (Base3DDetector, Base3DSegmentor,
                         SingleStageMono3DDetector)
            if isinstance(model.module, models_3d):
                model.module.show_results(data, result, out_dir)
            # Visualize the results of MMDetection model
            # 'show_result' is MMdetection visualization API
......
...@@ -11,7 +11,7 @@ from .samplers import (BaseSampler, CombinedSampler,
from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes,
                         Coord3DMode, DepthInstance3DBoxes,
                         LiDARInstance3DBoxes, get_box_type, limit_period,
                         mono_cam_box2vis, points_cam2img, xywhr2xyxyr)
from .transforms import bbox3d2result, bbox3d2roi, bbox3d_mapping_back
__all__ = [
...@@ -24,5 +24,5 @@ __all__ = [
    'LiDARInstance3DBoxes', 'CameraInstance3DBoxes', 'bbox3d2roi',
    'bbox3d2result', 'DepthInstance3DBoxes', 'BaseInstance3DBoxes',
    'bbox3d_mapping_back', 'xywhr2xyxyr', 'limit_period', 'points_cam2img',
    'get_box_type', 'Coord3DMode', 'mono_cam_box2vis'
]
...@@ -4,12 +4,12 @@ from .cam_box3d import CameraInstance3DBoxes
from .coord_3d_mode import Coord3DMode
from .depth_box3d import DepthInstance3DBoxes
from .lidar_box3d import LiDARInstance3DBoxes
from .utils import (get_box_type, limit_period, mono_cam_box2vis,
                    points_cam2img, rotation_3d_in_axis, xywhr2xyxyr)
__all__ = [
    'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes',
    'CameraInstance3DBoxes', 'DepthInstance3DBoxes', 'xywhr2xyxyr',
    'get_box_type', 'rotation_3d_in_axis', 'limit_period', 'points_cam2img',
    'Coord3DMode', 'mono_cam_box2vis'
]
...@@ -142,3 +142,44 @@ def points_cam2img(points_3d, proj_mat):
    point_2d = torch.matmul(points_4, proj_mat.t())
    point_2d_res = point_2d[..., :2] / point_2d[..., 2:3]
    return point_2d_res
def mono_cam_box2vis(cam_box):
"""This is a post-processing function on the bboxes from Mono-3D task. If
we want to perform projection visualization, we need to:
1. rotate the box along x-axis for np.pi / 2 (roll)
2. change orientation from local yaw to global yaw
3. convert yaw by (np.pi / 2 - yaw)
After applying this function, we can project and draw it on 2D images.
Args:
cam_box (:obj:`CameraInstance3DBoxes`): 3D bbox in camera coordinate \
system before conversion. Could be gt bbox loaded from dataset or \
network prediction output.
Returns:
:obj:`CameraInstance3DBoxes`: Box after conversion.
"""
from . import CameraInstance3DBoxes
assert isinstance(cam_box, CameraInstance3DBoxes), \
'input bbox should be CameraInstance3DBoxes!'
loc = cam_box.gravity_center
dim = cam_box.dims
yaw = cam_box.yaw
feats = cam_box.tensor[:, 7:]
# rotate along x-axis for np.pi / 2
# see also here: https://github.com/open-mmlab/mmdetection3d/blob/master/mmdet3d/datasets/nuscenes_mono_dataset.py#L557 # noqa
dim[:, [1, 2]] = dim[:, [2, 1]]
# change local yaw to global yaw for visualization
# refer to https://github.com/open-mmlab/mmdetection3d/blob/master/mmdet3d/datasets/nuscenes_mono_dataset.py#L164-L166 # noqa
yaw += torch.atan2(loc[:, 0], loc[:, 2])
# convert yaw by (np.pi / 2 - yaw)
yaw = np.pi / 2.0 - yaw
cam_box = torch.cat([loc, dim, yaw[:, None], feats], dim=1)
cam_box = CameraInstance3DBoxes(
cam_box, box_dim=cam_box.shape[-1], origin=(0.5, 0.5, 0.5))
return cam_box
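# Example (a sketch) of converting raw camera-coordinate boxes before projecting
# them onto an image, mirroring the usage in show_proj_det_result_meshlab:
#   boxes = CameraInstance3DBoxes(
#       pred_bboxes, box_dim=pred_bboxes.shape[-1], origin=(0.5, 1.0, 0.5))
#   vis_boxes = mono_cam_box2vis(boxes)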
...@@ -56,46 +56,31 @@ def project_pts_on_img(points,
        cv2.waitKey(100)
def plot_rect3d_on_img(img,
                       num_rects,
                       rect_corners,
                       color=(0, 255, 0),
                       thickness=1):
    """Plot the boundary lines of 3D rectangular on 2D images.
    Args:
        img (numpy.array): The numpy array of image.
        num_rects (int): Number of 3D rectangulars.
        rect_corners (numpy.array): Coordinates of the corners of 3D
            rectangulars. Should be in the shape of [num_rect, 8, 2].
        color (tuple[int]): The color to draw bboxes. Default: (0, 255, 0).
        thickness (int, optional): The thickness of bboxes. Default: 1.
    """
    line_indices = ((0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (3, 2), (3, 7),
                    (4, 5), (4, 7), (2, 6), (5, 6), (6, 7))
    for i in range(num_rects):
        corners = rect_corners[i].astype(np.int)
        for start, end in line_indices:
            cv2.line(img, (corners[start, 0], corners[start, 1]),
                     (corners[end, 0], corners[end, 1]), color, thickness,
                     cv2.LINE_AA)
    return img.astype(np.uint8)
def draw_lidar_bbox3d_on_img(bboxes3d,
...@@ -107,8 +92,8 @@ def draw_lidar_bbox3d_on_img(bboxes3d,
    """Project the 3D bbox on 2D plane and draw on input image.
    Args:
        bboxes3d (:obj:`LiDARInstance3DBoxes`):
            3d bbox in lidar coordinate system to visualize.
        raw_img (numpy.array): The numpy array of image.
        lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix
            according to the camera intrinsic parameters.
...@@ -132,16 +117,7 @@ def draw_lidar_bbox3d_on_img(bboxes3d,
    pts_2d[:, 1] /= pts_2d[:, 2]
    imgfov_pts_2d = pts_2d[..., :2].reshape(num_bbox, 8, 2)
    return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)
def draw_depth_bbox3d_on_img(bboxes3d,
...@@ -153,8 +129,8 @@ def draw_depth_bbox3d_on_img(bboxes3d,
    """Project the 3D bbox on 2D plane and draw on input image.
    Args:
        bboxes3d (:obj:`DepthInstance3DBoxes`, shape=[M, 7]):
            3d bbox in depth coordinate system to visualize.
        raw_img (numpy.array): The numpy array of image.
        calibs (dict): Camera calibration information, Rt and K.
        img_metas (dict): Used in coordinates transformation.
...@@ -193,13 +169,41 @@ def draw_depth_bbox3d_on_img(bboxes3d,
    uv_origin = (uv_origin - 1).round()
    imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()
    return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)
def draw_camera_bbox3d_on_img(bboxes3d,
raw_img,
cam_intrinsic,
img_metas,
color=(0, 255, 0),
thickness=1):
"""Project the 3D bbox on 2D plane and draw on input image.
Args:
bboxes3d (:obj:`CameraInstance3DBoxes`, shape=[M, 7]):
3d bbox in camera coordinate system to visualize.
raw_img (numpy.array): The numpy array of image.
        cam_intrinsic (dict): Camera intrinsic matrix, denoted as `K`.
        img_metas (dict): Not used in this function.
color (tuple[int]): The color to draw bboxes. Default: (0, 255, 0).
thickness (int, optional): The thickness of bboxes. Default: 1.
"""
from mmdet3d.core.bbox import points_cam2img
img = raw_img.copy()
cam_intrinsic = copy.deepcopy(cam_intrinsic)
corners_3d = bboxes3d.corners
num_bbox = corners_3d.shape[0]
points_3d = corners_3d.reshape(-1, 3)
if not isinstance(cam_intrinsic, torch.Tensor):
cam_intrinsic = torch.from_numpy(np.array(cam_intrinsic))
cam_intrinsic = cam_intrinsic.reshape(3, 3).float().cpu()
# project to 2d to get image coords (uv)
uv_origin = points_cam2img(points_3d, cam_intrinsic)
uv_origin = (uv_origin - 1).round()
imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()
return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)
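# Example (a sketch): this helper is normally reached through
# show_multi_modality_result(..., box_mode='camera'); a direct call looks like
#   drawn_img = draw_camera_bbox3d_on_img(
#       mono_cam_box2vis(pred_bboxes), img, cam_intrinsic, img_metas=None)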
...@@ -3,7 +3,8 @@ import numpy as np
import trimesh
from os import path as osp
from .image_vis import (draw_camera_bbox3d_on_img, draw_depth_bbox3d_on_img,
                        draw_lidar_bbox3d_on_img)
def _write_obj(points, out_filename):
...@@ -202,7 +203,7 @@ def show_multi_modality_result(img,
                               proj_mat,
                               out_dir,
                               filename,
                               box_mode,
                               img_metas=None,
                               show=False,
                               gt_bbox_color=(61, 102, 255),
...@@ -213,24 +214,29 @@ def show_multi_modality_result(img,
    Args:
        img (np.ndarray): The numpy array of image in cv2 fashion.
        gt_bboxes (:obj:`BaseInstance3DBoxes`): Ground truth boxes.
        pred_bboxes (:obj:`BaseInstance3DBoxes`): Predicted boxes.
        proj_mat (numpy.array, shape=[4, 4]): The projection matrix
            according to the camera intrinsic parameters.
        out_dir (str): Path of output directory.
        filename (str): Filename of the current frame.
        box_mode (str): Coordinate system the boxes are in.
            Should be one of 'depth', 'lidar' and 'camera'.
        img_metas (dict): Used in projecting depth bbox.
        show (bool): Visualize the results online. Defaults to False.
        gt_bbox_color (str or tuple(int)): Color of bbox lines.
            The tuple of color should be in BGR order. Default: (255, 102, 61)
        pred_bbox_color (str or tuple(int)): Color of bbox lines.
            The tuple of color should be in BGR order. Default: (72, 101, 241)
    """
    if box_mode == 'depth':
        draw_bbox = draw_depth_bbox3d_on_img
    elif box_mode == 'lidar':
        draw_bbox = draw_lidar_bbox3d_on_img
    elif box_mode == 'camera':
        draw_bbox = draw_camera_bbox3d_on_img
    else:
        raise NotImplementedError(f'unsupported box mode {box_mode}')
    result_path = osp.join(out_dir, filename)
    mmcv.mkdir_or_exist(result_path)
......
...@@ -740,4 +740,5 @@ class KittiDataset(Custom3DDataset):
            img_metas['lidar2img'],
            out_dir,
            file_name,
            box_mode='lidar',
            show=show)
...@@ -4,12 +4,16 @@ import numpy as np
import pyquaternion
import tempfile
import torch
import warnings
from nuscenes.utils.data_classes import Box as NuScenesBox
from os import path as osp
from mmdet3d.core import bbox3d2result, box3d_multiclass_nms, xywhr2xyxyr
from mmdet.datasets import DATASETS, CocoDataset
from ..core import show_multi_modality_result
from ..core.bbox import CameraInstance3DBoxes, get_box_type, mono_cam_box2vis
from .pipelines import Compose
from .utils import get_loading_pipeline
@DATASETS.register_module()
...@@ -484,7 +488,8 @@ class NuScenesMonoDataset(CocoDataset):
                 jsonfile_prefix=None,
                 result_names=['img_bbox'],
                 show=False,
                 out_dir=None,
                 pipeline=None):
        """Evaluation in nuScenes protocol.
        Args:
...@@ -499,6 +504,8 @@ class NuScenesMonoDataset(CocoDataset):
                Default: False.
            out_dir (str): Path to save the visualization results.
                Default: None.
            pipeline (list[dict], optional): raw data loading for showing.
                Default: None.
        Returns:
            dict[str, float]: Results of each evaluation metric.
...@@ -519,18 +526,129 @@ class NuScenesMonoDataset(CocoDataset):
            tmp_dir.cleanup()
        if show:
            self.show(results, out_dir, pipeline=pipeline)
        return results_dict
    @staticmethod
def _get_data(results, key):
"""Extract and return the data corresponding to key in result dict.
Args:
results (dict): Data loaded using pipeline.
key (str): Key of the desired data.
Returns:
np.ndarray | torch.Tensor | None: Data term.
"""
if key not in results.keys():
return None
# results[key] may be data or list[data]
# data may be wrapped inside DataContainer
data = results[key]
if isinstance(data, list) or isinstance(data, tuple):
data = data[0]
if isinstance(data, mmcv.parallel.DataContainer):
data = data._data
return data
def _extract_data(self, index, pipeline, key, load_annos=False):
"""Load data using input pipeline and extract data according to key.
Args:
index (int): Index for accessing the target data.
pipeline (:obj:`Compose`): Composed data loading pipeline.
key (str | list[str]): One single or a list of data key.
load_annos (bool): Whether to load data annotations.
If True, need to set self.test_mode as False before loading.
Returns:
np.ndarray | torch.Tensor | list[np.ndarray | torch.Tensor]:
A single or a list of loaded data.
"""
assert pipeline is not None, 'data loading pipeline is not provided'
img_info = self.data_infos[index]
input_dict = dict(img_info=img_info)
if load_annos:
ann_info = self.get_ann_info(index)
input_dict.update(dict(ann_info=ann_info))
self.pre_pipeline(input_dict)
example = pipeline(input_dict)
# extract data items according to keys
if isinstance(key, str):
data = self._get_data(example, key)
else:
data = [self._get_data(example, k) for k in key]
return data
def _get_pipeline(self, pipeline):
"""Get data loading pipeline in self.show/evaluate function.
Args:
pipeline (list[dict] | None): Input pipeline. If None is given, \
get from self.pipeline.
"""
if pipeline is None:
if not hasattr(self, 'pipeline') or self.pipeline is None:
warnings.warn(
'Use default pipeline for data loading, this may cause '
'errors when data is on ceph')
return self._build_default_pipeline()
loading_pipeline = get_loading_pipeline(self.pipeline.transforms)
return Compose(loading_pipeline)
return Compose(pipeline)
def _build_default_pipeline(self):
"""Build the default pipeline for this dataset."""
pipeline = [
dict(type='LoadImageFromFileMono3D'),
dict(
type='DefaultFormatBundle3D',
class_names=self.CLASSES,
with_label=False),
dict(type='Collect3D', keys=['img'])
]
return Compose(pipeline)
def show(self, results, out_dir, show=True, pipeline=None):
"""Results visualization. """Results visualization.
Args: Args:
results (list[dict]): List of bounding boxes results. results (list[dict]): List of bounding boxes results.
out_dir (str): Output directory of visualization result. out_dir (str): Output directory of visualization result.
show (bool): Visualize the results online.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
""" """
# TODO: support mono3d visualization assert out_dir is not None, 'Expect out_dir, got none.'
pass pipeline = self._get_pipeline(pipeline)
for i, result in enumerate(results):
if 'img_bbox' in result.keys():
result = result['img_bbox']
data_info = self.data_infos[i]
img_path = data_info['file_name']
file_name = osp.split(img_path)[-1].split('.')[0]
img, img_metas = self._extract_data(i, pipeline,
['img', 'img_metas'])
# need to transpose channel to first dim
img = img.numpy().transpose(1, 2, 0)
gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d']
pred_bboxes = result['boxes_3d']
# TODO: remove the hack of box from NuScenesMonoDataset
gt_bboxes = mono_cam_box2vis(gt_bboxes)
pred_bboxes = mono_cam_box2vis(pred_bboxes)
show_multi_modality_result(
img,
gt_bboxes,
pred_bboxes,
img_metas['cam_intrinsic'],
out_dir,
file_name,
box_mode='camera',
show=show)
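# Example (a sketch): offline visualization of saved results, reusing the
# eval_pipeline defined in configs/_base_/datasets/nus-mono3d.py:
#   dataset.show(results, out_dir='show_dir', pipeline=cfg.eval_pipeline)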
def output_to_nusc_box(detection):
......
...@@ -214,7 +214,7 @@ class SUNRGBDDataset(Custom3DDataset):
            calib,
            out_dir,
            file_name,
            box_mode='depth',
            img_metas=img_metas,
            show=show)
......
import mmcv
import numpy as np
import torch
from mmcv.parallel import DataContainer as DC
from os import path as osp
from mmdet3d.core import (CameraInstance3DBoxes, bbox3d2result,
                          mono_cam_box2vis, show_multi_modality_result)
from mmdet.models.builder import DETECTORS
from mmdet.models.detectors.single_stage import SingleStageDetector
...@@ -172,3 +176,46 @@ class SingleStageMono3DDetector(SingleStageDetector):
        bbox_list.update(img_bbox2d=bbox2d_img[0])
        return [bbox_list]
def show_results(self, data, result, out_dir):
"""Results visualization.
Args:
data (list[dict]): Input images and the information of the sample.
result (list[dict]): Prediction results.
out_dir (str): Output directory of visualization result.
"""
for batch_id in range(len(result)):
if isinstance(data['img_metas'][0], DC):
img_filename = data['img_metas'][0]._data[0][batch_id][
'filename']
cam_intrinsic = data['img_metas'][0]._data[0][batch_id][
'cam_intrinsic']
elif mmcv.is_list_of(data['img_metas'][0], dict):
img_filename = data['img_metas'][0][batch_id]['filename']
cam_intrinsic = data['img_metas'][0][batch_id]['cam_intrinsic']
else:
                raise ValueError(
f"Unsupported data type {type(data['img_metas'][0])} "
f'for visualization!')
img = mmcv.imread(img_filename)
file_name = osp.split(img_filename)[-1].split('.')[0]
assert out_dir is not None, 'Expect out_dir, got none.'
pred_bboxes = result[batch_id]['img_bbox']['boxes_3d']
assert isinstance(pred_bboxes, CameraInstance3DBoxes), \
f'unsupported predicted bbox type {type(pred_bboxes)}'
# TODO: remove the hack of box from NuScenesMonoDataset
pred_bboxes = mono_cam_box2vis(pred_bboxes)
show_multi_modality_result(
img,
None,
pred_bboxes,
cam_intrinsic,
out_dir,
file_name,
'camera',
show=True)