Unverified commit e21e61e0 authored by Ziyi Wu, committed by GitHub

[Feature] Support multi-modality visualization (demos and dataset show function) (#405)



* incomplete (WIP)

* support lidar2img loading

* add box_type_3d args in MVX-Net config file

* support multi-modality demo for LiDAR point clouds

* support multi-modality demo for indoor (depth) point clouds

* move demo data into folder and modify docs

* add input check & more general filename matching

* update docs for demo and add README file for demo

* add score_threshold option to demos

* add data for ScanNet & KITTI dataset multi-modality test

* add multi-modality visualization in ScanNet and KITTI dataset

* add unittest for modified visualization function

* delete saved temp file and dirs in unittests using TemporaryDirectory

* fix typos in docs & move README of demos to docs/

* add demo docs to documentation

* fix link error
Co-authored-by: wHao-Wu <wenhaowu.chn@gmail.com>
parent 825f47a4
@@ -202,7 +202,8 @@ data = dict(
             pipeline=train_pipeline,
             modality=input_modality,
             classes=class_names,
-            test_mode=False)),
+            test_mode=False,
+            box_type_3d='LiDAR')),
     val=dict(
         type=dataset_type,
         data_root=data_root,
@@ -212,7 +213,8 @@ data = dict(
         pipeline=test_pipeline,
         modality=input_modality,
         classes=class_names,
-        test_mode=True),
+        test_mode=True,
+        box_type_3d='LiDAR'),
     test=dict(
         type=dataset_type,
         data_root=data_root,
@@ -222,7 +224,8 @@ data = dict(
         pipeline=test_pipeline,
         modality=input_modality,
         classes=class_names,
-        test_mode=True))
+        test_mode=True,
+        box_type_3d='LiDAR'))
 # Training settings
 optimizer = dict(type='AdamW', lr=0.003, betas=(0.95, 0.99), weight_decay=0.01)
 # max_norm=10 is better for SECOND
......
from argparse import ArgumentParser

from mmdet3d.apis import (inference_multi_modality_detector, init_detector,
                          show_result_meshlab)


def main():
    parser = ArgumentParser()
    parser.add_argument('pcd', help='Point cloud file')
    parser.add_argument('image', help='image file')
    parser.add_argument('ann', help='ann file')
    parser.add_argument('config', help='Config file')
    parser.add_argument('checkpoint', help='Checkpoint file')
    parser.add_argument(
        '--device', default='cuda:0', help='Device used for inference')
    parser.add_argument(
        '--score-thr', type=float, default=0.0, help='bbox score threshold')
    parser.add_argument(
        '--out-dir', type=str, default='demo', help='dir to save results')
    args = parser.parse_args()

    # build the model from a config file and a checkpoint file
    model = init_detector(args.config, args.checkpoint, device=args.device)
    # test a single sample (point cloud + image)
    result, data = inference_multi_modality_detector(model, args.pcd,
                                                      args.image, args.ann)
    # show the results
    show_result_meshlab(data, result, args.out_dir, args.score_thr)


if __name__ == '__main__':
    main()
@@ -11,7 +11,7 @@ def main():
     parser.add_argument(
         '--device', default='cuda:0', help='Device used for inference')
     parser.add_argument(
-        '--score-thr', type=float, default=0.6, help='bbox score threshold')
+        '--score-thr', type=float, default=0.0, help='bbox score threshold')
     parser.add_argument(
         '--out-dir', type=str, default='demo', help='dir to save results')
     args = parser.parse_args()
@@ -21,7 +21,7 @@ def main():
     # test a single image
     result, data = inference_detector(model, args.pcd)
     # show the results
-    show_result_meshlab(data, result, args.out_dir)
+    show_result_meshlab(data, result, args.out_dir, args.score_thr)


 if __name__ == '__main__':
......
# Demo
## Introduction
We provide scripts for multi-modality/single-modality and indoor/outdoor 3D detection demos. The pre-trained models can be downloaded from the [model zoo](../docs/model_zoo.md). We provide pre-processed sample data from the KITTI and SUN RGB-D datasets. You can also use any other data, as long as you follow our pre-processing steps.
## Testing
### Single-modality demo
To test a 3D detector on point cloud data, simply run:
```shell
python demo/pcd_demo.py ${PCD_FILE} ${CONFIG_FILE} ${CHECKPOINT_FILE} [--device ${GPU_ID}] [--score-thr ${SCORE_THR}] [--out-dir ${OUT_DIR}]
```
The visualization results, including the point cloud and the predicted 3D bounding boxes, will be saved in ```demo/PCD_NAME```, which you can open using [MeshLab](http://www.meshlab.net/).
Example on KITTI data using [SECOND](https://github.com/open-mmlab/mmdetection3d/tree/master/configs/second) model:
```shell
python demo/pcd_demo.py demo/data/kitti/kitti_000008.bin configs/second/hv_second_secfpn_6x8_80e_kitti-3d-car.py checkpoints/hv_second_secfpn_6x8_80e_kitti-3d-car_20200620_230238-393f000c.pth
```
Example on SUN RGB-D data using [VoteNet](https://github.com/open-mmlab/mmdetection3d/tree/master/configs/votenet) model:
```shell
python demo/pcd_demo.py demo/data/sunrgbd/sunrgbd_000017.bin configs/votenet/votenet_16x8_sunrgbd-3d-10class.py checkpoints/votenet_16x8_sunrgbd-3d-10class_20200620_230238-4483c0c0.pth
```
Remember to convert the VoteNet checkpoint if you are using mmdetection3d version >= 0.6.0. See its [README](https://github.com/open-mmlab/mmdetection3d/blob/master/configs/votenet/README.md) for detailed instructions on how to convert the checkpoint.
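The demo script is a thin wrapper around the high-level inference API in `mmdet3d.apis`. Below is a minimal sketch of the equivalent Python usage, reusing the SECOND config and checkpoint from the KITTI example above (adjust the paths if your layout differs):

```python
from mmdet3d.apis import inference_detector, init_detector, show_result_meshlab

config = 'configs/second/hv_second_secfpn_6x8_80e_kitti-3d-car.py'
checkpoint = 'checkpoints/hv_second_secfpn_6x8_80e_kitti-3d-car_20200620_230238-393f000c.pth'

# build the model and run inference on a single point cloud file
model = init_detector(config, checkpoint, device='cuda:0')
result, data = inference_detector(model, 'demo/data/kitti/kitti_000008.bin')

# save the point cloud and predicted boxes under demo/kitti_000008/ for MeshLab
show_result_meshlab(data, result, 'demo', score_thr=0.0)
```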
### Multi-modality demo
To test a 3D detector on multi-modality data (typically point cloud and image), simply run:
```shell
python demo/multi_modality_demo.py ${PCD_FILE} ${IMAGE_FILE} ${ANNOTATION_FILE} ${CONFIG_FILE} ${CHECKPOINT_FILE} [--device ${GPU_ID}] [--score-thr ${SCORE_THR}] [--out-dir ${OUT_DIR}]
```
where the ```ANNOTATION_FILE``` should provide the 3D-to-2D projection matrix. The visualization results, including the point cloud, the image, the predicted 3D bounding boxes and their projection onto the image, will be saved in ```demo/PCD_NAME```.
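For reference, the demo only reads a few fields from this file: it expects a list of per-sample info dicts, matched by `info['image']['image_idx']`, whose `info['calib']` entry holds the projection matrices (`P2`, `R0_rect` and `Tr_velo_to_cam` for KITTI-style LiDAR data; `Rt` and `K` for SUN RGB-D-style depth data). Below is a minimal sketch of assembling such a file for one KITTI-style sample; the identity matrices are placeholders, not real calibration values:

```python
import mmcv
import numpy as np

info = dict(
    image=dict(image_idx=8),  # matched against the digits in the image filename
    calib=dict(
        P2=np.eye(4, dtype=np.float32),              # camera projection matrix
        R0_rect=np.eye(4, dtype=np.float32),         # rectification matrix
        Tr_velo_to_cam=np.eye(4, dtype=np.float32),  # LiDAR-to-camera transform
    ))
mmcv.dump([info], 'demo/data/kitti/kitti_000008_infos.pkl')
```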
Example on KITTI data using [MVX-Net](https://github.com/open-mmlab/mmdetection3d/tree/master/configs/mvxnet) model:
```shell
python demo/multi_modality_demo.py demo/data/kitti/kitti_000008.bin demo/data/kitti/kitti_000008.png demo/data/kitti/kitti_000008_infos.pkl configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py checkpoints/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class_20200621_003904-10140f2d.pth
```
Example on SUN RGB-D data using [ImVoteNet](https://github.com/open-mmlab/mmdetection3d/tree/master/configs/imvotenet) model:
```shell
python demo/multi_modality_demo.py demo/data/sunrgbd/sunrgbd_000017.bin demo/data/sunrgbd/sunrgbd_000017.jpg demo/data/sunrgbd/sunrgbd_000017_infos.pkl configs/imvotenet/imvotenet_stage2_16x8_sunrgbd-3d-10class.py checkpoints/imvotenet_stage2_16x8_sunrgbd-3d-10class_20210323_184021-d44dcb66.pth
```
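Under the hood, the outdoor demo composes the calibration matrices into a single `lidar2img` matrix and projects homogeneous box corners onto the image. The snippet below is a simplified sketch of that projection (not the library API itself); `points` is an N x 3 array in LiDAR coordinates and all matrices are 4 x 4 as stored in the KITTI info files:

```python
import numpy as np

def project_lidar_to_img(points, P2, R0_rect, Tr_velo_to_cam):
    """Project N x 3 LiDAR points to pixel (u, v) coordinates."""
    lidar2img = P2 @ R0_rect @ Tr_velo_to_cam
    # homogeneous coordinates: (x, y, z, 1)
    pts_4d = np.concatenate([points, np.ones((points.shape[0], 1))], axis=-1)
    pts_2d = pts_4d @ lidar2img.T
    # normalize by depth, clamping to avoid division by points behind the camera
    depth = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=1e5)
    return pts_2d[:, :2] / depth[:, None]
```

For indoor (depth) data, the projection instead goes through the `Rt` and `K` matrices provided in the calibration info.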
@@ -194,7 +194,7 @@ PYTHONPATH="$(dirname $0)/..":$PYTHONPATH

 ### Point cloud demo

-We provide a demo script to test a single sample. Pre-trained models can be downloaded from [model zoo](model_zoo.md)
+We provide several demo scripts to test a single sample. Pre-trained models can be downloaded from [model zoo](model_zoo.md). To test a single-modality 3D detection on point cloud scenes:

 ```shell
 python demo/pcd_demo.py ${PCD_FILE} ${CONFIG_FILE} ${CHECKPOINT_FILE} [--device ${GPU_ID}] [--score-thr ${SCORE_THR}] [--out-dir ${OUT_DIR}]
@@ -203,16 +203,18 @@ python demo/pcd_demo.py ${PCD_FILE} ${CONFIG_FILE} ${CHECKPOINT_FILE} [--device
 Examples:

 ```shell
-python demo/pcd_demo.py demo/kitti_000008.bin configs/second/hv_second_secfpn_6x8_80e_kitti-3d-car.py checkpoints/hv_second_secfpn_6x8_80e_kitti-3d-car_20200620_230238-393f000c.pth
+python demo/pcd_demo.py demo/data/kitti/kitti_000008.bin configs/second/hv_second_secfpn_6x8_80e_kitti-3d-car.py checkpoints/hv_second_secfpn_6x8_80e_kitti-3d-car_20200620_230238-393f000c.pth
 ```

 If you want to input a `ply` file, you can use the following function and convert it to `bin` format. Then you can use the converted `bin` file to generate demo.
 Note that you need to install pandas and plyfile before using this script. This function can also be used for data preprocessing for training ```ply data```.

 ```python
 import numpy as np
 import pandas as pd
 from plyfile import PlyData

-def conver_ply(input_path, output_path):
+def convert_ply(input_path, output_path):
     plydata = PlyData.read(input_path)  # read file
     data = plydata.elements[0].data  # read data
     data_pd = pd.DataFrame(data)  # convert to DataFrame
@@ -223,12 +225,31 @@ def conver_ply(input_path, output_path):
         data_np[:, i] = data_pd[name]
     data_np.astype(np.float32).tofile(output_path)
 ```

 Examples:

 ```python
 convert_ply('./test.ply', './test.bin')
 ```

+If you have point clouds in other format (`off`, `obj`, etc.), you can use trimesh to convert them into `ply`.
+
+```python
+import trimesh
+
+def to_ply(input_path, output_path, original_type):
+    mesh = trimesh.load(input_path, file_type=original_type)  # read file
+    mesh.export(output_path, file_type='ply')  # convert to ply
+```
+
+Examples:
+
+```python
+to_ply('./test.obj', './test.ply', 'obj')
+```
+
+More demos about single/multi-modality and indoor/outdoor 3D detection can be found in [demo](0_demo.md).
+
 ## High-level APIs for testing point clouds

 ### Synchronous interface
......
@@ -13,6 +13,7 @@ Welcome to MMDetection3D's documentation!
    :maxdepth: 2
    :caption: Quick Run

+   0_demo.md
    1_exist_data_model.md
    2_new_data_model.md
......
-from .inference import (convert_SyncBN, inference_detector, init_detector,
-                        show_result_meshlab)
+from .inference import (convert_SyncBN, inference_detector,
+                        inference_multi_modality_detector, init_detector,
+                        show_result_meshlab)
 from .test import single_gpu_test

 __all__ = [
     'inference_detector', 'init_detector', 'single_gpu_test',
-    'show_result_meshlab', 'convert_SyncBN'
+    'show_result_meshlab', 'convert_SyncBN',
+    'inference_multi_modality_detector'
 ]
 import mmcv
+import numpy as np
+import re
 import torch
 from copy import deepcopy
 from mmcv.parallel import collate, scatter
 from mmcv.runner import load_checkpoint
 from os import path as osp

-from mmdet3d.core import Box3DMode, show_result
+from mmdet3d.core import (Box3DMode, DepthInstance3DBoxes,
+                          LiDARInstance3DBoxes, show_multi_modality_result,
+                          show_result)
 from mmdet3d.core.bbox import get_box_type
 from mmdet3d.datasets.pipelines import Compose
 from mmdet3d.models import build_detector
@@ -106,13 +110,89 @@ def inference_detector(model, pcd):
     return result, data


-def show_result_meshlab(data, result, out_dir):
+def inference_multi_modality_detector(model, pcd, image, ann_file):
+    """Inference point cloud with the multi-modality detector.
+
+    Args:
+        model (nn.Module): The loaded detector.
+        pcd (str): Point cloud files.
+        image (str): Image files.
+        ann_file (str): Annotation files.
+
+    Returns:
+        tuple: Predicted results and data from pipeline.
+    """
+    cfg = model.cfg
+    device = next(model.parameters()).device  # model device
+    # build the data pipeline
+    test_pipeline = deepcopy(cfg.data.test.pipeline)
+    test_pipeline = Compose(test_pipeline)
+    box_type_3d, box_mode_3d = get_box_type(cfg.data.test.box_type_3d)
+    # get data info containing calib
+    data_infos = mmcv.load(ann_file)
+    image_idx = int(re.findall(r'\d+', image)[-1])  # xxx/sunrgbd_000017.jpg
+    for x in data_infos:
+        if int(x['image']['image_idx']) != image_idx:
+            continue
+        info = x
+        break
+    data = dict(
+        pts_filename=pcd,
+        img_prefix=osp.dirname(image),
+        img_info=dict(filename=osp.basename(image)),
+        box_type_3d=box_type_3d,
+        box_mode_3d=box_mode_3d,
+        img_fields=[],
+        bbox3d_fields=[],
+        pts_mask_fields=[],
+        pts_seg_fields=[],
+        bbox_fields=[],
+        mask_fields=[],
+        seg_fields=[])
+
+    # depth map points to image conversion
+    if box_mode_3d == Box3DMode.DEPTH:
+        data.update(dict(calib=info['calib']))
+
+    data = test_pipeline(data)
+
+    # LiDAR to image conversion
+    if box_mode_3d == Box3DMode.LIDAR:
+        rect = info['calib']['R0_rect'].astype(np.float32)
+        Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
+        P2 = info['calib']['P2'].astype(np.float32)
+        lidar2img = P2 @ rect @ Trv2c
+        data['img_metas'][0].data['lidar2img'] = lidar2img
+    elif box_mode_3d == Box3DMode.DEPTH:
+        data['calib'][0]['Rt'] = data['calib'][0]['Rt'].astype(np.float32)
+        data['calib'][0]['K'] = data['calib'][0]['K'].astype(np.float32)
+
+    data = collate([data], samples_per_gpu=1)
+    if next(model.parameters()).is_cuda:
+        # scatter to specified GPU
+        data = scatter(data, [device.index])[0]
+    else:
+        # this is a workaround to avoid the bug of MMDataParallel
+        data['img_metas'] = data['img_metas'][0].data
+        data['points'] = data['points'][0].data
+        data['img'] = data['img'][0].data
+        if box_mode_3d == Box3DMode.DEPTH:
+            data['calib'] = data['calib'][0].data
+
+    # forward the model
+    with torch.no_grad():
+        result = model(return_loss=False, rescale=True, **data)
+    return result, data
+
+
+def show_result_meshlab(data, result, out_dir, score_thr=0.0):
     """Show result by meshlab.

     Args:
         data (dict): Contain data from pipeline.
         result (dict): Predicted result from model.
         out_dir (str): Directory to save visualized result.
+        score_thr (float): Minimum score of bboxes to be shown. Default: 0.0
     """
     points = data['points'][0][0].cpu().numpy()
     pts_filename = data['img_metas'][0][0]['pts_filename']
@@ -122,14 +202,67 @@ def show_result_meshlab(data, result, out_dir):
     if 'pts_bbox' in result[0].keys():
         pred_bboxes = result[0]['pts_bbox']['boxes_3d'].tensor.numpy()
+        pred_scores = result[0]['pts_bbox']['scores_3d'].numpy()
     else:
         pred_bboxes = result[0]['boxes_3d'].tensor.numpy()
+        pred_scores = result[0]['scores_3d'].numpy()
+
+    # filter out low score bboxes for visualization
+    if score_thr > 0:
+        inds = pred_scores > score_thr
+        pred_bboxes = pred_bboxes[inds]
+
     # for now we convert points into depth mode
-    if data['img_metas'][0][0]['box_mode_3d'] != Box3DMode.DEPTH:
+    box_mode = data['img_metas'][0][0]['box_mode_3d']
+    if box_mode != Box3DMode.DEPTH:
         points = points[..., [1, 0, 2]]
         points[..., 0] *= -1
-        pred_bboxes = Box3DMode.convert(pred_bboxes,
-                                        data['img_metas'][0][0]['box_mode_3d'],
-                                        Box3DMode.DEPTH)
-    show_result(points, None, pred_bboxes, out_dir, file_name, show=False)
+        show_bboxes = Box3DMode.convert(pred_bboxes, box_mode, Box3DMode.DEPTH)
+    else:
+        show_bboxes = deepcopy(pred_bboxes)
+    show_result(points, None, show_bboxes, out_dir, file_name, show=False)
+
+    if 'img' not in data.keys():
+        return out_dir, file_name
+
+    # multi-modality visualization
+    # project 3D bbox to 2D image plane
+    if box_mode == Box3DMode.LIDAR:
+        if 'lidar2img' not in data['img_metas'][0][0]:
+            raise NotImplementedError(
+                'LiDAR to image transformation matrix is not provided')
+
+        show_bboxes = LiDARInstance3DBoxes(pred_bboxes, origin=(0.5, 0.5, 0))
+        img = mmcv.imread(data['img_metas'][0][0]['filename'])
+
+        show_multi_modality_result(
+            img,
+            None,
+            show_bboxes,
+            data['img_metas'][0][0]['lidar2img'],
+            out_dir,
+            file_name,
+            show=False)
+    elif box_mode == Box3DMode.DEPTH:
+        if 'calib' not in data.keys():
+            raise NotImplementedError(
+                'camera calibration information is not provided')
+
+        show_bboxes = DepthInstance3DBoxes(pred_bboxes, origin=(0.5, 0.5, 0))
+        img = mmcv.imread(data['img_metas'][0][0]['filename'])
+
+        show_multi_modality_result(
+            img,
+            None,
+            show_bboxes,
+            data['calib'][0],
+            out_dir,
+            file_name,
+            depth_bbox=True,
+            img_metas=data['img_metas'][0][0],
+            show=False)
+    else:
+        raise NotImplementedError(
+            f'visualization of {box_mode} bbox is not supported')

     return out_dir, file_name
-from .show_result import show_result, show_seg_result
+from .show_result import (show_multi_modality_result, show_result,
+                          show_seg_result)

-__all__ = ['show_result', 'show_seg_result']
+__all__ = ['show_result', 'show_seg_result', 'show_multi_modality_result']
@@ -365,7 +365,7 @@ def project_pts_on_img(points,
             color=tuple(color),
             thickness=thickness,
         )
-    cv2.imshow('project_pts_img', img)
+    cv2.imshow('project_pts_img', img.astype(np.uint8))
     cv2.waitKey(100)
@@ -407,10 +407,117 @@ def project_bbox3d_on_img(bboxes3d,
                      (corners[end, 0], corners[end, 1]), color, thickness,
                      cv2.LINE_AA)

-    cv2.imshow('project_bbox3d_img', img)
+    cv2.imshow('project_bbox3d_img', img.astype(np.uint8))
     cv2.waitKey(0)


+def draw_lidar_bbox3d_on_img(bboxes3d,
+                             raw_img,
+                             lidar2img_rt,
+                             img_metas,
+                             color=(0, 255, 0),
+                             thickness=1):
+    """Project the 3D bbox on 2D plane and draw on input image.
+
+    Args:
+        bboxes3d (numpy.array, shape=[M, 7]):
+            3d bbox (x, y, z, dx, dy, dz, yaw) to visualize.
+        raw_img (numpy.array): The numpy array of image.
+        lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix
+            according to the camera intrinsic parameters.
+        img_metas (dict): Useless here.
+        color (tuple[int]): The color to draw bboxes. Default: (0, 255, 0).
+        thickness (int, optional): The thickness of bboxes. Default: 1.
+    """
+    img = raw_img.copy()
+    corners_3d = bboxes3d.corners
+    num_bbox = corners_3d.shape[0]
+    pts_4d = np.concatenate(
+        [corners_3d.reshape(-1, 3),
+         np.ones((num_bbox * 8, 1))], axis=-1)
+    lidar2img_rt = copy.deepcopy(lidar2img_rt).reshape(4, 4)
+    if isinstance(lidar2img_rt, torch.Tensor):
+        lidar2img_rt = lidar2img_rt.cpu().numpy()
+    pts_2d = pts_4d @ lidar2img_rt.T
+
+    pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=1e5)
+    pts_2d[:, 0] /= pts_2d[:, 2]
+    pts_2d[:, 1] /= pts_2d[:, 2]
+    imgfov_pts_2d = pts_2d[..., :2].reshape(num_bbox, 8, 2)
+
+    line_indices = ((0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (3, 2), (3, 7),
+                    (4, 5), (4, 7), (2, 6), (5, 6), (6, 7))
+    for i in range(num_bbox):
+        corners = imgfov_pts_2d[i].astype(np.int)
+        for start, end in line_indices:
+            cv2.line(img, (corners[start, 0], corners[start, 1]),
+                     (corners[end, 0], corners[end, 1]), color, thickness,
+                     cv2.LINE_AA)
+
+    return img.astype(np.uint8)
+
+
+def draw_depth_bbox3d_on_img(bboxes3d,
+                             raw_img,
+                             calibs,
+                             img_metas,
+                             color=(0, 255, 0),
+                             thickness=1):
+    """Project the 3D bbox on 2D plane and draw on input image.
+
+    Args:
+        bboxes3d (numpy.array, shape=[M, 7]):
+            3d camera bbox (x, y, z, dx, dy, dz, yaw) to visualize.
+        raw_img (numpy.array): The numpy array of image.
+        calibs (dict): Camera calibration information, Rt and K.
+        img_metas (dict): Used in coordinates transformation.
+        color (tuple[int]): The color to draw bboxes. Default: (0, 255, 0).
+        thickness (int, optional): The thickness of bboxes. Default: 1.
+    """
+    from mmdet3d.core import Coord3DMode
+    from mmdet3d.core.bbox import points_cam2img
+    from mmdet3d.models import apply_3d_transformation
+
+    img = raw_img.copy()
+    calibs = copy.deepcopy(calibs)
+    img_metas = copy.deepcopy(img_metas)
+    corners_3d = bboxes3d.corners
+    num_bbox = corners_3d.shape[0]
+    points_3d = corners_3d.reshape(-1, 3)
+    assert ('Rt' in calibs.keys() and 'K' in calibs.keys()), \
+        'Rt and K matrix should be provided as camera calibration information'
+    if not isinstance(calibs['Rt'], torch.Tensor):
+        calibs['Rt'] = torch.from_numpy(np.array(calibs['Rt']))
+    if not isinstance(calibs['K'], torch.Tensor):
+        calibs['K'] = torch.from_numpy(np.array(calibs['K']))
+    calibs['Rt'] = calibs['Rt'].reshape(3, 3).float().cpu()
+    calibs['K'] = calibs['K'].reshape(3, 3).float().cpu()
+
+    # first reverse the data transformations
+    xyz_depth = apply_3d_transformation(
+        points_3d, 'DEPTH', img_metas, reverse=True)
+
+    # then convert from depth coords to camera coords
+    xyz_cam = Coord3DMode.convert_point(
+        xyz_depth, Coord3DMode.DEPTH, Coord3DMode.CAM, rt_mat=calibs['Rt'])
+
+    # project to 2d to get image coords (uv)
+    uv_origin = points_cam2img(xyz_cam, calibs['K'])
+    uv_origin = (uv_origin - 1).round()
+    imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()
+
+    line_indices = ((0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (3, 2), (3, 7),
+                    (4, 5), (4, 7), (2, 6), (5, 6), (6, 7))
+    for i in range(num_bbox):
+        corners = imgfov_pts_2d[i].astype(np.int)
+        for start, end in line_indices:
+            cv2.line(img, (corners[start, 0], corners[start, 1]),
+                     (corners[end, 0], corners[end, 1]), color, thickness,
+                     cv2.LINE_AA)
+
+    return img.astype(np.uint8)
+
+
 class Visualizer(object):
     r"""Online visualizer implemented with Open3d.
......
@@ -77,7 +77,7 @@ def show_result(points, gt_bboxes, pred_bboxes, out_dir, filename, show=True):
         pred_bboxes (np.ndarray): Predicted boxes.
         out_dir (str): Path of output directory
         filename (str): Filename of the current frame.
-        show (bool): Visualize the results online.
+        show (bool): Visualize the results online. Defaults to True.
     """
     if show:
         from .open3d_vis import Visualizer
@@ -178,3 +178,70 @@ def show_seg_result(points,
     if pred_seg is not None:
         _write_obj(pred_seg_color, osp.join(result_path,
                                             f'{filename}_pred.obj'))
+
+
+def show_multi_modality_result(img,
+                               gt_bboxes,
+                               pred_bboxes,
+                               proj_mat,
+                               out_dir,
+                               filename,
+                               depth_bbox=False,
+                               img_metas=None,
+                               show=True,
+                               gt_bbox_color=(61, 102, 255),
+                               pred_bbox_color=(241, 101, 72)):
+    """Convert multi-modality detection results into 2D results.
+
+    Project the predicted 3D bbox to 2D image plane and visualize them.
+
+    Args:
+        img (np.ndarray): The numpy array of image in cv2 fashion.
+        gt_bboxes (np.ndarray): Ground truth boxes.
+        pred_bboxes (np.ndarray): Predicted boxes.
+        proj_mat (numpy.array, shape=[4, 4]): The projection matrix
+            according to the camera intrinsic parameters.
+        out_dir (str): Path of output directory.
+        filename (str): Filename of the current frame.
+        depth_bbox (bool): Whether we are projecting camera bbox or lidar bbox.
+        img_metas (dict): Used in projecting camera bbox.
+        show (bool): Visualize the results online. Defaults to True.
+        gt_bbox_color (str or tuple(int)): Color of bbox lines.
+            The tuple of color should be in BGR order. Default: (61, 102, 255)
+        pred_bbox_color (str or tuple(int)): Color of bbox lines.
+            The tuple of color should be in BGR order. Default: (241, 101, 72)
+    """
+    if depth_bbox:
+        from .open3d_vis import draw_depth_bbox3d_on_img as draw_bbox
+    else:
+        from .open3d_vis import draw_lidar_bbox3d_on_img as draw_bbox
+
+    result_path = osp.join(out_dir, filename)
+    mmcv.mkdir_or_exist(result_path)
+
+    if show:
+        show_img = img.copy()
+        if gt_bboxes is not None:
+            show_img = draw_bbox(
+                gt_bboxes, show_img, proj_mat, img_metas, color=gt_bbox_color)
+        if pred_bboxes is not None:
+            show_img = draw_bbox(
+                pred_bboxes,
+                show_img,
+                proj_mat,
+                img_metas,
+                color=pred_bbox_color)
+        mmcv.imshow(show_img, win_name='project_bbox3d_img', wait_time=0)
+
+    if img is not None:
+        mmcv.imwrite(img, osp.join(result_path, f'{filename}_img.png'))
+
+    if gt_bboxes is not None:
+        gt_img = draw_bbox(
+            gt_bboxes, img, proj_mat, img_metas, color=gt_bbox_color)
+        mmcv.imwrite(gt_img, osp.join(result_path, f'{filename}_gt.png'))
+
+    if pred_bboxes is not None:
+        pred_img = draw_bbox(
+            pred_bboxes, img, proj_mat, img_metas, color=pred_bbox_color)
+        mmcv.imwrite(pred_img, osp.join(result_path, f'{filename}_pred.png'))
@@ -8,9 +8,9 @@ from mmcv.utils import print_log
 from os import path as osp

 from mmdet.datasets import DATASETS
-from ..core import show_result
+from ..core import show_multi_modality_result, show_result
 from ..core.bbox import (Box3DMode, CameraInstance3DBoxes, Coord3DMode,
-                         points_cam2img)
+                         LiDARInstance3DBoxes, points_cam2img)
 from .custom_3d import Custom3DDataset
@@ -688,11 +688,28 @@ class KittiDataset(Custom3DDataset):
             points = example['points'][0]._data.numpy()
             points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,
                                                Coord3DMode.DEPTH)
-            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor
-            gt_bboxes = Box3DMode.convert(gt_bboxes, Box3DMode.LIDAR,
-                                          Box3DMode.DEPTH)
+            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
+            show_gt_bboxes = Box3DMode.convert(gt_bboxes, Box3DMode.LIDAR,
+                                               Box3DMode.DEPTH)
             pred_bboxes = result['boxes_3d'].tensor.numpy()
-            pred_bboxes = Box3DMode.convert(pred_bboxes, Box3DMode.LIDAR,
-                                            Box3DMode.DEPTH)
-            show_result(points, gt_bboxes, pred_bboxes, out_dir, file_name,
-                        show)
+            show_pred_bboxes = Box3DMode.convert(pred_bboxes, Box3DMode.LIDAR,
+                                                 Box3DMode.DEPTH)
+            show_result(points, show_gt_bboxes, show_pred_bboxes, out_dir,
+                        file_name, show)
+
+            # multi-modality visualization
+            if self.modality['use_camera'] and \
+                    'lidar2img' in example['img_metas'][0]._data.keys():
+                img = mmcv.imread(example['img_metas'][0]._data['filename'])
+                show_pred_bboxes = LiDARInstance3DBoxes(
+                    pred_bboxes, origin=(0.5, 0.5, 0))
+                show_gt_bboxes = LiDARInstance3DBoxes(
+                    gt_bboxes, origin=(0.5, 0.5, 0))
+                show_multi_modality_result(
+                    img,
+                    show_gt_bboxes,
+                    show_pred_bboxes,
+                    example['img_metas'][0]._data['lidar2img'],
+                    out_dir,
+                    file_name,
+                    show=False)
@@ -415,7 +415,7 @@ class LyftDataset(Custom3DDataset):
             points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,
                                                Coord3DMode.DEPTH)
             inds = result['pts_bbox']['scores_3d'] > 0.1
-            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor
+            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
             gt_bboxes = Box3DMode.convert(gt_bboxes, Box3DMode.LIDAR,
                                           Box3DMode.DEPTH)
             pred_bboxes = result['pts_bbox']['boxes_3d'][inds].tensor.numpy()
......
@@ -507,7 +507,7 @@ class NuScenesDataset(Custom3DDataset):
             points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,
                                                Coord3DMode.DEPTH)
             inds = result['pts_bbox']['scores_3d'] > 0.1
-            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor
+            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
             gt_bboxes = Box3DMode.convert(gt_bboxes, Box3DMode.LIDAR,
                                           Box3DMode.DEPTH)
             pred_bboxes = result['pts_bbox']['boxes_3d'][inds].tensor.numpy()
......