Commit 27a546e9 authored by ZCMax, committed by ChaimZhu

[Refactor] Visualization Refactor

parent 0e17beab
@@ -14,6 +14,11 @@ env_cfg = dict(
dist_cfg=dict(backend='nccl'),
)
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
log_level = 'INFO'
load_from = None
resume = False
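# Note (illustrative, not part of this commit): LocalVisBackend writes
# visualization results to local disk. Other mmengine backends can be
# appended to ``vis_backends`` in the same way; which backends are
# available depends on the installed mmengine version, e.g.:
#
# vis_backends = [
#     dict(type='LocalVisBackend'),
#     dict(type='TensorboardVisBackend'),
# ]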
# Copyright (c) OpenMMLab. All rights reserved.
from argparse import ArgumentParser
from mmdet3d.apis import (inference_mono_3d_detector, init_model,
show_result_meshlab)
import mmcv
from mmdet3d.apis import inference_mono_3d_detector, init_model
from mmdet3d.registry import VISUALIZERS
from mmdet3d.utils import register_all_modules
def main():
def parse_args():
parser = ArgumentParser()
parser.add_argument('image', help='image file')
parser.add_argument('ann', help='ann file')
@@ -26,21 +29,42 @@ def main():
action='store_true',
help='whether to save online visualization results')
args = parser.parse_args()
return args
def main(args):
# register all modules in mmdet3d into the registries
register_all_modules()
# build the model from a config file and a checkpoint file
model = init_model(args.config, args.checkpoint, device=args.device)
# init visualizer
visualizer = VISUALIZERS.build(model.cfg.visualizer)
visualizer.dataset_meta = {
'CLASSES': model.CLASSES,
'PALETTE': model.PALETTE
}
# test a single image
result, data = inference_mono_3d_detector(model, args.image, args.ann)
img = mmcv.imread(args.image)
img = mmcv.imconvert(img, 'bgr', 'rgb')
data_input = dict(img=img)
# show the results
show_result_meshlab(
data,
result,
args.out_dir,
args.score_thr,
show=args.show,
snapshot=args.snapshot,
task='mono-det')
visualizer.add_datasample(
'result',
data_input,
pred_sample=result,
show=True,
wait_time=0,
out_file=args.out_file,
pred_score_thr=args.score_thr,
vis_task='mono-det')
if __name__ == '__main__':
main()
args = parse_args()
main(args)
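# Usage sketch (illustrative, not part of this commit): the refactored
# demos all follow the same pattern -- build a Det3DLocalVisualizer from
# a plain config dict via the VISUALIZERS registry, then attach dataset
# metadata. The class names and palette below are placeholder values.
from mmdet3d.registry import VISUALIZERS
from mmdet3d.utils import register_all_modules

register_all_modules()
visualizer = VISUALIZERS.build(
    dict(type='Det3DLocalVisualizer', name='demo_visualizer'))
visualizer.dataset_meta = {
    'CLASSES': ('Pedestrian', 'Cyclist', 'Car'),  # placeholder classes
    'PALETTE': [(220, 20, 60), (119, 11, 32), (0, 0, 142)]  # placeholder
}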
# Copyright (c) OpenMMLab. All rights reserved.
from argparse import ArgumentParser
from mmdet3d.apis import (inference_multi_modality_detector, init_model,
show_result_meshlab)
import mmcv
import numpy as np
from mmdet3d.apis import inference_multi_modality_detector, init_model
from mmdet3d.registry import VISUALIZERS
from mmdet3d.utils import register_all_modules
def main():
def parse_args():
parser = ArgumentParser()
parser.add_argument('pcd', help='Point cloud file')
parser.add_argument('image', help='image file')
@@ -27,22 +31,44 @@ def main():
action='store_true',
help='whether to save online visualization results')
args = parser.parse_args()
return args
def main(args):
# register all modules in mmdet3d into the registries
register_all_modules()
# build the model from a config file and a checkpoint file
model = init_model(args.config, args.checkpoint, device=args.device)
# test a single image
# init visualizer
visualizer = VISUALIZERS.build(model.cfg.visualizer)
visualizer.dataset_meta = {
'CLASSES': model.CLASSES,
'PALETTE': model.PALETTE
}
# test a single image and point cloud sample
result, data = inference_multi_modality_detector(model, args.pcd,
args.image, args.ann)
points = np.fromfile(args.pcd, dtype=np.float32)
img = mmcv.imread(args.image)
img = mmcv.imconvert(img, 'bgr', 'rgb')
data_input = dict(points=points, img=img)
# show the results
show_result_meshlab(
data,
result,
args.out_dir,
args.score_thr,
show=args.show,
snapshot=args.snapshot,
task='multi_modality-det')
visualizer.add_datasample(
'result',
data_input,
pred_sample=result,
show=True,
wait_time=0,
out_file=args.out_file,
pred_score_thr=args.score_thr,
vis_task='multi_modality-det')
if __name__ == '__main__':
main()
args = parse_args()
main(args)
# Copyright (c) OpenMMLab. All rights reserved.
from argparse import ArgumentParser
from mmdet3d.apis import inference_segmentor, init_model, show_result_meshlab
import numpy as np
from mmdet3d.apis import inference_segmentor, init_model
from mmdet3d.registry import VISUALIZERS
from mmdet3d.utils import register_all_modules
def main():
def parse_args():
parser = ArgumentParser()
parser.add_argument('pcd', help='Point cloud file')
parser.add_argument('config', help='Config file')
@@ -22,21 +26,40 @@ def main():
action='store_true',
help='whether to save online visualization results')
args = parser.parse_args()
return args
def main(args):
# register all modules in mmdet3d into the registries
register_all_modules()
# build the model from a config file and a checkpoint file
model = init_model(args.config, args.checkpoint, device=args.device)
# test a single image
# init visualizer
visualizer = VISUALIZERS.build(model.cfg.visualizer)
visualizer.dataset_meta = {
'CLASSES': model.CLASSES,
'PALETTE': model.PALETTE
}
# test a single point cloud sample
result, data = inference_segmentor(model, args.pcd)
points = np.fromfile(args.pcd, dtype=np.float32)
data_input = dict(points=points)
# show the results
show_result_meshlab(
data,
result,
args.out_dir,
show=args.show,
snapshot=args.snapshot,
task='seg',
palette=model.PALETTE)
visualizer.add_datasample(
'result',
data_input,
pred_sample=result,
show=True,
wait_time=0,
out_file=args.out_file,
vis_task='seg')
if __name__ == '__main__':
main()
args = parse_args()
main(args)
# Copyright (c) OpenMMLab. All rights reserved.
from argparse import ArgumentParser
from mmdet3d.apis import inference_detector, init_model, show_result_meshlab
import numpy as np
from mmdet3d.apis import inference_detector, init_model
from mmdet3d.registry import VISUALIZERS
from mmdet3d.utils import register_all_modules
def main():
def parse_args():
parser = ArgumentParser()
parser.add_argument('pcd', help='Point cloud file')
parser.add_argument('config', help='Config file')
@@ -24,21 +28,41 @@ def main():
action='store_true',
help='whether to save online visualization results')
args = parser.parse_args()
return args
def main(args):
# register all modules in mmdet3d into the registries
register_all_modules()
# TODO: Support inference of point cloud numpy file.
# build the model from a config file and a checkpoint file
model = init_model(args.config, args.checkpoint, device=args.device)
# test a single image
# init visualizer
visualizer = VISUALIZERS.build(model.cfg.visualizer)
visualizer.dataset_meta = {
'CLASSES': model.CLASSES,
'PALETTE': model.PALETTE
}
# test a single point cloud sample
result, data = inference_detector(model, args.pcd)
points = np.fromfile(args.pcd, dtype=np.float32)
data_input = dict(points=points)
# show the results
show_result_meshlab(
data,
result,
args.out_dir,
args.score_thr,
show=args.show,
snapshot=args.snapshot,
task='det')
visualizer.add_datasample(
'result',
data_input,
pred_sample=result,
show=True,
wait_time=0,
out_file=args.out_file,
pred_score_thr=args.score_thr,
vis_task='det')
if __name__ == '__main__':
main()
args = parse_args()
main(args)
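# Note (illustrative, not part of this commit): KITTI-style ``.bin``
# point cloud files are flat float32 buffers, so ``np.fromfile`` as used
# in the demos above returns a 1-D array. Downstream drawing code
# generally expects an (N, dim) array; the load dimension of 4
# (x, y, z, intensity) below is an assumption about the input file, and
# the path is hypothetical.
import numpy as np

points = np.fromfile('demo/data/kitti/kitti_000008.bin',
                     dtype=np.float32).reshape(-1, 4)
print(points.shape)  # (N, 4)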
@@ -2,13 +2,12 @@
from .inference import (convert_SyncBN, inference_detector,
inference_mono_3d_detector,
inference_multi_modality_detector, inference_segmentor,
init_model, show_result_meshlab)
init_model)
__all__ = [
'inference_detector',
'init_model',
'inference_mono_3d_detector',
'show_result_meshlab',
'convert_SyncBN',
'inference_multi_modality_detector',
'inference_segmentor',
@@ -9,10 +9,7 @@ import torch
from mmcv.parallel import collate, scatter
from mmcv.runner import load_checkpoint
from mmdet3d.core import (Box3DMode, CameraInstance3DBoxes, Coord3DMode,
DepthInstance3DBoxes, LiDARInstance3DBoxes,
show_multi_modality_result, show_result,
show_seg_result)
from mmdet3d.core import Box3DMode
from mmdet3d.core.bbox import get_box_type
from mmdet3d.datasets.pipelines import Compose
from mmdet3d.models import build_model
@@ -323,204 +320,3 @@ def inference_segmentor(model, pcd):
with torch.no_grad():
result = model(return_loss=False, rescale=True, **data)
return result, data
def show_det_result_meshlab(data,
result,
out_dir,
score_thr=0.0,
show=False,
snapshot=False):
"""Show 3D detection result by meshlab."""
points = data['points'][0][0].cpu().numpy()
pts_filename = data['img_metas'][0][0]['pts_filename']
file_name = osp.split(pts_filename)[-1].split('.')[0]
if 'pts_bbox' in result[0].keys():
pred_bboxes = result[0]['pts_bbox']['boxes_3d'].tensor.numpy()
pred_scores = result[0]['pts_bbox']['scores_3d'].numpy()
else:
pred_bboxes = result[0]['boxes_3d'].tensor.numpy()
pred_scores = result[0]['scores_3d'].numpy()
# filter out low score bboxes for visualization
if score_thr > 0:
inds = pred_scores > score_thr
pred_bboxes = pred_bboxes[inds]
# for now we convert points into depth mode
box_mode = data['img_metas'][0][0]['box_mode_3d']
if box_mode != Box3DMode.DEPTH:
points = Coord3DMode.convert(points, box_mode, Coord3DMode.DEPTH)
show_bboxes = Box3DMode.convert(pred_bboxes, box_mode, Box3DMode.DEPTH)
else:
show_bboxes = deepcopy(pred_bboxes)
show_result(
points,
None,
show_bboxes,
out_dir,
file_name,
show=show,
snapshot=snapshot)
return file_name
def show_seg_result_meshlab(data,
result,
out_dir,
palette,
show=False,
snapshot=False):
"""Show 3D segmentation result by meshlab."""
points = data['points'][0][0].cpu().numpy()
pts_filename = data['img_metas'][0][0]['pts_filename']
file_name = osp.split(pts_filename)[-1].split('.')[0]
pred_seg = result[0]['semantic_mask'].numpy()
if palette is None:
# generate random color map
max_idx = pred_seg.max()
palette = np.random.randint(0, 256, size=(max_idx + 1, 3))
palette = np.array(palette).astype(int)
show_seg_result(
points,
None,
pred_seg,
out_dir,
file_name,
palette=palette,
show=show,
snapshot=snapshot)
return file_name
def show_proj_det_result_meshlab(data,
result,
out_dir,
score_thr=0.0,
show=False,
snapshot=False):
"""Show result of projecting 3D bbox to 2D image by meshlab."""
assert 'img' in data.keys(), 'image data is not provided for visualization'
img_filename = data['img_metas'][0][0]['filename']
file_name = osp.split(img_filename)[-1].split('.')[0]
# read from file because img in data_dict has undergone pipeline transform
img = mmcv.imread(img_filename)
if 'pts_bbox' in result[0].keys():
result[0] = result[0]['pts_bbox']
elif 'img_bbox' in result[0].keys():
result[0] = result[0]['img_bbox']
pred_bboxes = result[0]['boxes_3d'].tensor.numpy()
pred_scores = result[0]['scores_3d'].numpy()
# filter out low score bboxes for visualization
if score_thr > 0:
inds = pred_scores > score_thr
pred_bboxes = pred_bboxes[inds]
box_mode = data['img_metas'][0][0]['box_mode_3d']
if box_mode == Box3DMode.LIDAR:
if 'lidar2img' not in data['img_metas'][0][0]:
raise NotImplementedError(
'LiDAR to image transformation matrix is not provided')
show_bboxes = LiDARInstance3DBoxes(pred_bboxes, origin=(0.5, 0.5, 0))
show_multi_modality_result(
img,
None,
show_bboxes,
data['img_metas'][0][0]['lidar2img'],
out_dir,
file_name,
box_mode='lidar',
show=show)
elif box_mode == Box3DMode.DEPTH:
show_bboxes = DepthInstance3DBoxes(pred_bboxes, origin=(0.5, 0.5, 0))
show_multi_modality_result(
img,
None,
show_bboxes,
None,
out_dir,
file_name,
box_mode='depth',
img_metas=data['img_metas'][0][0],
show=show)
elif box_mode == Box3DMode.CAM:
if 'cam2img' not in data['img_metas'][0][0]:
raise NotImplementedError(
'camera intrinsic matrix is not provided')
show_bboxes = CameraInstance3DBoxes(
pred_bboxes, box_dim=pred_bboxes.shape[-1], origin=(0.5, 1.0, 0.5))
show_multi_modality_result(
img,
None,
show_bboxes,
data['img_metas'][0][0]['cam2img'],
out_dir,
file_name,
box_mode='camera',
show=show)
else:
raise NotImplementedError(
f'visualization of {box_mode} bbox is not supported')
return file_name
def show_result_meshlab(data,
result,
out_dir,
score_thr=0.0,
show=False,
snapshot=False,
task='det',
palette=None):
"""Show result by meshlab.
Args:
data (dict): Contain data from pipeline.
result (dict): Predicted result from model.
out_dir (str): Directory to save visualized result.
score_thr (float, optional): Minimum score of bboxes to be shown.
Default: 0.0
show (bool, optional): Visualize the results online. Defaults to False.
snapshot (bool, optional): Whether to save the online results.
Defaults to False.
task (str, optional): Distinguish which task result to visualize.
Currently we support 3D detection, multi-modality detection and
3D segmentation. Defaults to 'det'.
palette (list[list[int]]] | np.ndarray, optional): The palette
of segmentation map. If None is given, random palette will be
generated. Defaults to None.
"""
assert task in ['det', 'multi_modality-det', 'seg', 'mono-det'], \
f'unsupported visualization task {task}'
assert out_dir is not None, 'Expect out_dir, got none.'
if task in ['det', 'multi_modality-det']:
file_name = show_det_result_meshlab(data, result, out_dir, score_thr,
show, snapshot)
if task in ['seg']:
file_name = show_seg_result_meshlab(data, result, out_dir, palette,
show, snapshot)
if task in ['multi_modality-det', 'mono-det']:
file_name = show_proj_det_result_meshlab(data, result, out_dir,
score_thr, show, snapshot)
return out_dir, file_name
@@ -6,5 +6,5 @@ from .evaluation import * # noqa: F401, F403
from .points import * # noqa: F401, F403
from .post_processing import * # noqa: F401, F403
from .utils import * # noqa: F401, F403
from .visualizer import * # noqa: F401, F403
from .visualization import * # noqa: F401, F403
from .voxel import * # noqa: F401, F403
# Copyright (c) OpenMMLab. All rights reserved.
from .local_visualizer import Det3DLocalVisualizer
from .vis_utils import (proj_camera_bbox3d_to_img, proj_depth_bbox3d_to_img,
proj_lidar_bbox3d_to_img, write_obj,
write_oriented_bbox)
__all__ = [
'Det3DLocalVisualizer', 'proj_depth_bbox3d_to_img',
'proj_camera_bbox3d_to_img', 'proj_lidar_bbox3d_to_img',
'write_oriented_bbox', 'write_obj'
]
(collapsed file diff omitted)
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import numpy as np
import torch
import trimesh
from mmdet3d.core.bbox import Box3DMode, Coord3DMode
from mmdet3d.core.bbox.structures.cam_box3d import CameraInstance3DBoxes
from mmdet3d.core.bbox.structures.depth_box3d import DepthInstance3DBoxes
from mmdet3d.core.bbox.structures.lidar_box3d import LiDARInstance3DBoxes
def write_obj(points, out_filename):
"""Write points into ``obj`` format for meshlab visualization.
Args:
points (np.ndarray): Points in shape (N, dim).
out_filename (str): Filename to be saved.
"""
N = points.shape[0]
fout = open(out_filename, 'w')
for i in range(N):
if points.shape[1] == 6:
c = points[i, 3:].astype(int)
fout.write(
'v %f %f %f %d %d %d\n' %
(points[i, 0], points[i, 1], points[i, 2], c[0], c[1], c[2]))
else:
fout.write('v %f %f %f\n' %
(points[i, 0], points[i, 1], points[i, 2]))
fout.close()
def write_oriented_bbox(scene_bbox, out_filename):
"""Export oriented (around Z axis) scene bbox to meshes.
Args:
scene_bbox(list[ndarray] or ndarray): xyz pos of center and
3 lengths (x_size, y_size, z_size) and heading angle around Z axis.
Y forward, X right, Z upward. heading angle of positive X is 0,
heading angle of positive Y is 90 degrees.
out_filename(str): Filename.
"""
def heading2rotmat(heading_angle):
rotmat = np.zeros((3, 3))
rotmat[2, 2] = 1
cosval = np.cos(heading_angle)
sinval = np.sin(heading_angle)
rotmat[0:2, 0:2] = np.array([[cosval, -sinval], [sinval, cosval]])
return rotmat
def convert_oriented_box_to_trimesh_fmt(box):
ctr = box[:3]
lengths = box[3:6]
trns = np.eye(4)
trns[0:3, 3] = ctr
trns[3, 3] = 1.0
trns[0:3, 0:3] = heading2rotmat(box[6])
box_trimesh_fmt = trimesh.creation.box(lengths, trns)
return box_trimesh_fmt
if len(scene_bbox) == 0:
scene_bbox = np.zeros((1, 7))
scene = trimesh.scene.Scene()
for box in scene_bbox:
scene.add_geometry(convert_oriented_box_to_trimesh_fmt(box))
mesh_list = trimesh.util.concatenate(scene.dump())
# save to obj file
trimesh.io.export.export_mesh(mesh_list, out_filename, file_type='obj')
return
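# Usage sketch (illustrative, not part of this commit): export a single
# unit box (x, y, z, x_size, y_size, z_size, yaw) centered at the origin
# to an ``.obj`` mesh viewable in meshlab. The output path is a
# placeholder.
import numpy as np

box = np.array([[0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0]])
write_oriented_bbox(box, '/tmp/unit_box.obj')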
def to_depth_mode(points, bboxes):
"""Convert points and bboxes to Depth Coord and Depth Box mode."""
if points is not None:
points = Coord3DMode.convert_point(points.copy(), Coord3DMode.LIDAR,
Coord3DMode.DEPTH)
if bboxes is not None:
bboxes = Box3DMode.convert(bboxes.clone(), Box3DMode.LIDAR,
Box3DMode.DEPTH)
return points, bboxes
# TODO: refactor lidar2img to img_meta
def proj_lidar_bbox3d_to_img(bboxes_3d: LiDARInstance3DBoxes,
input_meta: dict) -> np.ndarray:
"""Project the 3D bbox on 2D plane.
Args:
bboxes_3d (:obj:`LiDARInstance3DBoxes`):
3d bbox in lidar coordinate system to visualize.
input_meta (dict): Meta information that contains the 4x4
``lidar2img`` projection matrix.
"""
corners_3d = bboxes_3d.corners
num_bbox = corners_3d.shape[0]
pts_4d = np.concatenate(
[corners_3d.reshape(-1, 3),
np.ones((num_bbox * 8, 1))], axis=-1)
lidar2img = copy.deepcopy(input_meta['lidar2img']).reshape(4, 4)
if isinstance(lidar2img, torch.Tensor):
lidar2img = lidar2img.cpu().numpy()
pts_2d = pts_4d @ lidar2img.T
pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=1e5)
pts_2d[:, 0] /= pts_2d[:, 2]
pts_2d[:, 1] /= pts_2d[:, 2]
imgfov_pts_2d = pts_2d[..., :2].reshape(num_bbox, 8, 2)
return imgfov_pts_2d
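# Sanity-check sketch (illustrative, not part of this commit): project
# one LiDAR box with an identity ``lidar2img`` stand-in; the output has
# shape (num_boxes, 8, 2). The box values and projection matrix are
# arbitrary assumptions.
import numpy as np
from mmdet3d.core.bbox.structures.lidar_box3d import LiDARInstance3DBoxes

boxes = LiDARInstance3DBoxes(
    np.array([[10.0, 0.0, -1.0, 4.0, 2.0, 1.5, 0.0]]))
corners_2d = proj_lidar_bbox3d_to_img(boxes, dict(lidar2img=np.eye(4)))
print(corners_2d.shape)  # (1, 8, 2)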
# TODO: remove third parameter in all functions here in favour of img_metas
def proj_depth_bbox3d_to_img(bboxes_3d: DepthInstance3DBoxes,
input_meta: dict) -> np.ndarray:
"""Project the 3D bbox on 2D plane and draw on input image.
Args:
bboxes_3d (:obj:`DepthInstance3DBoxes`, shape=[M, 7]):
3d bbox in depth coordinate system to visualize.
input_meta (dict): Used in coordinate transformation.
"""
from mmdet3d.core.bbox import points_cam2img
from mmdet3d.models import apply_3d_transformation
input_meta = copy.deepcopy(input_meta)
corners_3d = bboxes_3d.corners
num_bbox = corners_3d.shape[0]
points_3d = corners_3d.reshape(-1, 3)
# first reverse the data transformations
xyz_depth = apply_3d_transformation(
points_3d, 'DEPTH', input_meta, reverse=True)
# project to 2d to get image coords (uv)
uv_origin = points_cam2img(xyz_depth,
xyz_depth.new_tensor(input_meta['depth2img']))
uv_origin = (uv_origin - 1).round()
imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()
return imgfov_pts_2d
# project the camera bboxes 3d to image
def proj_camera_bbox3d_to_img(bboxes_3d: CameraInstance3DBoxes,
input_meta: dict) -> np.ndarray:
"""Project the 3D bbox on 2D plane and draw on input image.
Args:
bboxes_3d (:obj:`CameraInstance3DBoxes`, shape=[M, 7]):
3d bbox in camera coordinate system to visualize.
input_meta (dict): Meta information that contains the camera
intrinsic matrix ``cam2img``.
"""
from mmdet3d.core.bbox import points_cam2img
cam2img = copy.deepcopy(input_meta['cam2img'])
corners_3d = bboxes_3d.corners
num_bbox = corners_3d.shape[0]
points_3d = corners_3d.reshape(-1, 3)
if not isinstance(cam2img, torch.Tensor):
cam2img = torch.from_numpy(np.array(cam2img))
assert (cam2img.shape == torch.Size([3, 3])
or cam2img.shape == torch.Size([4, 4]))
cam2img = cam2img.float().cpu()
# project to 2d to get image coords (uv)
uv_origin = points_cam2img(points_3d, cam2img)
uv_origin = (uv_origin - 1).round()
imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()
return imgfov_pts_2d
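# Sanity-check sketch (illustrative, not part of this commit): the
# camera variant accepts a 3x3 or 4x4 intrinsic under the ``cam2img``
# key; the intrinsic values here are made up.
import numpy as np
from mmdet3d.core.bbox.structures.cam_box3d import CameraInstance3DBoxes

boxes = CameraInstance3DBoxes(
    np.array([[0.0, 1.0, 10.0, 2.0, 1.5, 4.0, 0.0]]))
K = np.array([[721.5, 0.0, 609.6],
              [0.0, 721.5, 172.9],
              [0.0, 0.0, 1.0]])
corners_2d = proj_camera_bbox3d_to_img(boxes, dict(cam2img=K))
print(corners_2d.shape)  # (1, 8, 2)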
# Copyright (c) OpenMMLab. All rights reserved.
from .show_result import (show_multi_modality_result, show_result,
show_seg_result)
__all__ = ['show_result', 'show_seg_result', 'show_multi_modality_result']
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import cv2
import numpy as np
import torch
from matplotlib import pyplot as plt
def project_pts_on_img(points,
raw_img,
lidar2img_rt,
max_distance=70,
thickness=-1):
"""Project the 3D points cloud on 2D image.
Args:
points (numpy.array): 3D points cloud (x, y, z) to visualize.
raw_img (numpy.array): The numpy array of image.
lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix
according to the camera intrinsic parameters.
max_distance (float, optional): the max distance of the points cloud.
Default: 70.
thickness (int, optional): The thickness of 2D points. Default: -1.
"""
img = raw_img.copy()
num_points = points.shape[0]
pts_4d = np.concatenate([points[:, :3], np.ones((num_points, 1))], axis=-1)
pts_2d = pts_4d @ lidar2img_rt.T
# pts_2d holds homogeneous image coordinates (u*d, v*d, d, 1)
# normalize by depth to obtain pixel coordinates
pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=99999)
pts_2d[:, 0] /= pts_2d[:, 2]
pts_2d[:, 1] /= pts_2d[:, 2]
fov_inds = ((pts_2d[:, 0] < img.shape[1])
& (pts_2d[:, 0] >= 0)
& (pts_2d[:, 1] < img.shape[0])
& (pts_2d[:, 1] >= 0))
imgfov_pts_2d = pts_2d[fov_inds, :3] # u, v, d
cmap = plt.cm.get_cmap('hsv', 256)
cmap = np.array([cmap(i) for i in range(256)])[:, :3] * 255
for i in range(imgfov_pts_2d.shape[0]):
depth = imgfov_pts_2d[i, 2]
color = cmap[np.clip(int(max_distance * 10 / depth), 0, 255), :]
cv2.circle(
img,
center=(int(np.round(imgfov_pts_2d[i, 0])),
int(np.round(imgfov_pts_2d[i, 1]))),
radius=1,
color=tuple(color),
thickness=thickness,
)
cv2.imshow('project_pts_img', img.astype(np.uint8))
cv2.waitKey(100)
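# Usage sketch (illustrative, not part of this commit): color synthetic
# points by depth on a blank canvas, using an identity matrix as a
# stand-in for a real ``lidar2img_rt``. A display is required since the
# function calls ``cv2.imshow`` internally.
import numpy as np

canvas = np.zeros((375, 1242, 3), dtype=np.uint8)
pts = np.random.rand(1000, 3) * 50.0 + 1.0  # keep depth positive
project_pts_on_img(pts, canvas, np.eye(4))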
def plot_rect3d_on_img(img,
num_rects,
rect_corners,
color=(0, 255, 0),
thickness=1):
"""Plot the boundary lines of 3D rectangular on 2D images.
Args:
img (numpy.array): The numpy array of image.
num_rects (int): Number of 3D rectangulars.
rect_corners (numpy.array): Coordinates of the corners of 3D
rectangulars. Should be in the shape of [num_rect, 8, 2].
color (tuple[int], optional): The color to draw bboxes.
Default: (0, 255, 0).
thickness (int, optional): The thickness of bboxes. Default: 1.
"""
line_indices = ((0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (3, 2), (3, 7),
(4, 5), (4, 7), (2, 6), (5, 6), (6, 7))
for i in range(num_rects):
corners = rect_corners[i].astype(int)
for start, end in line_indices:
cv2.line(img, (corners[start, 0], corners[start, 1]),
(corners[end, 0], corners[end, 1]), color, thickness,
cv2.LINE_AA)
return img.astype(np.uint8)
def draw_lidar_bbox3d_on_img(bboxes3d,
raw_img,
lidar2img_rt,
img_metas,
color=(0, 255, 0),
thickness=1):
"""Project the 3D bbox on 2D plane and draw on input image.
Args:
bboxes3d (:obj:`LiDARInstance3DBoxes`):
3d bbox in lidar coordinate system to visualize.
raw_img (numpy.array): The numpy array of image.
lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix
according to the camera intrinsic parameters.
img_metas (dict): Unused here; kept for a consistent interface.
color (tuple[int], optional): The color to draw bboxes.
Default: (0, 255, 0).
thickness (int, optional): The thickness of bboxes. Default: 1.
"""
img = raw_img.copy()
corners_3d = bboxes3d.corners
num_bbox = corners_3d.shape[0]
pts_4d = np.concatenate(
[corners_3d.reshape(-1, 3),
np.ones((num_bbox * 8, 1))], axis=-1)
lidar2img_rt = copy.deepcopy(lidar2img_rt).reshape(4, 4)
if isinstance(lidar2img_rt, torch.Tensor):
lidar2img_rt = lidar2img_rt.cpu().numpy()
pts_2d = pts_4d @ lidar2img_rt.T
pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=1e5)
pts_2d[:, 0] /= pts_2d[:, 2]
pts_2d[:, 1] /= pts_2d[:, 2]
imgfov_pts_2d = pts_2d[..., :2].reshape(num_bbox, 8, 2)
return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)
# TODO: remove third parameter in all functions here in favour of img_metas
def draw_depth_bbox3d_on_img(bboxes3d,
raw_img,
calibs,
img_metas,
color=(0, 255, 0),
thickness=1):
"""Project the 3D bbox on 2D plane and draw on input image.
Args:
bboxes3d (:obj:`DepthInstance3DBoxes`, shape=[M, 7]):
3d bbox in depth coordinate system to visualize.
raw_img (numpy.array): The numpy array of image.
calibs (dict): Camera calibration information, Rt and K.
img_metas (dict): Used in coordinate transformation.
color (tuple[int], optional): The color to draw bboxes.
Default: (0, 255, 0).
thickness (int, optional): The thickness of bboxes. Default: 1.
"""
from mmdet3d.core.bbox import points_cam2img
from mmdet3d.models import apply_3d_transformation
img = raw_img.copy()
img_metas = copy.deepcopy(img_metas)
corners_3d = bboxes3d.corners
num_bbox = corners_3d.shape[0]
points_3d = corners_3d.reshape(-1, 3)
# first reverse the data transformations
xyz_depth = apply_3d_transformation(
points_3d, 'DEPTH', img_metas, reverse=True)
# project to 2d to get image coords (uv)
uv_origin = points_cam2img(xyz_depth,
xyz_depth.new_tensor(img_metas['depth2img']))
uv_origin = (uv_origin - 1).round()
imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()
return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)
def draw_camera_bbox3d_on_img(bboxes3d,
raw_img,
cam2img,
img_metas,
color=(0, 255, 0),
thickness=1):
"""Project the 3D bbox on 2D plane and draw on input image.
Args:
bboxes3d (:obj:`CameraInstance3DBoxes`, shape=[M, 7]):
3d bbox in camera coordinate system to visualize.
raw_img (numpy.array): The numpy array of image.
cam2img (numpy.array | torch.Tensor): Camera intrinsic matrix,
denoted as `K` in depth bbox coordinate system.
img_metas (dict): Unused here; kept for a consistent interface.
color (tuple[int], optional): The color to draw bboxes.
Default: (0, 255, 0).
thickness (int, optional): The thickness of bboxes. Default: 1.
"""
from mmdet3d.core.bbox import points_cam2img
img = raw_img.copy()
cam2img = copy.deepcopy(cam2img)
corners_3d = bboxes3d.corners
num_bbox = corners_3d.shape[0]
points_3d = corners_3d.reshape(-1, 3)
if not isinstance(cam2img, torch.Tensor):
cam2img = torch.from_numpy(np.array(cam2img))
assert (cam2img.shape == torch.Size([3, 3])
or cam2img.shape == torch.Size([4, 4]))
cam2img = cam2img.float().cpu()
# project to 2d to get image coords (uv)
uv_origin = points_cam2img(points_3d, cam2img)
uv_origin = (uv_origin - 1).round()
imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()
return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import numpy as np
import torch
try:
import open3d as o3d
from open3d import geometry
except ImportError:
raise ImportError(
'Please run "pip install open3d" to install open3d first.')
def _draw_points(points,
vis,
points_size=2,
point_color=(0.5, 0.5, 0.5),
mode='xyz'):
"""Draw points on visualizer.
Args:
points (numpy.array | torch.tensor, shape=[N, 3+C]):
points to visualize.
vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.
points_size (int, optional): the size of points to show on visualizer.
Default: 2.
point_color (tuple[float], optional): the color of points.
Default: (0.5, 0.5, 0.5).
mode (str, optional): indicate type of the input points,
available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
Returns:
tuple: points, color of each point.
"""
vis.get_render_option().point_size = points_size # set points size
if isinstance(points, torch.Tensor):
points = points.cpu().numpy()
points = points.copy()
pcd = geometry.PointCloud()
if mode == 'xyz':
pcd.points = o3d.utility.Vector3dVector(points[:, :3])
points_colors = np.tile(np.array(point_color), (points.shape[0], 1))
elif mode == 'xyzrgb':
pcd.points = o3d.utility.Vector3dVector(points[:, :3])
points_colors = points[:, 3:6]
# normalize to [0, 1] for open3d drawing
if not ((points_colors >= 0.0) & (points_colors <= 1.0)).all():
points_colors /= 255.0
else:
raise NotImplementedError
pcd.colors = o3d.utility.Vector3dVector(points_colors)
vis.add_geometry(pcd)
return pcd, points_colors
def _draw_bboxes(bbox3d,
vis,
points_colors,
pcd=None,
bbox_color=(0, 1, 0),
points_in_box_color=(1, 0, 0),
rot_axis=2,
center_mode='lidar_bottom',
mode='xyz'):
"""Draw bbox on visualizer and change the color of points inside bbox3d.
Args:
bbox3d (numpy.array | torch.tensor, shape=[M, 7]):
3d bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.
points_colors (numpy.array): color of each points.
pcd (:obj:`open3d.geometry.PointCloud`, optional): point cloud.
Default: None.
bbox_color (tuple[float], optional): the color of bbox.
Default: (0, 1, 0).
points_in_box_color (tuple[float], optional):
the color of points inside bbox3d. Default: (1, 0, 0).
rot_axis (int, optional): rotation axis of bbox. Default: 2.
center_mode (str, optional): indicate the center of bbox is
bottom center or gravity center. available mode
['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
mode (str, optional): indicate type of the input points,
available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
"""
if isinstance(bbox3d, torch.Tensor):
bbox3d = bbox3d.cpu().numpy()
bbox3d = bbox3d.copy()
in_box_color = np.array(points_in_box_color)
for i in range(len(bbox3d)):
center = bbox3d[i, 0:3]
dim = bbox3d[i, 3:6]
yaw = np.zeros(3)
yaw[rot_axis] = bbox3d[i, 6]
rot_mat = geometry.get_rotation_matrix_from_xyz(yaw)
if center_mode == 'lidar_bottom':
center[rot_axis] += dim[
rot_axis] / 2 # bottom center to gravity center
elif center_mode == 'camera_bottom':
center[rot_axis] -= dim[
rot_axis] / 2 # bottom center to gravity center
box3d = geometry.OrientedBoundingBox(center, rot_mat, dim)
line_set = geometry.LineSet.create_from_oriented_bounding_box(box3d)
line_set.paint_uniform_color(bbox_color)
# draw bboxes on visualizer
vis.add_geometry(line_set)
# change the color of points which are in box
if pcd is not None and mode == 'xyz':
indices = box3d.get_point_indices_within_bounding_box(pcd.points)
points_colors[indices] = in_box_color
# update points colors
if pcd is not None:
pcd.colors = o3d.utility.Vector3dVector(points_colors)
vis.update_geometry(pcd)
def show_pts_boxes(points,
bbox3d=None,
show=True,
save_path=None,
points_size=2,
point_color=(0.5, 0.5, 0.5),
bbox_color=(0, 1, 0),
points_in_box_color=(1, 0, 0),
rot_axis=2,
center_mode='lidar_bottom',
mode='xyz'):
"""Draw bbox and points on visualizer.
Args:
points (numpy.array | torch.tensor, shape=[N, 3+C]):
points to visualize.
bbox3d (numpy.array | torch.tensor, shape=[M, 7], optional):
3D bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
Defaults to None.
show (bool, optional): whether to show the visualization results.
Default: True.
save_path (str, optional): path to save visualized results.
Default: None.
points_size (int, optional): the size of points to show on visualizer.
Default: 2.
point_color (tuple[float], optional): the color of points.
Default: (0.5, 0.5, 0.5).
bbox_color (tuple[float], optional): the color of bbox.
Default: (0, 1, 0).
points_in_box_color (tuple[float], optional):
the color of points which are in bbox3d. Default: (1, 0, 0).
rot_axis (int, optional): rotation axis of bbox. Default: 2.
center_mode (str, optional): indicate the center of bbox is bottom
center or gravity center. available mode
['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
mode (str, optional): indicate type of the input points, available
mode ['xyz', 'xyzrgb']. Default: 'xyz'.
"""
# TODO: support score and class info
assert 0 <= rot_axis <= 2
# init visualizer
vis = o3d.visualization.Visualizer()
vis.create_window()
mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
size=1, origin=[0, 0, 0]) # create coordinate frame
vis.add_geometry(mesh_frame)
# draw points
pcd, points_colors = _draw_points(points, vis, points_size, point_color,
mode)
# draw boxes
if bbox3d is not None:
_draw_bboxes(bbox3d, vis, points_colors, pcd, bbox_color,
points_in_box_color, rot_axis, center_mode, mode)
if show:
vis.run()
if save_path is not None:
vis.capture_screen_image(save_path)
vis.destroy_window()
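# Usage sketch (illustrative, not part of this commit): synthetic
# depth-mode points with one bottom-centered box (the default
# ``center_mode='lidar_bottom'`` shifts it to the gravity center
# internally). A display is required for the Open3D window.
import numpy as np

pts = np.random.rand(2000, 3) * 10.0
box = np.array([[5.0, 5.0, 1.0, 2.0, 2.0, 1.0, 0.3]])
show_pts_boxes(pts, bbox3d=box, show=True)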
def _draw_bboxes_ind(bbox3d,
vis,
indices,
points_colors,
pcd=None,
bbox_color=(0, 1, 0),
points_in_box_color=(1, 0, 0),
rot_axis=2,
center_mode='lidar_bottom',
mode='xyz'):
"""Draw bbox on visualizer and change the color or points inside bbox3d
with indices.
Args:
bbox3d (numpy.array | torch.tensor, shape=[M, 7]):
3d bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.
indices (numpy.array | torch.tensor, shape=[N, M]):
indicate which bbox3d each point lies in.
points_colors (numpy.array): color of each points.
pcd (:obj:`open3d.geometry.PointCloud`, optional): point cloud.
Default: None.
bbox_color (tuple[float], optional): the color of bbox.
Default: (0, 1, 0).
points_in_box_color (tuple[float], optional):
the color of points which are in bbox3d. Default: (1, 0, 0).
rot_axis (int, optional): rotation axis of bbox. Default: 2.
center_mode (str, optional): indicate the center of bbox is
bottom center or gravity center. available mode
['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
mode (str, optional): indicate type of the input points,
available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
"""
if isinstance(bbox3d, torch.Tensor):
bbox3d = bbox3d.cpu().numpy()
if isinstance(indices, torch.Tensor):
indices = indices.cpu().numpy()
bbox3d = bbox3d.copy()
in_box_color = np.array(points_in_box_color)
for i in range(len(bbox3d)):
center = bbox3d[i, 0:3]
dim = bbox3d[i, 3:6]
yaw = np.zeros(3)
# TODO: fix problem of current coordinate system
# dim[0], dim[1] = dim[1], dim[0] # for current coordinate
# yaw[rot_axis] = -(bbox3d[i, 6] - 0.5 * np.pi)
yaw[rot_axis] = -bbox3d[i, 6]
rot_mat = geometry.get_rotation_matrix_from_xyz(yaw)
if center_mode == 'lidar_bottom':
center[rot_axis] += dim[
rot_axis] / 2 # bottom center to gravity center
elif center_mode == 'camera_bottom':
center[rot_axis] -= dim[
rot_axis] / 2 # bottom center to gravity center
box3d = geometry.OrientedBoundingBox(center, rot_mat, dim)
line_set = geometry.LineSet.create_from_oriented_bounding_box(box3d)
line_set.paint_uniform_color(bbox_color)
# draw bboxes on visualizer
vis.add_geometry(line_set)
# change the color of points which are in box
if pcd is not None and mode == 'xyz':
points_colors[indices[:, i].astype(bool)] = in_box_color
# update points colors
if pcd is not None:
pcd.colors = o3d.utility.Vector3dVector(points_colors)
vis.update_geometry(pcd)
def show_pts_index_boxes(points,
bbox3d=None,
show=True,
indices=None,
save_path=None,
points_size=2,
point_color=(0.5, 0.5, 0.5),
bbox_color=(0, 1, 0),
points_in_box_color=(1, 0, 0),
rot_axis=2,
center_mode='lidar_bottom',
mode='xyz'):
"""Draw bbox and points on visualizer with indices that indicate which
bbox3d each point lies in.
Args:
points (numpy.array | torch.tensor, shape=[N, 3+C]):
points to visualize.
bbox3d (numpy.array | torch.tensor, shape=[M, 7]):
3D bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
Defaults to None.
show (bool, optional): whether to show the visualization results.
Default: True.
indices (numpy.array | torch.tensor, shape=[N, M], optional):
indicate which bbox3d each point lies in. Default: None.
save_path (str, optional): path to save visualized results.
Default: None.
points_size (int, optional): the size of points to show on visualizer.
Default: 2.
point_color (tuple[float], optional): the color of points.
Default: (0.5, 0.5, 0.5).
bbox_color (tuple[float], optional): the color of bbox.
Default: (0, 1, 0).
points_in_box_color (tuple[float], optional):
the color of points which are in bbox3d. Default: (1, 0, 0).
rot_axis (int, optional): rotation axis of bbox. Default: 2.
center_mode (str, optional): indicate the center of bbox is
bottom center or gravity center. available mode
['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
mode (str, optional): indicate type of the input points,
available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
"""
# TODO: support score and class info
assert 0 <= rot_axis <= 2
# init visualizer
vis = o3d.visualization.Visualizer()
vis.create_window()
mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
size=1, origin=[0, 0, 0]) # create coordinate frame
vis.add_geometry(mesh_frame)
# draw points
pcd, points_colors = _draw_points(points, vis, points_size, point_color,
mode)
# draw boxes
if bbox3d is not None:
_draw_bboxes_ind(bbox3d, vis, indices, points_colors, pcd, bbox_color,
points_in_box_color, rot_axis, center_mode, mode)
if show:
vis.run()
if save_path is not None:
vis.capture_screen_image(save_path)
vis.destroy_window()
class Visualizer(object):
r"""Online visualizer implemented with Open3d.
Args:
points (numpy.array, shape=[N, 3+C]): Points to visualize. The point
cloud is in mode of Coord3DMode.DEPTH (please refer to
core.structures.coord_3d_mode).
bbox3d (numpy.array, shape=[M, 7], optional): 3D bbox
(x, y, z, x_size, y_size, z_size, yaw) to visualize.
The 3D bbox is in mode of Box3DMode.DEPTH with
gravity_center (please refer to core.structures.box_3d_mode).
Default: None.
save_path (str, optional): path to save visualized results.
Default: None.
points_size (int, optional): the size of points to show on visualizer.
Default: 2.
point_color (tuple[float], optional): the color of points.
Default: (0.5, 0.5, 0.5).
bbox_color (tuple[float], optional): the color of bbox.
Default: (0, 1, 0).
points_in_box_color (tuple[float], optional):
the color of points which are in bbox3d. Default: (1, 0, 0).
rot_axis (int, optional): rotation axis of bbox. Default: 2.
center_mode (str, optional): indicate the center of bbox is
bottom center or gravity center. available mode
['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
mode (str, optional): indicate type of the input points,
available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
"""
def __init__(self,
points,
bbox3d=None,
save_path=None,
points_size=2,
point_color=(0.5, 0.5, 0.5),
bbox_color=(0, 1, 0),
points_in_box_color=(1, 0, 0),
rot_axis=2,
center_mode='lidar_bottom',
mode='xyz'):
super(Visualizer, self).__init__()
assert 0 <= rot_axis <= 2
# init visualizer
self.o3d_visualizer = o3d.visualization.Visualizer()
self.o3d_visualizer.create_window()
mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
size=1, origin=[0, 0, 0]) # create coordinate frame
self.o3d_visualizer.add_geometry(mesh_frame)
self.points_size = points_size
self.point_color = point_color
self.bbox_color = bbox_color
self.points_in_box_color = points_in_box_color
self.rot_axis = rot_axis
self.center_mode = center_mode
self.mode = mode
self.seg_num = 0
# draw points
if points is not None:
self.pcd, self.points_colors = _draw_points(
points, self.o3d_visualizer, points_size, point_color, mode)
# draw boxes
if bbox3d is not None:
_draw_bboxes(bbox3d, self.o3d_visualizer, self.points_colors,
self.pcd, bbox_color, points_in_box_color, rot_axis,
center_mode, mode)
def add_bboxes(self, bbox3d, bbox_color=None, points_in_box_color=None):
"""Add bounding box to visualizer.
Args:
bbox3d (numpy.array, shape=[M, 7]):
3D bbox (x, y, z, x_size, y_size, z_size, yaw)
to be visualized. The 3d bbox is in mode of
Box3DMode.DEPTH with gravity_center (please refer to
core.structures.box_3d_mode).
bbox_color (tuple[float]): the color of bbox. Default: None.
points_in_box_color (tuple[float]): the color of points which
are in bbox3d. Default: None.
"""
if bbox_color is None:
bbox_color = self.bbox_color
if points_in_box_color is None:
points_in_box_color = self.points_in_box_color
_draw_bboxes(bbox3d, self.o3d_visualizer, self.points_colors, self.pcd,
bbox_color, points_in_box_color, self.rot_axis,
self.center_mode, self.mode)
def add_seg_mask(self, seg_mask_colors):
"""Add segmentation mask to visualizer via per-point colorization.
Args:
seg_mask_colors (numpy.array, shape=[N, 6]):
The segmentation mask whose first 3 dims are point coordinates
and last 3 dims are converted colors.
"""
# we can't draw the colors on existing points
# in case gt and pred mask would overlap
# instead we set a large offset along x-axis for each seg mask
self.seg_num += 1
offset = (np.array(self.pcd.points).max(0) -
np.array(self.pcd.points).min(0))[0] * 1.2 * self.seg_num
mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
size=1, origin=[offset, 0, 0]) # create coordinate frame for seg
self.o3d_visualizer.add_geometry(mesh_frame)
seg_points = copy.deepcopy(seg_mask_colors)
seg_points[:, 0] += offset
_draw_points(
seg_points, self.o3d_visualizer, self.points_size, mode='xyzrgb')
def show(self, save_path=None):
"""Visualize the points cloud.
Args:
save_path (str, optional): path to save image. Default: None.
"""
self.o3d_visualizer.run()
if save_path is not None:
self.o3d_visualizer.capture_screen_image(save_path)
self.o3d_visualizer.destroy_window()
return
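# Usage sketch (illustrative, not part of this commit) for the
# Open3D-backed Visualizer above: draw random points, overlay one
# predicted box, then render. All values are arbitrary and a display
# is required.
import numpy as np

points = np.random.rand(1024, 3) * 8.0
vis = Visualizer(points)
vis.add_bboxes(np.array([[4.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0]]))
vis.show()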
# Copyright (c) OpenMMLab. All rights reserved.
from os import path as osp
import mmcv
import numpy as np
import trimesh
from .image_vis import (draw_camera_bbox3d_on_img, draw_depth_bbox3d_on_img,
draw_lidar_bbox3d_on_img)
def _write_obj(points, out_filename):
"""Write points into ``obj`` format for meshlab visualization.
Args:
points (np.ndarray): Points in shape (N, dim).
out_filename (str): Filename to be saved.
"""
N = points.shape[0]
fout = open(out_filename, 'w')
for i in range(N):
if points.shape[1] == 6:
c = points[i, 3:].astype(int)
fout.write(
'v %f %f %f %d %d %d\n' %
(points[i, 0], points[i, 1], points[i, 2], c[0], c[1], c[2]))
else:
fout.write('v %f %f %f\n' %
(points[i, 0], points[i, 1], points[i, 2]))
fout.close()
def _write_oriented_bbox(scene_bbox, out_filename):
"""Export oriented (around Z axis) scene bbox to meshes.
Args:
scene_bbox(list[ndarray] or ndarray): xyz pos of center and
3 lengths (x_size, y_size, z_size) and heading angle around Z axis.
Y forward, X right, Z upward. heading angle of positive X is 0,
heading angle of positive Y is 90 degrees.
out_filename(str): Filename.
"""
def heading2rotmat(heading_angle):
rotmat = np.zeros((3, 3))
rotmat[2, 2] = 1
cosval = np.cos(heading_angle)
sinval = np.sin(heading_angle)
rotmat[0:2, 0:2] = np.array([[cosval, -sinval], [sinval, cosval]])
return rotmat
def convert_oriented_box_to_trimesh_fmt(box):
ctr = box[:3]
lengths = box[3:6]
trns = np.eye(4)
trns[0:3, 3] = ctr
trns[3, 3] = 1.0
trns[0:3, 0:3] = heading2rotmat(box[6])
box_trimesh_fmt = trimesh.creation.box(lengths, trns)
return box_trimesh_fmt
if len(scene_bbox) == 0:
scene_bbox = np.zeros((1, 7))
scene = trimesh.scene.Scene()
for box in scene_bbox:
scene.add_geometry(convert_oriented_box_to_trimesh_fmt(box))
mesh_list = trimesh.util.concatenate(scene.dump())
# save to obj file
trimesh.io.export.export_mesh(mesh_list, out_filename, file_type='obj')
return
def show_result(points,
gt_bboxes,
pred_bboxes,
out_dir,
filename,
show=False,
snapshot=False,
pred_labels=None):
"""Convert results into format that is directly readable for meshlab.
Args:
points (np.ndarray): Points.
gt_bboxes (np.ndarray): Ground truth boxes.
pred_bboxes (np.ndarray): Predicted boxes.
out_dir (str): Path of output directory
filename (str): Filename of the current frame.
show (bool, optional): Visualize the results online. Defaults to False.
snapshot (bool, optional): Whether to save the online results.
Defaults to False.
pred_labels (np.ndarray, optional): Predicted labels of boxes.
Defaults to None.
"""
result_path = osp.join(out_dir, filename)
mmcv.mkdir_or_exist(result_path)
if show:
from .open3d_vis import Visualizer
vis = Visualizer(points)
if pred_bboxes is not None:
if pred_labels is None:
vis.add_bboxes(bbox3d=pred_bboxes)
else:
palette = np.random.randint(
0, 255, size=(pred_labels.max() + 1, 3)) / 256
label_dict = {}
for j in range(len(pred_labels)):
i = int(pred_labels[j].numpy())
label_dict.setdefault(i, []).append(pred_bboxes[j])
for i in label_dict:
vis.add_bboxes(
bbox3d=np.array(label_dict[i]),
bbox_color=palette[i],
points_in_box_color=palette[i])
if gt_bboxes is not None:
vis.add_bboxes(bbox3d=gt_bboxes, bbox_color=(0, 0, 1))
show_path = osp.join(result_path,
f'{filename}_online.png') if snapshot else None
vis.show(show_path)
if points is not None:
_write_obj(points, osp.join(result_path, f'{filename}_points.obj'))
if gt_bboxes is not None:
# bottom center to gravity center
gt_bboxes[..., 2] += gt_bboxes[..., 5] / 2
_write_oriented_bbox(gt_bboxes,
osp.join(result_path, f'{filename}_gt.obj'))
if pred_bboxes is not None:
# bottom center to gravity center
pred_bboxes[..., 2] += pred_bboxes[..., 5] / 2
_write_oriented_bbox(pred_bboxes,
osp.join(result_path, f'{filename}_pred.obj'))
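# Usage sketch (illustrative, not part of this commit): dump synthetic
# points and one ground-truth box to meshlab-readable .obj files; the
# output directory is a placeholder.
import numpy as np

pts = np.random.rand(500, 3).astype(np.float32)
gt = np.array([[0.5, 0.5, 0.5, 0.4, 0.4, 0.4, 0.0]])
show_result(pts, gt, None, '/tmp/vis', 'demo_frame', show=False)
# -> /tmp/vis/demo_frame/demo_frame_points.obj and demo_frame_gt.obj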
def show_seg_result(points,
gt_seg,
pred_seg,
out_dir,
filename,
palette,
ignore_index=None,
show=False,
snapshot=False):
"""Convert results into format that is directly readable for meshlab.
Args:
points (np.ndarray): Points.
gt_seg (np.ndarray): Ground truth segmentation mask.
pred_seg (np.ndarray): Predicted segmentation mask.
out_dir (str): Path of output directory
filename (str): Filename of the current frame.
palette (np.ndarray): Mapping between class labels and colors.
ignore_index (int, optional): The label index to be ignored, e.g.
unannotated points. Defaults to None.
show (bool, optional): Visualize the results online. Defaults to False.
snapshot (bool, optional): Whether to save the online results.
Defaults to False.
"""
# we need 3D coordinates to visualize segmentation mask
if gt_seg is not None or pred_seg is not None:
assert points is not None, \
'3D coordinates are required for segmentation visualization'
# filter out ignored points
if gt_seg is not None and ignore_index is not None:
if points is not None:
points = points[gt_seg != ignore_index]
if pred_seg is not None:
pred_seg = pred_seg[gt_seg != ignore_index]
gt_seg = gt_seg[gt_seg != ignore_index]
if gt_seg is not None:
gt_seg_color = palette[gt_seg]
gt_seg_color = np.concatenate([points[:, :3], gt_seg_color], axis=1)
if pred_seg is not None:
pred_seg_color = palette[pred_seg]
pred_seg_color = np.concatenate([points[:, :3], pred_seg_color],
axis=1)
result_path = osp.join(out_dir, filename)
mmcv.mkdir_or_exist(result_path)
# online visualization of segmentation mask
# we show three masks in a row, scene_points, gt_mask, pred_mask
if show:
from .open3d_vis import Visualizer
mode = 'xyzrgb' if points.shape[1] == 6 else 'xyz'
vis = Visualizer(points, mode=mode)
if gt_seg is not None:
vis.add_seg_mask(gt_seg_color)
if pred_seg is not None:
vis.add_seg_mask(pred_seg_color)
show_path = osp.join(result_path,
f'{filename}_online.png') if snapshot else None
vis.show(show_path)
if points is not None:
_write_obj(points, osp.join(result_path, f'{filename}_points.obj'))
if gt_seg is not None:
_write_obj(gt_seg_color, osp.join(result_path, f'{filename}_gt.obj'))
if pred_seg is not None:
_write_obj(pred_seg_color, osp.join(result_path,
f'{filename}_pred.obj'))
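# Usage sketch (illustrative, not part of this commit): colorize a
# synthetic 3-class prediction with a fixed palette and export the
# colored cloud; paths are placeholders.
import numpy as np

pts = np.random.rand(300, 3).astype(np.float32)
pred = np.random.randint(0, 3, size=300)
palette = np.array([[255, 0, 0], [0, 255, 0], [0, 0, 255]])
show_seg_result(pts, None, pred, '/tmp/vis', 'seg_frame', palette,
                show=False)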
def show_multi_modality_result(img,
gt_bboxes,
pred_bboxes,
proj_mat,
out_dir,
filename,
box_mode='lidar',
img_metas=None,
show=False,
gt_bbox_color=(61, 102, 255),
pred_bbox_color=(241, 101, 72)):
"""Convert multi-modality detection results into 2D results.
Project the predicted 3D bbox to 2D image plane and visualize them.
Args:
img (np.ndarray): The numpy array of image in cv2 fashion.
gt_bboxes (:obj:`BaseInstance3DBoxes`): Ground truth boxes.
pred_bboxes (:obj:`BaseInstance3DBoxes`): Predicted boxes.
proj_mat (numpy.array, shape=[4, 4]): The projection matrix
according to the camera intrinsic parameters.
out_dir (str): Path of output directory.
filename (str): Filename of the current frame.
box_mode (str, optional): Coordinate system the boxes are in.
Should be one of 'depth', 'lidar' and 'camera'.
Defaults to 'lidar'.
img_metas (dict, optional): Used in projecting depth bbox.
Defaults to None.
show (bool, optional): Visualize the results online. Defaults to False.
gt_bbox_color (str or tuple(int), optional): Color of gt bbox lines.
The tuple of color should be in BGR order. Default: (61, 102, 255).
pred_bbox_color (str or tuple(int), optional): Color of pred bbox lines.
The tuple of color should be in BGR order. Default: (241, 101, 72).
"""
if box_mode == 'depth':
draw_bbox = draw_depth_bbox3d_on_img
elif box_mode == 'lidar':
draw_bbox = draw_lidar_bbox3d_on_img
elif box_mode == 'camera':
draw_bbox = draw_camera_bbox3d_on_img
else:
raise NotImplementedError(f'unsupported box mode {box_mode}')
result_path = osp.join(out_dir, filename)
mmcv.mkdir_or_exist(result_path)
if show:
show_img = img.copy()
if gt_bboxes is not None:
show_img = draw_bbox(
gt_bboxes, show_img, proj_mat, img_metas, color=gt_bbox_color)
if pred_bboxes is not None:
show_img = draw_bbox(
pred_bboxes,
show_img,
proj_mat,
img_metas,
color=pred_bbox_color)
mmcv.imshow(show_img, win_name='project_bbox3d_img', wait_time=0)
if img is not None:
mmcv.imwrite(img, osp.join(result_path, f'{filename}_img.png'))
if gt_bboxes is not None:
gt_img = draw_bbox(
gt_bboxes, img, proj_mat, img_metas, color=gt_bbox_color)
mmcv.imwrite(gt_img, osp.join(result_path, f'{filename}_gt.png'))
if pred_bboxes is not None:
pred_img = draw_bbox(
pred_bboxes, img, proj_mat, img_metas, color=pred_bbox_color)
mmcv.imwrite(pred_img, osp.join(result_path, f'{filename}_pred.png'))
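# Usage sketch (illustrative, not part of this commit): project one
# camera-frame box onto a blank image and write the rendering; the
# intrinsic values are made up.
import numpy as np
from mmdet3d.core.bbox import CameraInstance3DBoxes

img = np.zeros((375, 1242, 3), dtype=np.uint8)
boxes = CameraInstance3DBoxes(
    np.array([[0.0, 1.0, 10.0, 2.0, 1.5, 4.0, 0.0]]))
K = np.array([[721.5, 0.0, 609.6],
              [0.0, 721.5, 172.9],
              [0.0, 0.0, 1.0]])
show_multi_modality_result(img, None, boxes, K, '/tmp/vis', 'cam_frame',
                           box_mode='camera', show=False)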
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import warnings
from os import path as osp
from pathlib import Path
import mmcv
import numpy as np
from mmcv import Config, DictAction, mkdir_or_exist
from mmdet3d.core.bbox import (Box3DMode, CameraInstance3DBoxes, Coord3DMode,
DepthInstance3DBoxes, LiDARInstance3DBoxes)
from mmdet3d.core.visualizer import (show_multi_modality_result, show_result,
show_seg_result)
from mmdet3d.datasets import build_dataset
from mmdet3d.registry import VISUALIZERS
from mmdet3d.utils import register_all_modules
def parse_args():
@@ -80,8 +75,8 @@ def build_data_cfg(config_path, skip_type, aug, cfg_options):
if cfg.train_pipeline[i]['type'] == 'LoadAnnotations3D':
show_pipeline.insert(i, cfg.train_pipeline[i])
# Collect points as well as labels
if cfg.train_pipeline[i]['type'] == 'Collect3D':
if show_pipeline[-1]['type'] == 'Collect3D':
if cfg.train_pipeline[i]['type'] == 'Pack3DInputs':
if show_pipeline[-1]['type'] == 'Pack3DInputs':
show_pipeline[-1] = cfg.train_pipeline[i]
else:
show_pipeline.append(cfg.train_pipeline[i])
@@ -93,105 +88,6 @@ def build_data_cfg(config_path, skip_type, aug, cfg_options):
return cfg
def to_depth_mode(points, bboxes):
"""Convert points and bboxes to Depth Coord and Depth Box mode."""
if points is not None:
points = Coord3DMode.convert_point(points.copy(), Coord3DMode.LIDAR,
Coord3DMode.DEPTH)
if bboxes is not None:
bboxes = Box3DMode.convert(bboxes.clone(), Box3DMode.LIDAR,
Box3DMode.DEPTH)
return points, bboxes
def show_det_data(input, out_dir, show=False):
"""Visualize 3D point cloud and 3D bboxes."""
img_metas = input['img_metas']._data
points = input['points']._data.numpy()
gt_bboxes = input['gt_bboxes_3d']._data.tensor
if img_metas['box_mode_3d'] != Box3DMode.DEPTH:
points, gt_bboxes = to_depth_mode(points, gt_bboxes)
filename = osp.splitext(osp.basename(img_metas['pts_filename']))[0]
show_result(
points,
gt_bboxes.clone(),
None,
out_dir,
filename,
show=show,
snapshot=True)
def show_seg_data(input, out_dir, show=False):
"""Visualize 3D point cloud and segmentation mask."""
img_metas = input['img_metas']._data
points = input['points']._data.numpy()
gt_seg = input['pts_semantic_mask']._data.numpy()
filename = osp.splitext(osp.basename(img_metas['pts_filename']))[0]
show_seg_result(
points,
gt_seg.copy(),
None,
out_dir,
filename,
np.array(img_metas['PALETTE']),
img_metas['ignore_index'],
show=show,
snapshot=True)
def show_proj_bbox_img(input, out_dir, show=False, is_nus_mono=False):
"""Visualize 3D bboxes on 2D image by projection."""
gt_bboxes = input['gt_bboxes_3d']._data
img_metas = input['img_metas']._data
img = input['img']._data.numpy()
# need to transpose channel to first dim
img = img.transpose(1, 2, 0)
# no 3D gt bboxes, just show img
if gt_bboxes.tensor.shape[0] == 0:
gt_bboxes = None
filename = Path(img_metas['filename']).name
if isinstance(gt_bboxes, DepthInstance3DBoxes):
show_multi_modality_result(
img,
gt_bboxes,
None,
None,
out_dir,
filename,
box_mode='depth',
img_metas=img_metas,
show=show)
elif isinstance(gt_bboxes, LiDARInstance3DBoxes):
show_multi_modality_result(
img,
gt_bboxes,
None,
img_metas['lidar2img'],
out_dir,
filename,
box_mode='lidar',
img_metas=img_metas,
show=show)
elif isinstance(gt_bboxes, CameraInstance3DBoxes):
show_multi_modality_result(
img,
gt_bboxes,
None,
img_metas['cam2img'],
out_dir,
filename,
box_mode='camera',
img_metas=img_metas,
show=show)
else:
# can't project, just show img
warnings.warn(
f'unrecognized gt box type {type(gt_bboxes)}, only show image')
show_multi_modality_result(
img, None, None, None, out_dir, filename, show=show)
def main():
args = parse_args()
@@ -200,31 +96,42 @@ def main():
cfg = build_data_cfg(args.config, args.skip_type, args.aug,
args.cfg_options)
# register all modules in mmdet3d into the registries
register_all_modules()
try:
dataset = build_dataset(
cfg.data.train, default_args=dict(filter_empty_gt=False))
cfg.train_dataloader.dataset,
default_args=dict(filter_empty_gt=False))
except TypeError: # seg dataset doesn't have `filter_empty_gt` key
dataset = build_dataset(cfg.data.train)
dataset = build_dataset(cfg.train_dataloader.dataset)
dataset_type = cfg.dataset_type
# configure visualization mode
vis_task = args.task # 'det', 'seg', 'multi_modality-det', 'mono-det'
visualizer = VISUALIZERS.build(cfg.visualizer)
visualizer.dataset_meta = dataset.metainfo
progress_bar = mmcv.ProgressBar(len(dataset))
for input in dataset:
if vis_task in ['det', 'multi_modality-det']:
# show 3D bboxes on 3D point clouds
show_det_data(input, args.output_dir, show=args.online)
if vis_task in ['multi_modality-det', 'mono-det']:
# project 3D bboxes to 2D image
show_proj_bbox_img(
input,
args.output_dir,
show=args.online,
is_nus_mono=(dataset_type == 'NuScenesMonoDataset'))
elif vis_task in ['seg']:
# show 3D segmentation mask on 3D point clouds
show_seg_data(input, args.output_dir, show=args.online)
for item in dataset:
# the 3D boxes in the input could be in any of the three coordinate systems
data_input = item['inputs']
data_sample = item['data_sample'].numpy()
out_file = args.output_dir if args.output_dir is not None else None
visualizer.add_datasample(
'3d visualizer',
data_input,
data_sample,
show=not args.not_show,
wait_time=args.show_interval,
out_file=out_file,
vis_task=vis_task)
progress_bar.update()