Commit ba3cd005 authored by 雍大凯's avatar 雍大凯
Browse files

将子模块转换为普通目录

parent d2b71343
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch
def gaussian_2d(shape, sigma=1):
"""Generate gaussian map.
Args:
shape (list[int]): Shape of the map.
sigma (float, optional): Sigma to generate gaussian map.
Defaults to 1.
Returns:
np.ndarray: Generated gaussian map.
"""
m, n = [(ss - 1.) / 2. for ss in shape]
y, x = np.ogrid[-m:m + 1, -n:n + 1]
h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
h[h < np.finfo(h.dtype).eps * h.max()] = 0
return h
def draw_heatmap_gaussian(heatmap, center, radius, k=1):
"""Get gaussian masked heatmap.
Args:
heatmap (torch.Tensor): Heatmap to be masked.
center (torch.Tensor): Center coord of the heatmap.
radius (int): Radius of gaussian.
K (int, optional): Multiple of masked_gaussian. Defaults to 1.
Returns:
torch.Tensor: Masked heatmap.
"""
diameter = 2 * radius + 1
gaussian = gaussian_2d((diameter, diameter), sigma=diameter / 6)
x, y = int(center[0]), int(center[1])
height, width = heatmap.shape[0:2]
left, right = min(x, radius), min(width - x, radius + 1)
top, bottom = min(y, radius), min(height - y, radius + 1)
masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
masked_gaussian = torch.from_numpy(
gaussian[radius - top:radius + bottom,
radius - left:radius + right]).to(heatmap.device,
torch.float32)
if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
torch.max(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
return heatmap
def gaussian_radius(det_size, min_overlap=0.5):
"""Get radius of gaussian.
Args:
det_size (tuple[torch.Tensor]): Size of the detection result.
min_overlap (float, optional): Gaussian_overlap. Defaults to 0.5.
Returns:
torch.Tensor: Computed radius.
"""
height, width = det_size
a1 = 1
b1 = (height + width)
c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
sq1 = torch.sqrt(b1**2 - 4 * a1 * c1)
r1 = (b1 + sq1) / 2
a2 = 4
b2 = 2 * (height + width)
c2 = (1 - min_overlap) * width * height
sq2 = torch.sqrt(b2**2 - 4 * a2 * c2)
r2 = (b2 + sq2) / 2
a3 = 4 * min_overlap
b3 = -2 * min_overlap * (height + width)
c3 = (min_overlap - 1) * width * height
sq3 = torch.sqrt(b3**2 - 4 * a3 * c3)
r3 = (b3 + sq3) / 2
return min(r1, r2, r3)
def get_ellip_gaussian_2D(heatmap, center, radius_x, radius_y, k=1):
"""Generate 2D ellipse gaussian heatmap.
Args:
heatmap (Tensor): Input heatmap, the gaussian kernel will cover on
it and maintain the max value.
center (list[int]): Coord of gaussian kernel's center.
radius_x (int): X-axis radius of gaussian kernel.
radius_y (int): Y-axis radius of gaussian kernel.
k (int, optional): Coefficient of gaussian kernel. Default: 1.
Returns:
out_heatmap (Tensor): Updated heatmap covered by gaussian kernel.
"""
diameter_x, diameter_y = 2 * radius_x + 1, 2 * radius_y + 1
gaussian_kernel = ellip_gaussian2D((radius_x, radius_y),
sigma_x=diameter_x / 6,
sigma_y=diameter_y / 6,
dtype=heatmap.dtype,
device=heatmap.device)
x, y = int(center[0]), int(center[1])
height, width = heatmap.shape[0:2]
left, right = min(x, radius_x), min(width - x, radius_x + 1)
top, bottom = min(y, radius_y), min(height - y, radius_y + 1)
masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
masked_gaussian = gaussian_kernel[radius_y - top:radius_y + bottom,
radius_x - left:radius_x + right]
out_heatmap = heatmap
torch.max(
masked_heatmap,
masked_gaussian * k,
out=out_heatmap[y - top:y + bottom, x - left:x + right])
return out_heatmap
def ellip_gaussian2D(radius,
sigma_x,
sigma_y,
dtype=torch.float32,
device='cpu'):
"""Generate 2D ellipse gaussian kernel.
Args:
radius (tuple(int)): Ellipse radius (radius_x, radius_y) of gaussian
kernel.
sigma_x (int): X-axis sigma of gaussian function.
sigma_y (int): Y-axis sigma of gaussian function.
dtype (torch.dtype, optional): Dtype of gaussian tensor.
Default: torch.float32.
device (str, optional): Device of gaussian tensor.
Default: 'cpu'.
Returns:
h (Tensor): Gaussian kernel with a
``(2 * radius_y + 1) * (2 * radius_x + 1)`` shape.
"""
x = torch.arange(
-radius[0], radius[0] + 1, dtype=dtype, device=device).view(1, -1)
y = torch.arange(
-radius[1], radius[1] + 1, dtype=dtype, device=device).view(-1, 1)
h = (-(x * x) / (2 * sigma_x * sigma_x) - (y * y) /
(2 * sigma_y * sigma_y)).exp()
h[h < torch.finfo(h.dtype).eps * h.max()] = 0
return h
# Copyright (c) OpenMMLab. All rights reserved.
from .show_result import (show_multi_modality_result, show_result,
show_seg_result)
__all__ = ['show_result', 'show_seg_result', 'show_multi_modality_result']
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import cv2
import numpy as np
import torch
from matplotlib import pyplot as plt
def project_pts_on_img(points,
raw_img,
lidar2img_rt,
max_distance=70,
thickness=-1):
"""Project the 3D points cloud on 2D image.
Args:
points (numpy.array): 3D points cloud (x, y, z) to visualize.
raw_img (numpy.array): The numpy array of image.
lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix
according to the camera intrinsic parameters.
max_distance (float, optional): the max distance of the points cloud.
Default: 70.
thickness (int, optional): The thickness of 2D points. Default: -1.
"""
img = raw_img.copy()
num_points = points.shape[0]
pts_4d = np.concatenate([points[:, :3], np.ones((num_points, 1))], axis=-1)
pts_2d = pts_4d @ lidar2img_rt.T
# cam_points is Tensor of Nx4 whose last column is 1
# transform camera coordinate to image coordinate
pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=99999)
pts_2d[:, 0] /= pts_2d[:, 2]
pts_2d[:, 1] /= pts_2d[:, 2]
fov_inds = ((pts_2d[:, 0] < img.shape[1])
& (pts_2d[:, 0] >= 0)
& (pts_2d[:, 1] < img.shape[0])
& (pts_2d[:, 1] >= 0))
imgfov_pts_2d = pts_2d[fov_inds, :3] # u, v, d
cmap = plt.cm.get_cmap('hsv', 256)
cmap = np.array([cmap(i) for i in range(256)])[:, :3] * 255
for i in range(imgfov_pts_2d.shape[0]):
depth = imgfov_pts_2d[i, 2]
color = cmap[np.clip(int(max_distance * 10 / depth), 0, 255), :]
cv2.circle(
img,
center=(int(np.round(imgfov_pts_2d[i, 0])),
int(np.round(imgfov_pts_2d[i, 1]))),
radius=1,
color=tuple(color),
thickness=thickness,
)
cv2.imshow('project_pts_img', img.astype(np.uint8))
cv2.waitKey(100)
def plot_rect3d_on_img(img,
num_rects,
rect_corners,
color=(0, 255, 0),
thickness=1):
"""Plot the boundary lines of 3D rectangular on 2D images.
Args:
img (numpy.array): The numpy array of image.
num_rects (int): Number of 3D rectangulars.
rect_corners (numpy.array): Coordinates of the corners of 3D
rectangulars. Should be in the shape of [num_rect, 8, 2].
color (tuple[int], optional): The color to draw bboxes.
Default: (0, 255, 0).
thickness (int, optional): The thickness of bboxes. Default: 1.
"""
line_indices = ((0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (3, 2), (3, 7),
(4, 5), (4, 7), (2, 6), (5, 6), (6, 7))
for i in range(num_rects):
corners = rect_corners[i].astype(np.int)
for start, end in line_indices:
cv2.line(img, (corners[start, 0], corners[start, 1]),
(corners[end, 0], corners[end, 1]), color, thickness,
cv2.LINE_AA)
return img.astype(np.uint8)
def draw_lidar_bbox3d_on_img(bboxes3d,
raw_img,
lidar2img_rt,
img_metas,
color=(0, 255, 0),
thickness=1):
"""Project the 3D bbox on 2D plane and draw on input image.
Args:
bboxes3d (:obj:`LiDARInstance3DBoxes`):
3d bbox in lidar coordinate system to visualize.
raw_img (numpy.array): The numpy array of image.
lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix
according to the camera intrinsic parameters.
img_metas (dict): Useless here.
color (tuple[int], optional): The color to draw bboxes.
Default: (0, 255, 0).
thickness (int, optional): The thickness of bboxes. Default: 1.
"""
img = raw_img.copy()
corners_3d = bboxes3d.corners
num_bbox = corners_3d.shape[0]
pts_4d = np.concatenate(
[corners_3d.reshape(-1, 3),
np.ones((num_bbox * 8, 1))], axis=-1)
lidar2img_rt = copy.deepcopy(lidar2img_rt).reshape(4, 4)
if isinstance(lidar2img_rt, torch.Tensor):
lidar2img_rt = lidar2img_rt.cpu().numpy()
pts_2d = pts_4d @ lidar2img_rt.T
pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=1e5)
pts_2d[:, 0] /= pts_2d[:, 2]
pts_2d[:, 1] /= pts_2d[:, 2]
imgfov_pts_2d = pts_2d[..., :2].reshape(num_bbox, 8, 2)
return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)
# TODO: remove third parameter in all functions here in favour of img_metas
def draw_depth_bbox3d_on_img(bboxes3d,
raw_img,
calibs,
img_metas,
color=(0, 255, 0),
thickness=1):
"""Project the 3D bbox on 2D plane and draw on input image.
Args:
bboxes3d (:obj:`DepthInstance3DBoxes`, shape=[M, 7]):
3d bbox in depth coordinate system to visualize.
raw_img (numpy.array): The numpy array of image.
calibs (dict): Camera calibration information, Rt and K.
img_metas (dict): Used in coordinates transformation.
color (tuple[int], optional): The color to draw bboxes.
Default: (0, 255, 0).
thickness (int, optional): The thickness of bboxes. Default: 1.
"""
from mmdet3d.core.bbox import points_cam2img
from mmdet3d.models import apply_3d_transformation
img = raw_img.copy()
img_metas = copy.deepcopy(img_metas)
corners_3d = bboxes3d.corners
num_bbox = corners_3d.shape[0]
points_3d = corners_3d.reshape(-1, 3)
# first reverse the data transformations
xyz_depth = apply_3d_transformation(
points_3d, 'DEPTH', img_metas, reverse=True)
# project to 2d to get image coords (uv)
uv_origin = points_cam2img(xyz_depth,
xyz_depth.new_tensor(img_metas['depth2img']))
uv_origin = (uv_origin - 1).round()
imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()
return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)
def draw_camera_bbox3d_on_img(bboxes3d,
raw_img,
cam2img,
img_metas,
color=(0, 255, 0),
thickness=1):
"""Project the 3D bbox on 2D plane and draw on input image.
Args:
bboxes3d (:obj:`CameraInstance3DBoxes`, shape=[M, 7]):
3d bbox in camera coordinate system to visualize.
raw_img (numpy.array): The numpy array of image.
cam2img (dict): Camera intrinsic matrix,
denoted as `K` in depth bbox coordinate system.
img_metas (dict): Useless here.
color (tuple[int], optional): The color to draw bboxes.
Default: (0, 255, 0).
thickness (int, optional): The thickness of bboxes. Default: 1.
"""
from mmdet3d.core.bbox import points_cam2img
img = raw_img.copy()
cam2img = copy.deepcopy(cam2img)
corners_3d = bboxes3d.corners
num_bbox = corners_3d.shape[0]
points_3d = corners_3d.reshape(-1, 3)
if not isinstance(cam2img, torch.Tensor):
cam2img = torch.from_numpy(np.array(cam2img))
assert (cam2img.shape == torch.Size([3, 3])
or cam2img.shape == torch.Size([4, 4]))
cam2img = cam2img.float().cpu()
# project to 2d to get image coords (uv)
uv_origin = points_cam2img(points_3d, cam2img)
uv_origin = (uv_origin - 1).round()
imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()
return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import numpy as np
import torch
try:
import open3d as o3d
from open3d import geometry
except ImportError:
raise ImportError(
'Please run "pip install open3d" to install open3d first.')
def _draw_points(points,
vis,
points_size=2,
point_color=(0.5, 0.5, 0.5),
mode='xyz'):
"""Draw points on visualizer.
Args:
points (numpy.array | torch.tensor, shape=[N, 3+C]):
points to visualize.
vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.
points_size (int, optional): the size of points to show on visualizer.
Default: 2.
point_color (tuple[float], optional): the color of points.
Default: (0.5, 0.5, 0.5).
mode (str, optional): indicate type of the input points,
available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
Returns:
tuple: points, color of each point.
"""
vis.get_render_option().point_size = points_size # set points size
if isinstance(points, torch.Tensor):
points = points.cpu().numpy()
points = points.copy()
pcd = geometry.PointCloud()
if mode == 'xyz':
pcd.points = o3d.utility.Vector3dVector(points[:, :3])
points_colors = np.tile(np.array(point_color), (points.shape[0], 1))
elif mode == 'xyzrgb':
pcd.points = o3d.utility.Vector3dVector(points[:, :3])
points_colors = points[:, 3:6]
# normalize to [0, 1] for open3d drawing
if not ((points_colors >= 0.0) & (points_colors <= 1.0)).all():
points_colors /= 255.0
else:
raise NotImplementedError
pcd.colors = o3d.utility.Vector3dVector(points_colors)
vis.add_geometry(pcd)
return pcd, points_colors
def _draw_bboxes(bbox3d,
vis,
points_colors,
pcd=None,
bbox_color=(0, 1, 0),
points_in_box_color=(1, 0, 0),
rot_axis=2,
center_mode='lidar_bottom',
mode='xyz'):
"""Draw bbox on visualizer and change the color of points inside bbox3d.
Args:
bbox3d (numpy.array | torch.tensor, shape=[M, 7]):
3d bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.
points_colors (numpy.array): color of each points.
pcd (:obj:`open3d.geometry.PointCloud`, optional): point cloud.
Default: None.
bbox_color (tuple[float], optional): the color of bbox.
Default: (0, 1, 0).
points_in_box_color (tuple[float], optional):
the color of points inside bbox3d. Default: (1, 0, 0).
rot_axis (int, optional): rotation axis of bbox. Default: 2.
center_mode (bool, optional): indicate the center of bbox is
bottom center or gravity center. available mode
['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
mode (str, optional): indicate type of the input points,
available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
"""
if isinstance(bbox3d, torch.Tensor):
bbox3d = bbox3d.cpu().numpy()
bbox3d = bbox3d.copy()
in_box_color = np.array(points_in_box_color)
for i in range(len(bbox3d)):
center = bbox3d[i, 0:3]
dim = bbox3d[i, 3:6]
yaw = np.zeros(3)
yaw[rot_axis] = bbox3d[i, 6]
rot_mat = geometry.get_rotation_matrix_from_xyz(yaw)
if center_mode == 'lidar_bottom':
center[rot_axis] += dim[
rot_axis] / 2 # bottom center to gravity center
elif center_mode == 'camera_bottom':
center[rot_axis] -= dim[
rot_axis] / 2 # bottom center to gravity center
box3d = geometry.OrientedBoundingBox(center, rot_mat, dim)
line_set = geometry.LineSet.create_from_oriented_bounding_box(box3d)
line_set.paint_uniform_color(bbox_color)
# draw bboxes on visualizer
vis.add_geometry(line_set)
# change the color of points which are in box
if pcd is not None and mode == 'xyz':
indices = box3d.get_point_indices_within_bounding_box(pcd.points)
points_colors[indices] = in_box_color
# update points colors
if pcd is not None:
pcd.colors = o3d.utility.Vector3dVector(points_colors)
vis.update_geometry(pcd)
def show_pts_boxes(points,
bbox3d=None,
show=True,
save_path=None,
points_size=2,
point_color=(0.5, 0.5, 0.5),
bbox_color=(0, 1, 0),
points_in_box_color=(1, 0, 0),
rot_axis=2,
center_mode='lidar_bottom',
mode='xyz'):
"""Draw bbox and points on visualizer.
Args:
points (numpy.array | torch.tensor, shape=[N, 3+C]):
points to visualize.
bbox3d (numpy.array | torch.tensor, shape=[M, 7], optional):
3D bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
Defaults to None.
show (bool, optional): whether to show the visualization results.
Default: True.
save_path (str, optional): path to save visualized results.
Default: None.
points_size (int, optional): the size of points to show on visualizer.
Default: 2.
point_color (tuple[float], optional): the color of points.
Default: (0.5, 0.5, 0.5).
bbox_color (tuple[float], optional): the color of bbox.
Default: (0, 1, 0).
points_in_box_color (tuple[float], optional):
the color of points which are in bbox3d. Default: (1, 0, 0).
rot_axis (int, optional): rotation axis of bbox. Default: 2.
center_mode (bool, optional): indicate the center of bbox is bottom
center or gravity center. available mode
['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
mode (str, optional): indicate type of the input points, available
mode ['xyz', 'xyzrgb']. Default: 'xyz'.
"""
# TODO: support score and class info
assert 0 <= rot_axis <= 2
# init visualizer
vis = o3d.visualization.Visualizer()
vis.create_window()
mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
size=1, origin=[0, 0, 0]) # create coordinate frame
vis.add_geometry(mesh_frame)
# draw points
pcd, points_colors = _draw_points(points, vis, points_size, point_color,
mode)
# draw boxes
if bbox3d is not None:
_draw_bboxes(bbox3d, vis, points_colors, pcd, bbox_color,
points_in_box_color, rot_axis, center_mode, mode)
if show:
vis.run()
if save_path is not None:
vis.capture_screen_image(save_path)
vis.destroy_window()
def _draw_bboxes_ind(bbox3d,
vis,
indices,
points_colors,
pcd=None,
bbox_color=(0, 1, 0),
points_in_box_color=(1, 0, 0),
rot_axis=2,
center_mode='lidar_bottom',
mode='xyz'):
"""Draw bbox on visualizer and change the color or points inside bbox3d
with indices.
Args:
bbox3d (numpy.array | torch.tensor, shape=[M, 7]):
3d bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.
indices (numpy.array | torch.tensor, shape=[N, M]):
indicate which bbox3d that each point lies in.
points_colors (numpy.array): color of each points.
pcd (:obj:`open3d.geometry.PointCloud`, optional): point cloud.
Default: None.
bbox_color (tuple[float], optional): the color of bbox.
Default: (0, 1, 0).
points_in_box_color (tuple[float], optional):
the color of points which are in bbox3d. Default: (1, 0, 0).
rot_axis (int, optional): rotation axis of bbox. Default: 2.
center_mode (bool, optional): indicate the center of bbox is
bottom center or gravity center. available mode
['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
mode (str, optional): indicate type of the input points,
available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
"""
if isinstance(bbox3d, torch.Tensor):
bbox3d = bbox3d.cpu().numpy()
if isinstance(indices, torch.Tensor):
indices = indices.cpu().numpy()
bbox3d = bbox3d.copy()
in_box_color = np.array(points_in_box_color)
for i in range(len(bbox3d)):
center = bbox3d[i, 0:3]
dim = bbox3d[i, 3:6]
yaw = np.zeros(3)
# TODO: fix problem of current coordinate system
# dim[0], dim[1] = dim[1], dim[0] # for current coordinate
# yaw[rot_axis] = -(bbox3d[i, 6] - 0.5 * np.pi)
yaw[rot_axis] = -bbox3d[i, 6]
rot_mat = geometry.get_rotation_matrix_from_xyz(yaw)
if center_mode == 'lidar_bottom':
center[rot_axis] += dim[
rot_axis] / 2 # bottom center to gravity center
elif center_mode == 'camera_bottom':
center[rot_axis] -= dim[
rot_axis] / 2 # bottom center to gravity center
box3d = geometry.OrientedBoundingBox(center, rot_mat, dim)
line_set = geometry.LineSet.create_from_oriented_bounding_box(box3d)
line_set.paint_uniform_color(bbox_color)
# draw bboxes on visualizer
vis.add_geometry(line_set)
# change the color of points which are in box
if pcd is not None and mode == 'xyz':
points_colors[indices[:, i].astype(np.bool)] = in_box_color
# update points colors
if pcd is not None:
pcd.colors = o3d.utility.Vector3dVector(points_colors)
vis.update_geometry(pcd)
def show_pts_index_boxes(points,
bbox3d=None,
show=True,
indices=None,
save_path=None,
points_size=2,
point_color=(0.5, 0.5, 0.5),
bbox_color=(0, 1, 0),
points_in_box_color=(1, 0, 0),
rot_axis=2,
center_mode='lidar_bottom',
mode='xyz'):
"""Draw bbox and points on visualizer with indices that indicate which
bbox3d that each point lies in.
Args:
points (numpy.array | torch.tensor, shape=[N, 3+C]):
points to visualize.
bbox3d (numpy.array | torch.tensor, shape=[M, 7]):
3D bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
Defaults to None.
show (bool, optional): whether to show the visualization results.
Default: True.
indices (numpy.array | torch.tensor, shape=[N, M], optional):
indicate which bbox3d that each point lies in. Default: None.
save_path (str, optional): path to save visualized results.
Default: None.
points_size (int, optional): the size of points to show on visualizer.
Default: 2.
point_color (tuple[float], optional): the color of points.
Default: (0.5, 0.5, 0.5).
bbox_color (tuple[float], optional): the color of bbox.
Default: (0, 1, 0).
points_in_box_color (tuple[float], optional):
the color of points which are in bbox3d. Default: (1, 0, 0).
rot_axis (int, optional): rotation axis of bbox. Default: 2.
center_mode (bool, optional): indicate the center of bbox is
bottom center or gravity center. available mode
['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
mode (str, optional): indicate type of the input points,
available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
"""
# TODO: support score and class info
assert 0 <= rot_axis <= 2
# init visualizer
vis = o3d.visualization.Visualizer()
vis.create_window()
mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
size=1, origin=[0, 0, 0]) # create coordinate frame
vis.add_geometry(mesh_frame)
# draw points
pcd, points_colors = _draw_points(points, vis, points_size, point_color,
mode)
# draw boxes
if bbox3d is not None:
_draw_bboxes_ind(bbox3d, vis, indices, points_colors, pcd, bbox_color,
points_in_box_color, rot_axis, center_mode, mode)
if show:
vis.run()
if save_path is not None:
vis.capture_screen_image(save_path)
vis.destroy_window()
class Visualizer(object):
r"""Online visualizer implemented with Open3d.
Args:
points (numpy.array, shape=[N, 3+C]): Points to visualize. The Points
cloud is in mode of Coord3DMode.DEPTH (please refer to
core.structures.coord_3d_mode).
bbox3d (numpy.array, shape=[M, 7], optional): 3D bbox
(x, y, z, x_size, y_size, z_size, yaw) to visualize.
The 3D bbox is in mode of Box3DMode.DEPTH with
gravity_center (please refer to core.structures.box_3d_mode).
Default: None.
save_path (str, optional): path to save visualized results.
Default: None.
points_size (int, optional): the size of points to show on visualizer.
Default: 2.
point_color (tuple[float], optional): the color of points.
Default: (0.5, 0.5, 0.5).
bbox_color (tuple[float], optional): the color of bbox.
Default: (0, 1, 0).
points_in_box_color (tuple[float], optional):
the color of points which are in bbox3d. Default: (1, 0, 0).
rot_axis (int, optional): rotation axis of bbox. Default: 2.
center_mode (bool, optional): indicate the center of bbox is
bottom center or gravity center. available mode
['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
mode (str, optional): indicate type of the input points,
available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
"""
def __init__(self,
points,
bbox3d=None,
save_path=None,
points_size=2,
point_color=(0.5, 0.5, 0.5),
bbox_color=(0, 1, 0),
points_in_box_color=(1, 0, 0),
rot_axis=2,
center_mode='lidar_bottom',
mode='xyz'):
super(Visualizer, self).__init__()
assert 0 <= rot_axis <= 2
# init visualizer
self.o3d_visualizer = o3d.visualization.Visualizer()
self.o3d_visualizer.create_window()
mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
size=1, origin=[0, 0, 0]) # create coordinate frame
self.o3d_visualizer.add_geometry(mesh_frame)
self.points_size = points_size
self.point_color = point_color
self.bbox_color = bbox_color
self.points_in_box_color = points_in_box_color
self.rot_axis = rot_axis
self.center_mode = center_mode
self.mode = mode
self.seg_num = 0
# draw points
if points is not None:
self.pcd, self.points_colors = _draw_points(
points, self.o3d_visualizer, points_size, point_color, mode)
# draw boxes
if bbox3d is not None:
_draw_bboxes(bbox3d, self.o3d_visualizer, self.points_colors,
self.pcd, bbox_color, points_in_box_color, rot_axis,
center_mode, mode)
def add_bboxes(self, bbox3d, bbox_color=None, points_in_box_color=None):
"""Add bounding box to visualizer.
Args:
bbox3d (numpy.array, shape=[M, 7]):
3D bbox (x, y, z, x_size, y_size, z_size, yaw)
to be visualized. The 3d bbox is in mode of
Box3DMode.DEPTH with gravity_center (please refer to
core.structures.box_3d_mode).
bbox_color (tuple[float]): the color of bbox. Default: None.
points_in_box_color (tuple[float]): the color of points which
are in bbox3d. Default: None.
"""
if bbox_color is None:
bbox_color = self.bbox_color
if points_in_box_color is None:
points_in_box_color = self.points_in_box_color
_draw_bboxes(bbox3d, self.o3d_visualizer, self.points_colors, self.pcd,
bbox_color, points_in_box_color, self.rot_axis,
self.center_mode, self.mode)
def add_seg_mask(self, seg_mask_colors):
"""Add segmentation mask to visualizer via per-point colorization.
Args:
seg_mask_colors (numpy.array, shape=[N, 6]):
The segmentation mask whose first 3 dims are point coordinates
and last 3 dims are converted colors.
"""
# we can't draw the colors on existing points
# in case gt and pred mask would overlap
# instead we set a large offset along x-axis for each seg mask
self.seg_num += 1
offset = (np.array(self.pcd.points).max(0) -
np.array(self.pcd.points).min(0))[0] * 1.2 * self.seg_num
mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
size=1, origin=[offset, 0, 0]) # create coordinate frame for seg
self.o3d_visualizer.add_geometry(mesh_frame)
seg_points = copy.deepcopy(seg_mask_colors)
seg_points[:, 0] += offset
_draw_points(
seg_points, self.o3d_visualizer, self.points_size, mode='xyzrgb')
def show(self, save_path=None):
"""Visualize the points cloud.
Args:
save_path (str, optional): path to save image. Default: None.
"""
self.o3d_visualizer.run()
if save_path is not None:
self.o3d_visualizer.capture_screen_image(save_path)
self.o3d_visualizer.destroy_window()
return
# Copyright (c) OpenMMLab. All rights reserved.
from os import path as osp
import mmcv
import numpy as np
import trimesh
from .image_vis import (draw_camera_bbox3d_on_img, draw_depth_bbox3d_on_img,
draw_lidar_bbox3d_on_img)
def _write_obj(points, out_filename):
"""Write points into ``obj`` format for meshlab visualization.
Args:
points (np.ndarray): Points in shape (N, dim).
out_filename (str): Filename to be saved.
"""
N = points.shape[0]
fout = open(out_filename, 'w')
for i in range(N):
if points.shape[1] == 6:
c = points[i, 3:].astype(int)
fout.write(
'v %f %f %f %d %d %d\n' %
(points[i, 0], points[i, 1], points[i, 2], c[0], c[1], c[2]))
else:
fout.write('v %f %f %f\n' %
(points[i, 0], points[i, 1], points[i, 2]))
fout.close()
def _write_oriented_bbox(scene_bbox, out_filename):
"""Export oriented (around Z axis) scene bbox to meshes.
Args:
scene_bbox(list[ndarray] or ndarray): xyz pos of center and
3 lengths (x_size, y_size, z_size) and heading angle around Z axis.
Y forward, X right, Z upward. heading angle of positive X is 0,
heading angle of positive Y is 90 degrees.
out_filename(str): Filename.
"""
def heading2rotmat(heading_angle):
rotmat = np.zeros((3, 3))
rotmat[2, 2] = 1
cosval = np.cos(heading_angle)
sinval = np.sin(heading_angle)
rotmat[0:2, 0:2] = np.array([[cosval, -sinval], [sinval, cosval]])
return rotmat
def convert_oriented_box_to_trimesh_fmt(box):
ctr = box[:3]
lengths = box[3:6]
trns = np.eye(4)
trns[0:3, 3] = ctr
trns[3, 3] = 1.0
trns[0:3, 0:3] = heading2rotmat(box[6])
box_trimesh_fmt = trimesh.creation.box(lengths, trns)
return box_trimesh_fmt
if len(scene_bbox) == 0:
scene_bbox = np.zeros((1, 7))
scene = trimesh.scene.Scene()
for box in scene_bbox:
scene.add_geometry(convert_oriented_box_to_trimesh_fmt(box))
mesh_list = trimesh.util.concatenate(scene.dump())
# save to obj file
trimesh.io.export.export_mesh(mesh_list, out_filename, file_type='obj')
return
def show_result(points,
gt_bboxes,
pred_bboxes,
out_dir,
filename,
show=False,
snapshot=False,
pred_labels=None):
"""Convert results into format that is directly readable for meshlab.
Args:
points (np.ndarray): Points.
gt_bboxes (np.ndarray): Ground truth boxes.
pred_bboxes (np.ndarray): Predicted boxes.
out_dir (str): Path of output directory
filename (str): Filename of the current frame.
show (bool, optional): Visualize the results online. Defaults to False.
snapshot (bool, optional): Whether to save the online results.
Defaults to False.
pred_labels (np.ndarray, optional): Predicted labels of boxes.
Defaults to None.
"""
result_path = osp.join(out_dir, filename)
mmcv.mkdir_or_exist(result_path)
if show:
from .open3d_vis import Visualizer
vis = Visualizer(points)
if pred_bboxes is not None:
if pred_labels is None:
vis.add_bboxes(bbox3d=pred_bboxes)
else:
palette = np.random.randint(
0, 255, size=(pred_labels.max() + 1, 3)) / 256
labelDict = {}
for j in range(len(pred_labels)):
i = int(pred_labels[j].numpy())
if labelDict.get(i) is None:
labelDict[i] = []
labelDict[i].append(pred_bboxes[j])
for i in labelDict:
vis.add_bboxes(
bbox3d=np.array(labelDict[i]),
bbox_color=palette[i],
points_in_box_color=palette[i])
if gt_bboxes is not None:
vis.add_bboxes(bbox3d=gt_bboxes, bbox_color=(0, 0, 1))
show_path = osp.join(result_path,
f'{filename}_online.png') if snapshot else None
vis.show(show_path)
if points is not None:
_write_obj(points, osp.join(result_path, f'{filename}_points.obj'))
if gt_bboxes is not None:
# bottom center to gravity center
gt_bboxes[..., 2] += gt_bboxes[..., 5] / 2
_write_oriented_bbox(gt_bboxes,
osp.join(result_path, f'{filename}_gt.obj'))
if pred_bboxes is not None:
# bottom center to gravity center
pred_bboxes[..., 2] += pred_bboxes[..., 5] / 2
_write_oriented_bbox(pred_bboxes,
osp.join(result_path, f'{filename}_pred.obj'))
def show_seg_result(points,
gt_seg,
pred_seg,
out_dir,
filename,
palette,
ignore_index=None,
show=False,
snapshot=False):
"""Convert results into format that is directly readable for meshlab.
Args:
points (np.ndarray): Points.
gt_seg (np.ndarray): Ground truth segmentation mask.
pred_seg (np.ndarray): Predicted segmentation mask.
out_dir (str): Path of output directory
filename (str): Filename of the current frame.
palette (np.ndarray): Mapping between class labels and colors.
ignore_index (int, optional): The label index to be ignored, e.g.
unannotated points. Defaults to None.
show (bool, optional): Visualize the results online. Defaults to False.
snapshot (bool, optional): Whether to save the online results.
Defaults to False.
"""
# we need 3D coordinates to visualize segmentation mask
if gt_seg is not None or pred_seg is not None:
assert points is not None, \
'3D coordinates are required for segmentation visualization'
# filter out ignored points
if gt_seg is not None and ignore_index is not None:
if points is not None:
points = points[gt_seg != ignore_index]
if pred_seg is not None:
pred_seg = pred_seg[gt_seg != ignore_index]
gt_seg = gt_seg[gt_seg != ignore_index]
if gt_seg is not None:
gt_seg_color = palette[gt_seg]
gt_seg_color = np.concatenate([points[:, :3], gt_seg_color], axis=1)
if pred_seg is not None:
pred_seg_color = palette[pred_seg]
pred_seg_color = np.concatenate([points[:, :3], pred_seg_color],
axis=1)
result_path = osp.join(out_dir, filename)
mmcv.mkdir_or_exist(result_path)
# online visualization of segmentation mask
# we show three masks in a row, scene_points, gt_mask, pred_mask
if show:
from .open3d_vis import Visualizer
mode = 'xyzrgb' if points.shape[1] == 6 else 'xyz'
vis = Visualizer(points, mode=mode)
if gt_seg is not None:
vis.add_seg_mask(gt_seg_color)
if pred_seg is not None:
vis.add_seg_mask(pred_seg_color)
show_path = osp.join(result_path,
f'{filename}_online.png') if snapshot else None
vis.show(show_path)
if points is not None:
_write_obj(points, osp.join(result_path, f'{filename}_points.obj'))
if gt_seg is not None:
_write_obj(gt_seg_color, osp.join(result_path, f'{filename}_gt.obj'))
if pred_seg is not None:
_write_obj(pred_seg_color, osp.join(result_path,
f'{filename}_pred.obj'))
def show_multi_modality_result(img,
gt_bboxes,
pred_bboxes,
proj_mat,
out_dir,
filename,
box_mode='lidar',
img_metas=None,
show=False,
gt_bbox_color=(61, 102, 255),
pred_bbox_color=(241, 101, 72)):
"""Convert multi-modality detection results into 2D results.
Project the predicted 3D bbox to 2D image plane and visualize them.
Args:
img (np.ndarray): The numpy array of image in cv2 fashion.
gt_bboxes (:obj:`BaseInstance3DBoxes`): Ground truth boxes.
pred_bboxes (:obj:`BaseInstance3DBoxes`): Predicted boxes.
proj_mat (numpy.array, shape=[4, 4]): The projection matrix
according to the camera intrinsic parameters.
out_dir (str): Path of output directory.
filename (str): Filename of the current frame.
box_mode (str, optional): Coordinate system the boxes are in.
Should be one of 'depth', 'lidar' and 'camera'.
Defaults to 'lidar'.
img_metas (dict, optional): Used in projecting depth bbox.
Defaults to None.
show (bool, optional): Visualize the results online. Defaults to False.
gt_bbox_color (str or tuple(int), optional): Color of bbox lines.
The tuple of color should be in BGR order. Default: (255, 102, 61).
pred_bbox_color (str or tuple(int), optional): Color of bbox lines.
The tuple of color should be in BGR order. Default: (72, 101, 241).
"""
if box_mode == 'depth':
draw_bbox = draw_depth_bbox3d_on_img
elif box_mode == 'lidar':
draw_bbox = draw_lidar_bbox3d_on_img
elif box_mode == 'camera':
draw_bbox = draw_camera_bbox3d_on_img
else:
raise NotImplementedError(f'unsupported box mode {box_mode}')
result_path = osp.join(out_dir, filename)
mmcv.mkdir_or_exist(result_path)
if show:
show_img = img.copy()
if gt_bboxes is not None:
show_img = draw_bbox(
gt_bboxes, show_img, proj_mat, img_metas, color=gt_bbox_color)
if pred_bboxes is not None:
show_img = draw_bbox(
pred_bboxes,
show_img,
proj_mat,
img_metas,
color=pred_bbox_color)
mmcv.imshow(show_img, win_name='project_bbox3d_img', wait_time=0)
if img is not None:
mmcv.imwrite(img, osp.join(result_path, f'{filename}_img.png'))
if gt_bboxes is not None:
gt_img = draw_bbox(
gt_bboxes, img, proj_mat, img_metas, color=gt_bbox_color)
mmcv.imwrite(gt_img, osp.join(result_path, f'{filename}_gt.png'))
if pred_bboxes is not None:
pred_img = draw_bbox(
pred_bboxes, img, proj_mat, img_metas, color=pred_bbox_color)
mmcv.imwrite(pred_img, osp.join(result_path, f'{filename}_pred.png'))
# Copyright (c) OpenMMLab. All rights reserved.
from .builder import build_voxel_generator
from .voxel_generator import VoxelGenerator
__all__ = ['build_voxel_generator', 'VoxelGenerator']
# Copyright (c) OpenMMLab. All rights reserved.
import mmcv
from . import voxel_generator
def build_voxel_generator(cfg, **kwargs):
"""Builder of voxel generator."""
if isinstance(cfg, voxel_generator.VoxelGenerator):
return cfg
elif isinstance(cfg, dict):
return mmcv.runner.obj_from_dict(
cfg, voxel_generator, default_args=kwargs)
else:
raise TypeError('Invalid type {} for building a sampler'.format(
type(cfg)))
# Copyright (c) OpenMMLab. All rights reserved.
import numba
import numpy as np
class VoxelGenerator(object):
"""Voxel generator in numpy implementation.
Args:
voxel_size (list[float]): Size of a single voxel
point_cloud_range (list[float]): Range of points
max_num_points (int): Maximum number of points in a single voxel
max_voxels (int, optional): Maximum number of voxels.
Defaults to 20000.
"""
def __init__(self,
voxel_size,
point_cloud_range,
max_num_points,
max_voxels=20000):
point_cloud_range = np.array(point_cloud_range, dtype=np.float32)
# [0, -40, -3, 70.4, 40, 1]
voxel_size = np.array(voxel_size, dtype=np.float32)
grid_size = (point_cloud_range[3:] -
point_cloud_range[:3]) / voxel_size
grid_size = np.round(grid_size).astype(np.int64)
self._voxel_size = voxel_size
self._point_cloud_range = point_cloud_range
self._max_num_points = max_num_points
self._max_voxels = max_voxels
self._grid_size = grid_size
def generate(self, points):
"""Generate voxels given points."""
return points_to_voxel(points, self._voxel_size,
self._point_cloud_range, self._max_num_points,
True, self._max_voxels)
@property
def voxel_size(self):
"""list[float]: Size of a single voxel."""
return self._voxel_size
@property
def max_num_points_per_voxel(self):
"""int: Maximum number of points per voxel."""
return self._max_num_points
@property
def point_cloud_range(self):
"""list[float]: Range of point cloud."""
return self._point_cloud_range
@property
def grid_size(self):
"""np.ndarray: The size of grids."""
return self._grid_size
def __repr__(self):
"""str: Return a string that describes the module."""
repr_str = self.__class__.__name__
indent = ' ' * (len(repr_str) + 1)
repr_str += f'(voxel_size={self._voxel_size},\n'
repr_str += indent + 'point_cloud_range='
repr_str += f'{self._point_cloud_range.tolist()},\n'
repr_str += indent + f'max_num_points={self._max_num_points},\n'
repr_str += indent + f'max_voxels={self._max_voxels},\n'
repr_str += indent + f'grid_size={self._grid_size.tolist()}'
repr_str += ')'
return repr_str
def points_to_voxel(points,
voxel_size,
coors_range,
max_points=35,
reverse_index=True,
max_voxels=20000):
"""convert kitti points(N, >=3) to voxels.
Args:
points (np.ndarray): [N, ndim]. points[:, :3] contain xyz points and
points[:, 3:] contain other information such as reflectivity.
voxel_size (list, tuple, np.ndarray): [3] xyz, indicate voxel size
coors_range (list[float | tuple[float] | ndarray]): Voxel range.
format: xyzxyz, minmax
max_points (int): Indicate maximum points contained in a voxel.
reverse_index (bool): Whether return reversed coordinates.
if points has xyz format and reverse_index is True, output
coordinates will be zyx format, but points in features always
xyz format.
max_voxels (int): Maximum number of voxels this function creates.
For second, 20000 is a good choice. Points should be shuffled for
randomness before this function because max_voxels drops points.
Returns:
tuple[np.ndarray]:
voxels: [M, max_points, ndim] float tensor. only contain points.
coordinates: [M, 3] int32 tensor.
num_points_per_voxel: [M] int32 tensor.
"""
if not isinstance(voxel_size, np.ndarray):
voxel_size = np.array(voxel_size, dtype=points.dtype)
if not isinstance(coors_range, np.ndarray):
coors_range = np.array(coors_range, dtype=points.dtype)
voxelmap_shape = (coors_range[3:] - coors_range[:3]) / voxel_size
voxelmap_shape = tuple(np.round(voxelmap_shape).astype(np.int32).tolist())
if reverse_index:
voxelmap_shape = voxelmap_shape[::-1]
# don't create large array in jit(nopython=True) code.
num_points_per_voxel = np.zeros(shape=(max_voxels, ), dtype=np.int32)
coor_to_voxelidx = -np.ones(shape=voxelmap_shape, dtype=np.int32)
voxels = np.zeros(
shape=(max_voxels, max_points, points.shape[-1]), dtype=points.dtype)
coors = np.zeros(shape=(max_voxels, 3), dtype=np.int32)
if reverse_index:
voxel_num = _points_to_voxel_reverse_kernel(
points, voxel_size, coors_range, num_points_per_voxel,
coor_to_voxelidx, voxels, coors, max_points, max_voxels)
else:
voxel_num = _points_to_voxel_kernel(points, voxel_size, coors_range,
num_points_per_voxel,
coor_to_voxelidx, voxels, coors,
max_points, max_voxels)
coors = coors[:voxel_num]
voxels = voxels[:voxel_num]
num_points_per_voxel = num_points_per_voxel[:voxel_num]
return voxels, coors, num_points_per_voxel
@numba.jit(nopython=True)
def _points_to_voxel_reverse_kernel(points,
voxel_size,
coors_range,
num_points_per_voxel,
coor_to_voxelidx,
voxels,
coors,
max_points=35,
max_voxels=20000):
"""convert kitti points(N, >=3) to voxels.
Args:
points (np.ndarray): [N, ndim]. points[:, :3] contain xyz points and
points[:, 3:] contain other information such as reflectivity.
voxel_size (list, tuple, np.ndarray): [3] xyz, indicate voxel size
coors_range (list[float | tuple[float] | ndarray]): Range of voxels.
format: xyzxyz, minmax
num_points_per_voxel (int): Number of points per voxel.
coor_to_voxel_idx (np.ndarray): A voxel grid of shape (D, H, W),
which has the same shape as the complete voxel map. It indicates
the index of each corresponding voxel.
voxels (np.ndarray): Created empty voxels.
coors (np.ndarray): Created coordinates of each voxel.
max_points (int): Indicate maximum points contained in a voxel.
max_voxels (int): Maximum number of voxels this function create.
for second, 20000 is a good choice. Points should be shuffled for
randomness before this function because max_voxels drops points.
Returns:
tuple[np.ndarray]:
voxels: Shape [M, max_points, ndim], only contain points.
coordinates: Shape [M, 3].
num_points_per_voxel: Shape [M].
"""
# put all computations to one loop.
# we shouldn't create large array in main jit code, otherwise
# reduce performance
N = points.shape[0]
# ndim = points.shape[1] - 1
ndim = 3
ndim_minus_1 = ndim - 1
grid_size = (coors_range[3:] - coors_range[:3]) / voxel_size
# np.round(grid_size)
# grid_size = np.round(grid_size).astype(np.int64)(np.int32)
grid_size = np.round(grid_size, 0, grid_size).astype(np.int32)
coor = np.zeros(shape=(3, ), dtype=np.int32)
voxel_num = 0
failed = False
for i in range(N):
failed = False
for j in range(ndim):
c = np.floor((points[i, j] - coors_range[j]) / voxel_size[j])
if c < 0 or c >= grid_size[j]:
failed = True
break
coor[ndim_minus_1 - j] = c
if failed:
continue
voxelidx = coor_to_voxelidx[coor[0], coor[1], coor[2]]
if voxelidx == -1:
voxelidx = voxel_num
if voxel_num >= max_voxels:
continue
voxel_num += 1
coor_to_voxelidx[coor[0], coor[1], coor[2]] = voxelidx
coors[voxelidx] = coor
num = num_points_per_voxel[voxelidx]
if num < max_points:
voxels[voxelidx, num] = points[i]
num_points_per_voxel[voxelidx] += 1
return voxel_num
@numba.jit(nopython=True)
def _points_to_voxel_kernel(points,
voxel_size,
coors_range,
num_points_per_voxel,
coor_to_voxelidx,
voxels,
coors,
max_points=35,
max_voxels=20000):
"""convert kitti points(N, >=3) to voxels.
Args:
points (np.ndarray): [N, ndim]. points[:, :3] contain xyz points and
points[:, 3:] contain other information such as reflectivity.
voxel_size (list, tuple, np.ndarray): [3] xyz, indicate voxel size.
coors_range (list[float | tuple[float] | ndarray]): Range of voxels.
format: xyzxyz, minmax
num_points_per_voxel (int): Number of points per voxel.
coor_to_voxel_idx (np.ndarray): A voxel grid of shape (D, H, W),
which has the same shape as the complete voxel map. It indicates
the index of each corresponding voxel.
voxels (np.ndarray): Created empty voxels.
coors (np.ndarray): Created coordinates of each voxel.
max_points (int): Indicate maximum points contained in a voxel.
max_voxels (int): Maximum number of voxels this function create.
for second, 20000 is a good choice. Points should be shuffled for
randomness before this function because max_voxels drops points.
Returns:
tuple[np.ndarray]:
voxels: Shape [M, max_points, ndim], only contain points.
coordinates: Shape [M, 3].
num_points_per_voxel: Shape [M].
"""
N = points.shape[0]
# ndim = points.shape[1] - 1
ndim = 3
grid_size = (coors_range[3:] - coors_range[:3]) / voxel_size
# grid_size = np.round(grid_size).astype(np.int64)(np.int32)
grid_size = np.round(grid_size, 0, grid_size).astype(np.int32)
# lower_bound = coors_range[:3]
# upper_bound = coors_range[3:]
coor = np.zeros(shape=(3, ), dtype=np.int32)
voxel_num = 0
failed = False
for i in range(N):
failed = False
for j in range(ndim):
c = np.floor((points[i, j] - coors_range[j]) / voxel_size[j])
if c < 0 or c >= grid_size[j]:
failed = True
break
coor[j] = c
if failed:
continue
voxelidx = coor_to_voxelidx[coor[0], coor[1], coor[2]]
if voxelidx == -1:
voxelidx = voxel_num
if voxel_num >= max_voxels:
continue
voxel_num += 1
coor_to_voxelidx[coor[0], coor[1], coor[2]] = voxelidx
coors[voxelidx] = coor
num = num_points_per_voxel[voxelidx]
if num < max_points:
voxels[voxelidx, num] = points[i]
num_points_per_voxel[voxelidx] += 1
return voxel_num
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.datasets.builder import build_dataloader
from .builder import DATASETS, PIPELINES, build_dataset
from .custom_3d import Custom3DDataset
from .custom_3d_seg import Custom3DSegDataset
from .kitti_dataset import KittiDataset
from .kitti_mono_dataset import KittiMonoDataset
from .lyft_dataset import LyftDataset
from .nuscenes_dataset import NuScenesDataset
from .nuscenes_mono_dataset import NuScenesMonoDataset
# yapf: disable
from .pipelines import (AffineResize, BackgroundPointsFilter, GlobalAlignment,
GlobalRotScaleTrans, IndoorPatchPointSample,
IndoorPointSample, LoadAnnotations3D,
LoadPointsFromDict, LoadPointsFromFile,
LoadPointsFromMultiSweeps, MultiViewWrapper,
NormalizePointsColor, ObjectNameFilter, ObjectNoise,
ObjectRangeFilter, ObjectSample, PointSample,
PointShuffle, PointsRangeFilter, RandomDropPointsColor,
RandomFlip3D, RandomJitterPoints, RandomRotate,
RandomShiftScale, RangeLimitedRandomCrop,
VoxelBasedPointSampler)
# yapf: enable
from .s3dis_dataset import S3DISDataset, S3DISSegDataset
from .scannet_dataset import (ScanNetDataset, ScanNetInstanceSegDataset,
ScanNetSegDataset)
from .semantickitti_dataset import SemanticKITTIDataset
from .sunrgbd_dataset import SUNRGBDDataset
from .utils import get_loading_pipeline
from .waymo_dataset import WaymoDataset
__all__ = [
'KittiDataset', 'KittiMonoDataset', 'build_dataloader', 'DATASETS',
'build_dataset', 'NuScenesDataset', 'NuScenesMonoDataset', 'LyftDataset',
'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',
'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter',
'LoadPointsFromFile', 'S3DISSegDataset', 'S3DISDataset',
'NormalizePointsColor', 'IndoorPatchPointSample', 'IndoorPointSample',
'PointSample', 'LoadAnnotations3D', 'GlobalAlignment', 'SUNRGBDDataset',
'ScanNetDataset', 'ScanNetSegDataset', 'ScanNetInstanceSegDataset',
'SemanticKITTIDataset', 'Custom3DDataset', 'Custom3DSegDataset',
'LoadPointsFromMultiSweeps', 'WaymoDataset', 'BackgroundPointsFilter',
'VoxelBasedPointSampler', 'get_loading_pipeline', 'RandomDropPointsColor',
'RandomJitterPoints', 'ObjectNameFilter', 'AffineResize',
'RandomShiftScale', 'LoadPointsFromDict', 'PIPELINES',
'RangeLimitedRandomCrop', 'RandomRotate', 'MultiViewWrapper'
]
# Copyright (c) OpenMMLab. All rights reserved.
import platform
from mmcv.utils import Registry, build_from_cfg
from mmdet.datasets import DATASETS as MMDET_DATASETS
from mmdet.datasets.builder import _concat_dataset
if platform.system() != 'Windows':
# https://github.com/pytorch/pytorch/issues/973
import resource
rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
base_soft_limit = rlimit[0]
hard_limit = rlimit[1]
soft_limit = min(max(4096, base_soft_limit), hard_limit)
resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))
OBJECTSAMPLERS = Registry('Object sampler')
DATASETS = Registry('dataset')
PIPELINES = Registry('pipeline')
def build_dataset(cfg, default_args=None):
from mmdet3d.datasets.dataset_wrappers import CBGSDataset
from mmdet.datasets.dataset_wrappers import (ClassBalancedDataset,
ConcatDataset, RepeatDataset)
if isinstance(cfg, (list, tuple)):
dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg])
elif cfg['type'] == 'ConcatDataset':
dataset = ConcatDataset(
[build_dataset(c, default_args) for c in cfg['datasets']],
cfg.get('separate_eval', True))
elif cfg['type'] == 'RepeatDataset':
dataset = RepeatDataset(
build_dataset(cfg['dataset'], default_args), cfg['times'])
elif cfg['type'] == 'ClassBalancedDataset':
dataset = ClassBalancedDataset(
build_dataset(cfg['dataset'], default_args), cfg['oversample_thr'])
elif cfg['type'] == 'CBGSDataset':
dataset = CBGSDataset(build_dataset(cfg['dataset'], default_args))
elif isinstance(cfg.get('ann_file'), (list, tuple)):
dataset = _concat_dataset(cfg, default_args)
elif cfg['type'] in DATASETS._module_dict.keys():
dataset = build_from_cfg(cfg, DATASETS, default_args)
else:
dataset = build_from_cfg(cfg, MMDET_DATASETS, default_args)
return dataset
# Copyright (c) OpenMMLab. All rights reserved.
import tempfile
import warnings
from os import path as osp
import mmcv
import numpy as np
from torch.utils.data import Dataset
from ..core.bbox import get_box_type
from .builder import DATASETS
from .pipelines import Compose
from .utils import extract_result_dict, get_loading_pipeline
@DATASETS.register_module()
class Custom3DDataset(Dataset):
"""Customized 3D dataset.
This is the base dataset of SUNRGB-D, ScanNet, nuScenes, and KITTI
dataset.
.. code-block:: none
[
{'sample_idx':
'lidar_points': {'lidar_path': velodyne_path,
....
},
'annos': {'box_type_3d': (str) 'LiDAR/Camera/Depth'
'gt_bboxes_3d': <np.ndarray> (n, 7)
'gt_names': [list]
....
}
'calib': { .....}
'images': { .....}
}
]
Args:
data_root (str): Path of dataset root.
ann_file (str): Path of annotation file.
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
classes (tuple[str], optional): Classes used in the dataset.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to None.
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR'. Available options includes
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
"""
def __init__(self,
data_root,
ann_file,
pipeline=None,
classes=None,
modality=None,
box_type_3d='LiDAR',
filter_empty_gt=True,
test_mode=False,
file_client_args=dict(backend='disk')):
super().__init__()
self.data_root = data_root
self.ann_file = ann_file
self.test_mode = test_mode
self.modality = modality
self.filter_empty_gt = filter_empty_gt
self.box_type_3d, self.box_mode_3d = get_box_type(box_type_3d)
self.CLASSES = self.get_classes(classes)
self.file_client = mmcv.FileClient(**file_client_args)
self.cat2id = {name: i for i, name in enumerate(self.CLASSES)}
# load annotations
if hasattr(self.file_client, 'get_local_path'):
with self.file_client.get_local_path(self.ann_file) as local_path:
self.data_infos = self.load_annotations(open(local_path, 'rb'))
else:
warnings.warn(
'The used MMCV version does not have get_local_path. '
f'We treat the {self.ann_file} as local paths and it '
'might cause errors if the path is not a local path. '
'Please use MMCV>= 1.3.16 if you meet errors.')
self.data_infos = self.load_annotations(self.ann_file)
# process pipeline
if pipeline is not None:
self.pipeline = Compose(pipeline)
# set group flag for the samplers
if not self.test_mode:
self._set_group_flag()
def load_annotations(self, ann_file):
"""Load annotations from ann_file.
Args:
ann_file (str): Path of the annotation file.
Returns:
list[dict]: List of annotations.
"""
# loading data from a file-like object needs file format
return mmcv.load(ann_file, file_format='pkl')
def get_data_info(self, index):
"""Get data info according to the given index.
Args:
index (int): Index of the sample data to get.
Returns:
dict: Data information that will be passed to the data
preprocessing pipelines. It includes the following keys:
- sample_idx (str): Sample index.
- pts_filename (str): Filename of point clouds.
- file_name (str): Filename of point clouds.
- ann_info (dict): Annotation info.
"""
info = self.data_infos[index]
sample_idx = info['sample_idx']
pts_filename = osp.join(self.data_root,
info['lidar_points']['lidar_path'])
input_dict = dict(
pts_filename=pts_filename,
sample_idx=sample_idx,
file_name=pts_filename)
if not self.test_mode:
annos = self.get_ann_info(index)
input_dict['ann_info'] = annos
if self.filter_empty_gt and ~(annos['gt_labels_3d'] != -1).any():
return None
return input_dict
def get_ann_info(self, index):
"""Get annotation info according to the given index.
Args:
index (int): Index of the annotation data to get.
Returns:
dict: Annotation information consists of the following keys:
- gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`):
3D ground truth bboxes
- gt_labels_3d (np.ndarray): Labels of ground truths.
- gt_names (list[str]): Class names of ground truths.
"""
info = self.data_infos[index]
gt_bboxes_3d = info['annos']['gt_bboxes_3d']
gt_names_3d = info['annos']['gt_names']
gt_labels_3d = []
for cat in gt_names_3d:
if cat in self.CLASSES:
gt_labels_3d.append(self.CLASSES.index(cat))
else:
gt_labels_3d.append(-1)
gt_labels_3d = np.array(gt_labels_3d)
# Obtain original box 3d type in info file
ori_box_type_3d = info['annos']['box_type_3d']
ori_box_type_3d, _ = get_box_type(ori_box_type_3d)
# turn original box type to target box type
gt_bboxes_3d = ori_box_type_3d(
gt_bboxes_3d,
box_dim=gt_bboxes_3d.shape[-1],
origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d,
gt_labels_3d=gt_labels_3d,
gt_names=gt_names_3d)
return anns_results
def pre_pipeline(self, results):
"""Initialization before data preparation.
Args:
results (dict): Dict before data preprocessing.
- img_fields (list): Image fields.
- bbox3d_fields (list): 3D bounding boxes fields.
- pts_mask_fields (list): Mask fields of points.
- pts_seg_fields (list): Mask fields of point segments.
- bbox_fields (list): Fields of bounding boxes.
- mask_fields (list): Fields of masks.
- seg_fields (list): Segment fields.
- box_type_3d (str): 3D box type.
- box_mode_3d (str): 3D box mode.
"""
results['img_fields'] = []
results['bbox3d_fields'] = []
results['pts_mask_fields'] = []
results['pts_seg_fields'] = []
results['bbox_fields'] = []
results['mask_fields'] = []
results['seg_fields'] = []
results['box_type_3d'] = self.box_type_3d
results['box_mode_3d'] = self.box_mode_3d
def prepare_train_data(self, index):
"""Training data preparation.
Args:
index (int): Index for accessing the target data.
Returns:
dict: Training data dict of the corresponding index.
"""
input_dict = self.get_data_info(index)
if input_dict is None:
return None
self.pre_pipeline(input_dict)
example = self.pipeline(input_dict)
if self.filter_empty_gt and \
(example is None or
~(example['gt_labels_3d']._data != -1).any()):
return None
return example
def prepare_test_data(self, index):
"""Prepare data for testing.
Args:
index (int): Index for accessing the target data.
Returns:
dict: Testing data dict of the corresponding index.
"""
input_dict = self.get_data_info(index)
self.pre_pipeline(input_dict)
example = self.pipeline(input_dict)
return example
@classmethod
def get_classes(cls, classes=None):
"""Get class names of current dataset.
Args:
classes (Sequence[str] | str): If classes is None, use
default CLASSES defined by builtin dataset. If classes is a
string, take it as a file name. The file contains the name of
classes where each line contains one class name. If classes is
a tuple or list, override the CLASSES defined by the dataset.
Return:
list[str]: A list of class names.
"""
if classes is None:
return cls.CLASSES
if isinstance(classes, str):
# take it as a file path
class_names = mmcv.list_from_file(classes)
elif isinstance(classes, (tuple, list)):
class_names = classes
else:
raise ValueError(f'Unsupported type {type(classes)} of classes.')
return class_names
def format_results(self,
outputs,
pklfile_prefix=None,
submission_prefix=None):
"""Format the results to pkl file.
Args:
outputs (list[dict]): Testing results of the dataset.
pklfile_prefix (str): The prefix of pkl files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
Returns:
tuple: (outputs, tmp_dir), outputs is the detection results,
tmp_dir is the temporal directory created for saving json
files when ``jsonfile_prefix`` is not specified.
"""
if pklfile_prefix is None:
tmp_dir = tempfile.TemporaryDirectory()
pklfile_prefix = osp.join(tmp_dir.name, 'results')
out = f'{pklfile_prefix}.pkl'
mmcv.dump(outputs, out)
return outputs, tmp_dir
def evaluate(self,
results,
metric=None,
iou_thr=(0.25, 0.5),
logger=None,
show=False,
out_dir=None,
pipeline=None):
"""Evaluate.
Evaluation in indoor protocol.
Args:
results (list[dict]): List of results.
metric (str | list[str], optional): Metrics to be evaluated.
Defaults to None.
iou_thr (list[float]): AP IoU thresholds. Defaults to (0.25, 0.5).
logger (logging.Logger | str, optional): Logger used for printing
related information during evaluation. Defaults to None.
show (bool, optional): Whether to visualize.
Default: False.
out_dir (str, optional): Path to save the visualization results.
Default: None.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
Returns:
dict: Evaluation results.
"""
from mmdet3d.core.evaluation import indoor_eval
assert isinstance(
results, list), f'Expect results to be list, got {type(results)}.'
assert len(results) > 0, 'Expect length of results > 0.'
assert len(results) == len(self.data_infos)
assert isinstance(
results[0], dict
), f'Expect elements in results to be dict, got {type(results[0])}.'
gt_annos = [info['annos'] for info in self.data_infos]
label2cat = {i: cat_id for i, cat_id in enumerate(self.CLASSES)}
ret_dict = indoor_eval(
gt_annos,
results,
iou_thr,
label2cat,
logger=logger,
box_type_3d=self.box_type_3d,
box_mode_3d=self.box_mode_3d)
if show:
self.show(results, out_dir, pipeline=pipeline)
return ret_dict
def _build_default_pipeline(self):
"""Build the default pipeline for this dataset."""
raise NotImplementedError('_build_default_pipeline is not implemented '
f'for dataset {self.__class__.__name__}')
def _get_pipeline(self, pipeline):
"""Get data loading pipeline in self.show/evaluate function.
Args:
pipeline (list[dict]): Input pipeline. If None is given,
get from self.pipeline.
"""
if pipeline is None:
if not hasattr(self, 'pipeline') or self.pipeline is None:
warnings.warn(
'Use default pipeline for data loading, this may cause '
'errors when data is on ceph')
return self._build_default_pipeline()
loading_pipeline = get_loading_pipeline(self.pipeline.transforms)
return Compose(loading_pipeline)
return Compose(pipeline)
def _extract_data(self, index, pipeline, key, load_annos=False):
"""Load data using input pipeline and extract data according to key.
Args:
index (int): Index for accessing the target data.
pipeline (:obj:`Compose`): Composed data loading pipeline.
key (str | list[str]): One single or a list of data key.
load_annos (bool): Whether to load data annotations.
If True, need to set self.test_mode as False before loading.
Returns:
np.ndarray | torch.Tensor | list[np.ndarray | torch.Tensor]:
A single or a list of loaded data.
"""
assert pipeline is not None, 'data loading pipeline is not provided'
# when we want to load ground-truth via pipeline (e.g. bbox, seg mask)
# we need to set self.test_mode as False so that we have 'annos'
if load_annos:
original_test_mode = self.test_mode
self.test_mode = False
input_dict = self.get_data_info(index)
self.pre_pipeline(input_dict)
example = pipeline(input_dict)
# extract data items according to keys
if isinstance(key, str):
data = extract_result_dict(example, key)
else:
data = [extract_result_dict(example, k) for k in key]
if load_annos:
self.test_mode = original_test_mode
return data
def __len__(self):
"""Return the length of data infos.
Returns:
int: Length of data infos.
"""
return len(self.data_infos)
def _rand_another(self, idx):
"""Randomly get another item with the same flag.
Returns:
int: Another index of item with the same flag.
"""
pool = np.where(self.flag == self.flag[idx])[0]
return np.random.choice(pool)
def __getitem__(self, idx):
"""Get item from infos according to the given index.
Returns:
dict: Data dictionary of the corresponding index.
"""
if self.test_mode:
return self.prepare_test_data(idx)
while True:
data = self.prepare_train_data(idx)
if data is None:
idx = self._rand_another(idx)
continue
return data
def _set_group_flag(self):
"""Set flag according to image aspect ratio.
Images with aspect ratio greater than 1 will be set as group 1,
otherwise group 0. In 3D datasets, they are all the same, thus are all
zeros.
"""
self.flag = np.zeros(len(self), dtype=np.uint8)
# Copyright (c) OpenMMLab. All rights reserved.
import tempfile
import warnings
from os import path as osp
import mmcv
import numpy as np
from torch.utils.data import Dataset
from mmseg.datasets import DATASETS as SEG_DATASETS
from .builder import DATASETS
from .pipelines import Compose
from .utils import extract_result_dict, get_loading_pipeline
@DATASETS.register_module()
@SEG_DATASETS.register_module()
class Custom3DSegDataset(Dataset):
"""Customized 3D dataset for semantic segmentation task.
This is the base dataset of ScanNet and S3DIS dataset.
Args:
data_root (str): Path of dataset root.
ann_file (str): Path of annotation file.
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
classes (tuple[str], optional): Classes used in the dataset.
Defaults to None.
palette (list[list[int]], optional): The palette of segmentation map.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to None.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
ignore_index (int, optional): The label index to be ignored, e.g.
unannotated points. If None is given, set to len(self.CLASSES) to
be consistent with PointSegClassMapping function in pipeline.
Defaults to None.
scene_idxs (np.ndarray | str, optional): Precomputed index to load
data. For scenes with many points, we may sample it several times.
Defaults to None.
"""
# names of all classes data used for the task
CLASSES = None
# class_ids used for training
VALID_CLASS_IDS = None
# all possible class_ids in loaded segmentation mask
ALL_CLASS_IDS = None
# official color for visualization
PALETTE = None
def __init__(self,
data_root,
ann_file,
pipeline=None,
classes=None,
palette=None,
modality=None,
test_mode=False,
ignore_index=None,
scene_idxs=None,
file_client_args=dict(backend='disk')):
super().__init__()
self.data_root = data_root
self.ann_file = ann_file
self.test_mode = test_mode
self.modality = modality
self.file_client = mmcv.FileClient(**file_client_args)
# load annotations
if hasattr(self.file_client, 'get_local_path'):
with self.file_client.get_local_path(self.ann_file) as local_path:
self.data_infos = self.load_annotations(open(local_path, 'rb'))
else:
warnings.warn(
'The used MMCV version does not have get_local_path. '
f'We treat the {self.ann_file} as local paths and it '
'might cause errors if the path is not a local path. '
'Please use MMCV>= 1.3.16 if you meet errors.')
self.data_infos = self.load_annotations(self.ann_file)
if pipeline is not None:
self.pipeline = Compose(pipeline)
self.ignore_index = len(self.CLASSES) if \
ignore_index is None else ignore_index
self.scene_idxs = self.get_scene_idxs(scene_idxs)
self.CLASSES, self.PALETTE = \
self.get_classes_and_palette(classes, palette)
# set group flag for the sampler
if not self.test_mode:
self._set_group_flag()
def load_annotations(self, ann_file):
"""Load annotations from ann_file.
Args:
ann_file (str): Path of the annotation file.
Returns:
list[dict]: List of annotations.
"""
# loading data from a file-like object needs file format
return mmcv.load(ann_file, file_format='pkl')
def get_data_info(self, index):
"""Get data info according to the given index.
Args:
index (int): Index of the sample data to get.
Returns:
dict: Data information that will be passed to the data
preprocessing pipelines. It includes the following keys:
- sample_idx (str): Sample index.
- pts_filename (str): Filename of point clouds.
- file_name (str): Filename of point clouds.
- ann_info (dict): Annotation info.
"""
info = self.data_infos[index]
sample_idx = info['point_cloud']['lidar_idx']
pts_filename = osp.join(self.data_root, info['pts_path'])
input_dict = dict(
pts_filename=pts_filename,
sample_idx=sample_idx,
file_name=pts_filename)
if not self.test_mode:
annos = self.get_ann_info(index)
input_dict['ann_info'] = annos
return input_dict
def pre_pipeline(self, results):
"""Initialization before data preparation.
Args:
results (dict): Dict before data preprocessing.
- img_fields (list): Image fields.
- pts_mask_fields (list): Mask fields of points.
- pts_seg_fields (list): Mask fields of point segments.
- mask_fields (list): Fields of masks.
- seg_fields (list): Segment fields.
"""
results['img_fields'] = []
results['pts_mask_fields'] = []
results['pts_seg_fields'] = []
results['mask_fields'] = []
results['seg_fields'] = []
results['bbox3d_fields'] = []
def prepare_train_data(self, index):
"""Training data preparation.
Args:
index (int): Index for accessing the target data.
Returns:
dict: Training data dict of the corresponding index.
"""
input_dict = self.get_data_info(index)
if input_dict is None:
return None
self.pre_pipeline(input_dict)
example = self.pipeline(input_dict)
return example
def prepare_test_data(self, index):
"""Prepare data for testing.
Args:
index (int): Index for accessing the target data.
Returns:
dict: Testing data dict of the corresponding index.
"""
input_dict = self.get_data_info(index)
self.pre_pipeline(input_dict)
example = self.pipeline(input_dict)
return example
def get_classes_and_palette(self, classes=None, palette=None):
"""Get class names of current dataset.
This function is taken from MMSegmentation.
Args:
classes (Sequence[str] | str): If classes is None, use
default CLASSES defined by builtin dataset. If classes is a
string, take it as a file name. The file contains the name of
classes where each line contains one class name. If classes is
a tuple or list, override the CLASSES defined by the dataset.
Defaults to None.
palette (Sequence[Sequence[int]]] | np.ndarray):
The palette of segmentation map. If None is given, random
palette will be generated. Defaults to None.
"""
if classes is None:
self.custom_classes = False
# map id in the loaded mask to label used for training
self.label_map = {
cls_id: self.ignore_index
for cls_id in self.ALL_CLASS_IDS
}
self.label_map.update(
{cls_id: i
for i, cls_id in enumerate(self.VALID_CLASS_IDS)})
# map label to category name
self.label2cat = {
i: cat_name
for i, cat_name in enumerate(self.CLASSES)
}
return self.CLASSES, self.PALETTE
self.custom_classes = True
if isinstance(classes, str):
# take it as a file path
class_names = mmcv.list_from_file(classes)
elif isinstance(classes, (tuple, list)):
class_names = classes
else:
raise ValueError(f'Unsupported type {type(classes)} of classes.')
if self.CLASSES:
if not set(class_names).issubset(self.CLASSES):
raise ValueError('classes is not a subset of CLASSES.')
# update valid_class_ids
self.VALID_CLASS_IDS = [
self.VALID_CLASS_IDS[self.CLASSES.index(cls_name)]
for cls_name in class_names
]
# dictionary, its keys are the old label ids and its values
# are the new label ids.
# used for changing pixel labels in load_annotations.
self.label_map = {
cls_id: self.ignore_index
for cls_id in self.ALL_CLASS_IDS
}
self.label_map.update(
{cls_id: i
for i, cls_id in enumerate(self.VALID_CLASS_IDS)})
self.label2cat = {
i: cat_name
for i, cat_name in enumerate(class_names)
}
# modify palette for visualization
palette = [
self.PALETTE[self.CLASSES.index(cls_name)]
for cls_name in class_names
]
return class_names, palette
def get_scene_idxs(self, scene_idxs):
"""Compute scene_idxs for data sampling.
We sample more times for scenes with more points.
"""
if self.test_mode:
# when testing, we load one whole scene every time
return np.arange(len(self.data_infos)).astype(np.int32)
# we may need to re-sample different scenes according to scene_idxs
# this is necessary for indoor scene segmentation such as ScanNet
if scene_idxs is None:
scene_idxs = np.arange(len(self.data_infos))
if isinstance(scene_idxs, str):
with self.file_client.get_local_path(scene_idxs) as local_path:
scene_idxs = np.load(local_path)
else:
scene_idxs = np.array(scene_idxs)
return scene_idxs.astype(np.int32)
def format_results(self,
outputs,
pklfile_prefix=None,
submission_prefix=None):
"""Format the results to pkl file.
Args:
outputs (list[dict]): Testing results of the dataset.
pklfile_prefix (str): The prefix of pkl files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
Returns:
tuple: (outputs, tmp_dir), outputs is the detection results,
tmp_dir is the temporal directory created for saving json
files when ``jsonfile_prefix`` is not specified.
"""
if pklfile_prefix is None:
tmp_dir = tempfile.TemporaryDirectory()
pklfile_prefix = osp.join(tmp_dir.name, 'results')
out = f'{pklfile_prefix}.pkl'
mmcv.dump(outputs, out)
return outputs, tmp_dir
def evaluate(self,
results,
metric=None,
logger=None,
show=False,
out_dir=None,
pipeline=None):
"""Evaluate.
Evaluation in semantic segmentation protocol.
Args:
results (list[dict]): List of results.
metric (str | list[str]): Metrics to be evaluated.
logger (logging.Logger | str, optional): Logger used for printing
related information during evaluation. Defaults to None.
show (bool, optional): Whether to visualize.
Defaults to False.
out_dir (str, optional): Path to save the visualization results.
Defaults to None.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
Returns:
dict: Evaluation results.
"""
from mmdet3d.core.evaluation import seg_eval
assert isinstance(
results, list), f'Expect results to be list, got {type(results)}.'
assert len(results) > 0, 'Expect length of results > 0.'
assert len(results) == len(self.data_infos)
assert isinstance(
results[0], dict
), f'Expect elements in results to be dict, got {type(results[0])}.'
load_pipeline = self._get_pipeline(pipeline)
pred_sem_masks = [result['semantic_mask'] for result in results]
gt_sem_masks = [
self._extract_data(
i, load_pipeline, 'pts_semantic_mask', load_annos=True)
for i in range(len(self.data_infos))
]
ret_dict = seg_eval(
gt_sem_masks,
pred_sem_masks,
self.label2cat,
self.ignore_index,
logger=logger)
if show:
self.show(pred_sem_masks, out_dir, pipeline=pipeline)
return ret_dict
def _rand_another(self, idx):
"""Randomly get another item with the same flag.
Returns:
int: Another index of item with the same flag.
"""
pool = np.where(self.flag == self.flag[idx])[0]
return np.random.choice(pool)
def _build_default_pipeline(self):
"""Build the default pipeline for this dataset."""
raise NotImplementedError('_build_default_pipeline is not implemented '
f'for dataset {self.__class__.__name__}')
def _get_pipeline(self, pipeline):
"""Get data loading pipeline in self.show/evaluate function.
Args:
pipeline (list[dict]): Input pipeline. If None is given,
get from self.pipeline.
"""
if pipeline is None:
if not hasattr(self, 'pipeline') or self.pipeline is None:
warnings.warn(
'Use default pipeline for data loading, this may cause '
'errors when data is on ceph')
return self._build_default_pipeline()
loading_pipeline = get_loading_pipeline(self.pipeline.transforms)
return Compose(loading_pipeline)
return Compose(pipeline)
def _extract_data(self, index, pipeline, key, load_annos=False):
"""Load data using input pipeline and extract data according to key.
Args:
index (int): Index for accessing the target data.
pipeline (:obj:`Compose`): Composed data loading pipeline.
key (str | list[str]): One single or a list of data key.
load_annos (bool): Whether to load data annotations.
If True, need to set self.test_mode as False before loading.
Returns:
np.ndarray | torch.Tensor | list[np.ndarray | torch.Tensor]:
A single or a list of loaded data.
"""
assert pipeline is not None, 'data loading pipeline is not provided'
# when we want to load ground-truth via pipeline (e.g. bbox, seg mask)
# we need to set self.test_mode as False so that we have 'annos'
if load_annos:
original_test_mode = self.test_mode
self.test_mode = False
input_dict = self.get_data_info(index)
self.pre_pipeline(input_dict)
example = pipeline(input_dict)
# extract data items according to keys
if isinstance(key, str):
data = extract_result_dict(example, key)
else:
data = [extract_result_dict(example, k) for k in key]
if load_annos:
self.test_mode = original_test_mode
return data
def __len__(self):
"""Return the length of scene_idxs.
Returns:
int: Length of data infos.
"""
return len(self.scene_idxs)
def __getitem__(self, idx):
"""Get item from infos according to the given index.
In indoor scene segmentation task, each scene contains millions of
points. However, we only sample less than 10k points within a patch
each time. Therefore, we use `scene_idxs` to re-sample different rooms.
Returns:
dict: Data dictionary of the corresponding index.
"""
scene_idx = self.scene_idxs[idx] # map to scene idx
if self.test_mode:
return self.prepare_test_data(scene_idx)
while True:
data = self.prepare_train_data(scene_idx)
if data is None:
idx = self._rand_another(idx)
scene_idx = self.scene_idxs[idx] # map to scene idx
continue
return data
def _set_group_flag(self):
"""Set flag according to image aspect ratio.
Images with aspect ratio greater than 1 will be set as group 1,
otherwise group 0. In 3D datasets, they are all the same, thus are all
zeros.
"""
self.flag = np.zeros(len(self), dtype=np.uint8)
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
from .builder import DATASETS
@DATASETS.register_module()
class CBGSDataset(object):
"""A wrapper of class sampled dataset with ann_file path. Implementation of
paper `Class-balanced Grouping and Sampling for Point Cloud 3D Object
Detection <https://arxiv.org/abs/1908.09492.>`_.
Balance the number of scenes under different classes.
Args:
dataset (:obj:`CustomDataset`): The dataset to be class sampled.
"""
def __init__(self, dataset):
self.dataset = dataset
self.CLASSES = dataset.CLASSES
self.cat2id = {name: i for i, name in enumerate(self.CLASSES)}
self.sample_indices = self._get_sample_indices()
# self.dataset.data_infos = self.data_infos
if hasattr(self.dataset, 'flag'):
self.flag = np.array(
[self.dataset.flag[ind] for ind in self.sample_indices],
dtype=np.uint8)
def _get_sample_indices(self):
"""Load annotations from ann_file.
Args:
ann_file (str): Path of the annotation file.
Returns:
list[dict]: List of annotations after class sampling.
"""
class_sample_idxs = {cat_id: [] for cat_id in self.cat2id.values()}
for idx in range(len(self.dataset)):
sample_cat_ids = self.dataset.get_cat_ids(idx)
for cat_id in sample_cat_ids:
class_sample_idxs[cat_id].append(idx)
duplicated_samples = sum(
[len(v) for _, v in class_sample_idxs.items()])
class_distribution = {
k: len(v) / duplicated_samples
for k, v in class_sample_idxs.items()
}
sample_indices = []
frac = 1.0 / len(self.CLASSES)
ratios = [frac / v for v in class_distribution.values()]
for cls_inds, ratio in zip(list(class_sample_idxs.values()), ratios):
sample_indices += np.random.choice(cls_inds,
int(len(cls_inds) *
ratio)).tolist()
return sample_indices
def __getitem__(self, idx):
"""Get item from infos according to the given index.
Returns:
dict: Data dictionary of the corresponding index.
"""
ori_idx = self.sample_indices[idx]
return self.dataset[ori_idx]
def __len__(self):
"""Return the length of data infos.
Returns:
int: Length of data infos.
"""
return len(self.sample_indices)
# Copyright (c) OpenMMLab. All rights reserved.
import mmcv
import numpy as np
from mmdet.datasets import CustomDataset
from .builder import DATASETS
@DATASETS.register_module()
class Kitti2DDataset(CustomDataset):
r"""KITTI 2D Dataset.
This class serves as the API for experiments on the `KITTI Dataset
<http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d>`_.
Args:
data_root (str): Path of dataset root.
ann_file (str): Path of annotation file.
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
classes (tuple[str], optional): Classes used in the dataset.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to None.
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR'. Available options includes
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
"""
CLASSES = ('car', 'pedestrian', 'cyclist')
"""
Annotation format:
[
{
'image': {
'image_idx': 0,
'image_path': 'training/image_2/000000.png',
'image_shape': array([ 370, 1224], dtype=int32)
},
'point_cloud': {
'num_features': 4,
'velodyne_path': 'training/velodyne/000000.bin'
},
'calib': {
'P0': <np.ndarray> (4, 4),
'P1': <np.ndarray> (4, 4),
'P2': <np.ndarray> (4, 4),
'P3': <np.ndarray> (4, 4),
'R0_rect':4x4 np.array,
'Tr_velo_to_cam': 4x4 np.array,
'Tr_imu_to_velo': 4x4 np.array
},
'annos': {
'name': <np.ndarray> (n),
'truncated': <np.ndarray> (n),
'occluded': <np.ndarray> (n),
'alpha': <np.ndarray> (n),
'bbox': <np.ndarray> (n, 4),
'dimensions': <np.ndarray> (n, 3),
'location': <np.ndarray> (n, 3),
'rotation_y': <np.ndarray> (n),
'score': <np.ndarray> (n),
'index': array([0], dtype=int32),
'group_ids': array([0], dtype=int32),
'difficulty': array([0], dtype=int32),
'num_points_in_gt': <np.ndarray> (n),
}
}
]
"""
def load_annotations(self, ann_file):
"""Load annotations from ann_file.
Args:
ann_file (str): Path of the annotation file.
Returns:
list[dict]: List of annotations.
"""
self.data_infos = mmcv.load(ann_file)
self.cat2label = {
cat_name: i
for i, cat_name in enumerate(self.CLASSES)
}
return self.data_infos
def _filter_imgs(self, min_size=32):
"""Filter images without ground truths."""
valid_inds = []
for i, img_info in enumerate(self.data_infos):
if len(img_info['annos']['name']) > 0:
valid_inds.append(i)
return valid_inds
def get_ann_info(self, index):
"""Get annotation info according to the given index.
Args:
index (int): Index of the annotation data to get.
Returns:
dict: Annotation information consists of the following keys:
- bboxes (np.ndarray): Ground truth bboxes.
- labels (np.ndarray): Labels of ground truths.
"""
# Use index to get the annos, thus the evalhook could also use this api
info = self.data_infos[index]
annos = info['annos']
gt_names = annos['name']
gt_bboxes = annos['bbox']
difficulty = annos['difficulty']
# remove classes that is not needed
selected = self.keep_arrays_by_name(gt_names, self.CLASSES)
gt_bboxes = gt_bboxes[selected]
gt_names = gt_names[selected]
difficulty = difficulty[selected]
gt_labels = np.array([self.cat2label[n] for n in gt_names])
anns_results = dict(
bboxes=gt_bboxes.astype(np.float32),
labels=gt_labels,
)
return anns_results
def prepare_train_img(self, idx):
"""Training image preparation.
Args:
index (int): Index for accessing the target image data.
Returns:
dict: Training image data dict after preprocessing
corresponding to the index.
"""
img_raw_info = self.data_infos[idx]['image']
img_info = dict(filename=img_raw_info['image_path'])
ann_info = self.get_ann_info(idx)
if len(ann_info['bboxes']) == 0:
return None
results = dict(img_info=img_info, ann_info=ann_info)
if self.proposals is not None:
results['proposals'] = self.proposals[idx]
self.pre_pipeline(results)
return self.pipeline(results)
def prepare_test_img(self, idx):
"""Prepare data for testing.
Args:
index (int): Index for accessing the target image data.
Returns:
dict: Testing image data dict after preprocessing
corresponding to the index.
"""
img_raw_info = self.data_infos[idx]['image']
img_info = dict(filename=img_raw_info['image_path'])
results = dict(img_info=img_info)
if self.proposals is not None:
results['proposals'] = self.proposals[idx]
self.pre_pipeline(results)
return self.pipeline(results)
def drop_arrays_by_name(self, gt_names, used_classes):
"""Drop irrelevant ground truths by name.
Args:
gt_names (list[str]): Names of ground truths.
used_classes (list[str]): Classes of interest.
Returns:
np.ndarray: Indices of ground truths that will be dropped.
"""
inds = [i for i, x in enumerate(gt_names) if x not in used_classes]
inds = np.array(inds, dtype=np.int64)
return inds
def keep_arrays_by_name(self, gt_names, used_classes):
"""Keep useful ground truths by name.
Args:
gt_names (list[str]): Names of ground truths.
used_classes (list[str]): Classes of interest.
Returns:
np.ndarray: Indices of ground truths that will be keeped.
"""
inds = [i for i, x in enumerate(gt_names) if x in used_classes]
inds = np.array(inds, dtype=np.int64)
return inds
def reformat_bbox(self, outputs, out=None):
"""Reformat bounding boxes to KITTI 2D styles.
Args:
outputs (list[np.ndarray]): List of arrays storing the inferenced
bounding boxes and scores.
out (str, optional): The prefix of output file.
Default: None.
Returns:
list[dict]: A list of dictionaries with the kitti 2D format.
"""
from mmdet3d.core.bbox.transforms import bbox2result_kitti2d
sample_idx = [info['image']['image_idx'] for info in self.data_infos]
result_files = bbox2result_kitti2d(outputs, self.CLASSES, sample_idx,
out)
return result_files
def evaluate(self, result_files, eval_types=None):
"""Evaluation in KITTI protocol.
Args:
result_files (str): Path of result files.
eval_types (str, optional): Types of evaluation. Default: None.
KITTI dataset only support 'bbox' evaluation type.
Returns:
tuple (str, dict): Average precision results in str format
and average precision results in dict format.
"""
from mmdet3d.core.evaluation import kitti_eval
eval_types = ['bbox'] if not eval_types else eval_types
assert eval_types in ('bbox', ['bbox'
]), 'KITTI data set only evaluate bbox'
gt_annos = [info['annos'] for info in self.data_infos]
ap_result_str, ap_dict = kitti_eval(
gt_annos, result_files, self.CLASSES, eval_types=['bbox'])
return ap_result_str, ap_dict
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import os
import tempfile
from os import path as osp
import mmcv
import numpy as np
import torch
from mmcv.utils import print_log
from ..core import show_multi_modality_result, show_result
from ..core.bbox import (Box3DMode, CameraInstance3DBoxes, Coord3DMode,
LiDARInstance3DBoxes, points_cam2img)
from .builder import DATASETS
from .custom_3d import Custom3DDataset
from .pipelines import Compose
@DATASETS.register_module()
class KittiDataset(Custom3DDataset):
r"""KITTI Dataset.
This class serves as the API for experiments on the `KITTI Dataset
<http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d>`_.
Args:
data_root (str): Path of dataset root.
ann_file (str): Path of annotation file.
split (str): Split of input data.
pts_prefix (str, optional): Prefix of points files.
Defaults to 'velodyne'.
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
classes (tuple[str], optional): Classes used in the dataset.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to None.
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
pcd_limit_range (list, optional): The range of point cloud used to
filter invalid predicted boxes.
Default: [0, -40, -3, 70.4, 40, 0.0].
"""
CLASSES = ('car', 'pedestrian', 'cyclist')
def __init__(self,
data_root,
ann_file,
split,
pts_prefix='velodyne',
pipeline=None,
classes=None,
modality=None,
box_type_3d='LiDAR',
filter_empty_gt=True,
test_mode=False,
pcd_limit_range=[0, -40, -3, 70.4, 40, 0.0],
**kwargs):
super().__init__(
data_root=data_root,
ann_file=ann_file,
pipeline=pipeline,
classes=classes,
modality=modality,
box_type_3d=box_type_3d,
filter_empty_gt=filter_empty_gt,
test_mode=test_mode,
**kwargs)
self.split = split
self.root_split = os.path.join(self.data_root, split)
assert self.modality is not None
self.pcd_limit_range = pcd_limit_range
self.pts_prefix = pts_prefix
def _get_pts_filename(self, idx):
"""Get point cloud filename according to the given index.
Args:
index (int): Index of the point cloud file to get.
Returns:
str: Name of the point cloud file.
"""
pts_filename = osp.join(self.root_split, self.pts_prefix,
f'{idx:06d}.bin')
return pts_filename
def get_data_info(self, index):
"""Get data info according to the given index.
Args:
index (int): Index of the sample data to get.
Returns:
dict: Data information that will be passed to the data
preprocessing pipelines. It includes the following keys:
- sample_idx (str): Sample index.
- pts_filename (str): Filename of point clouds.
- img_prefix (str): Prefix of image files.
- img_info (dict): Image info.
- lidar2img (list[np.ndarray], optional): Transformations
from lidar to different cameras.
- ann_info (dict): Annotation info.
"""
info = self.data_infos[index]
sample_idx = info['image']['image_idx']
img_filename = os.path.join(self.data_root,
info['image']['image_path'])
# TODO: consider use torch.Tensor only
rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
P2 = info['calib']['P2'].astype(np.float32)
lidar2img = P2 @ rect @ Trv2c
pts_filename = self._get_pts_filename(sample_idx)
input_dict = dict(
sample_idx=sample_idx,
pts_filename=pts_filename,
img_prefix=None,
img_info=dict(filename=img_filename),
lidar2img=lidar2img)
if not self.test_mode:
annos = self.get_ann_info(index)
input_dict['ann_info'] = annos
return input_dict
def get_ann_info(self, index):
"""Get annotation info according to the given index.
Args:
index (int): Index of the annotation data to get.
Returns:
dict: annotation information consists of the following keys:
- gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`):
3D ground truth bboxes.
- gt_labels_3d (np.ndarray): Labels of ground truths.
- gt_bboxes (np.ndarray): 2D ground truth bboxes.
- gt_labels (np.ndarray): Labels of ground truths.
- gt_names (list[str]): Class names of ground truths.
- difficulty (int): Difficulty defined by KITTI.
0, 1, 2 represent xxxxx respectively.
"""
# Use index to get the annos, thus the evalhook could also use this api
info = self.data_infos[index]
rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
if 'plane' in info:
# convert ground plane to velodyne coordinates
reverse = np.linalg.inv(rect @ Trv2c)
(plane_norm_cam,
plane_off_cam) = (info['plane'][:3],
-info['plane'][:3] * info['plane'][3])
plane_norm_lidar = \
(reverse[:3, :3] @ plane_norm_cam[:, None])[:, 0]
plane_off_lidar = (
reverse[:3, :3] @ plane_off_cam[:, None][:, 0] +
reverse[:3, 3])
plane_lidar = np.zeros_like(plane_norm_lidar, shape=(4, ))
plane_lidar[:3] = plane_norm_lidar
plane_lidar[3] = -plane_norm_lidar.T @ plane_off_lidar
else:
plane_lidar = None
difficulty = info['annos']['difficulty']
annos = info['annos']
# we need other objects to avoid collision when sample
annos = self.remove_dontcare(annos)
loc = annos['location']
dims = annos['dimensions']
rots = annos['rotation_y']
gt_names = annos['name']
gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
axis=1).astype(np.float32)
# convert gt_bboxes_3d to velodyne coordinates
gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(
self.box_mode_3d, np.linalg.inv(rect @ Trv2c))
gt_bboxes = annos['bbox']
selected = self.drop_arrays_by_name(gt_names, ['DontCare'])
gt_bboxes = gt_bboxes[selected].astype('float32')
gt_names = gt_names[selected]
gt_labels = []
for cat in gt_names:
if cat in self.CLASSES:
gt_labels.append(self.CLASSES.index(cat))
else:
gt_labels.append(-1)
gt_labels = np.array(gt_labels).astype(np.int64)
gt_labels_3d = copy.deepcopy(gt_labels)
anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d,
gt_labels_3d=gt_labels_3d,
bboxes=gt_bboxes,
labels=gt_labels,
gt_names=gt_names,
plane=plane_lidar,
difficulty=difficulty)
return anns_results
def drop_arrays_by_name(self, gt_names, used_classes):
"""Drop irrelevant ground truths by name.
Args:
gt_names (list[str]): Names of ground truths.
used_classes (list[str]): Classes of interest.
Returns:
np.ndarray: Indices of ground truths that will be dropped.
"""
inds = [i for i, x in enumerate(gt_names) if x not in used_classes]
inds = np.array(inds, dtype=np.int64)
return inds
def keep_arrays_by_name(self, gt_names, used_classes):
"""Keep useful ground truths by name.
Args:
gt_names (list[str]): Names of ground truths.
used_classes (list[str]): Classes of interest.
Returns:
np.ndarray: Indices of ground truths that will be keeped.
"""
inds = [i for i, x in enumerate(gt_names) if x in used_classes]
inds = np.array(inds, dtype=np.int64)
return inds
def remove_dontcare(self, ann_info):
"""Remove annotations that do not need to be cared.
Args:
ann_info (dict): Dict of annotation infos. The ``'DontCare'``
annotations will be removed according to ann_file['name'].
Returns:
dict: Annotations after filtering.
"""
img_filtered_annotations = {}
relevant_annotation_indices = [
i for i, x in enumerate(ann_info['name']) if x != 'DontCare'
]
for key in ann_info.keys():
img_filtered_annotations[key] = (
ann_info[key][relevant_annotation_indices])
return img_filtered_annotations
def format_results(self,
outputs,
pklfile_prefix=None,
submission_prefix=None):
"""Format the results to pkl file.
Args:
outputs (list[dict]): Testing results of the dataset.
pklfile_prefix (str): The prefix of pkl files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
submission_prefix (str): The prefix of submitted files. It
includes the file path and the prefix of filename, e.g.,
"a/b/prefix". If not specified, a temp file will be created.
Default: None.
Returns:
tuple: (result_files, tmp_dir), result_files is a dict containing
the json filepaths, tmp_dir is the temporal directory created
for saving json files when jsonfile_prefix is not specified.
"""
if pklfile_prefix is None:
tmp_dir = tempfile.TemporaryDirectory()
pklfile_prefix = osp.join(tmp_dir.name, 'results')
else:
tmp_dir = None
if not isinstance(outputs[0], dict):
result_files = self.bbox2result_kitti2d(outputs, self.CLASSES,
pklfile_prefix,
submission_prefix)
elif 'pts_bbox' in outputs[0] or 'img_bbox' in outputs[0]:
result_files = dict()
for name in outputs[0]:
results_ = [out[name] for out in outputs]
pklfile_prefix_ = pklfile_prefix + name
if submission_prefix is not None:
submission_prefix_ = submission_prefix + name
else:
submission_prefix_ = None
if 'img' in name:
result_files = self.bbox2result_kitti2d(
results_, self.CLASSES, pklfile_prefix_,
submission_prefix_)
else:
result_files_ = self.bbox2result_kitti(
results_, self.CLASSES, pklfile_prefix_,
submission_prefix_)
result_files[name] = result_files_
else:
result_files = self.bbox2result_kitti(outputs, self.CLASSES,
pklfile_prefix,
submission_prefix)
return result_files, tmp_dir
def evaluate(self,
results,
metric=None,
logger=None,
pklfile_prefix=None,
submission_prefix=None,
show=False,
out_dir=None,
pipeline=None):
"""Evaluation in KITTI protocol.
Args:
results (list[dict]): Testing results of the dataset.
metric (str | list[str], optional): Metrics to be evaluated.
Default: None.
logger (logging.Logger | str, optional): Logger used for printing
related information during evaluation. Default: None.
pklfile_prefix (str, optional): The prefix of pkl files, including
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
submission_prefix (str, optional): The prefix of submission data.
If not specified, the submission data will not be generated.
Default: None.
show (bool, optional): Whether to visualize.
Default: False.
out_dir (str, optional): Path to save the visualization results.
Default: None.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
Returns:
dict[str, float]: Results of each evaluation metric.
"""
result_files, tmp_dir = self.format_results(results, pklfile_prefix)
from mmdet3d.core.evaluation import kitti_eval
gt_annos = [info['annos'] for info in self.data_infos]
if isinstance(result_files, dict):
ap_dict = dict()
for name, result_files_ in result_files.items():
eval_types = ['bbox', 'bev', '3d']
if 'img' in name:
eval_types = ['bbox']
ap_result_str, ap_dict_ = kitti_eval(
gt_annos,
result_files_,
self.CLASSES,
eval_types=eval_types)
for ap_type, ap in ap_dict_.items():
ap_dict[f'{name}/{ap_type}'] = float('{:.4f}'.format(ap))
print_log(
f'Results of {name}:\n' + ap_result_str, logger=logger)
else:
if metric == 'img_bbox':
ap_result_str, ap_dict = kitti_eval(
gt_annos, result_files, self.CLASSES, eval_types=['bbox'])
else:
ap_result_str, ap_dict = kitti_eval(gt_annos, result_files,
self.CLASSES)
print_log('\n' + ap_result_str, logger=logger)
if tmp_dir is not None:
tmp_dir.cleanup()
if show or out_dir:
self.show(results, out_dir, show=show, pipeline=pipeline)
return ap_dict
def bbox2result_kitti(self,
net_outputs,
class_names,
pklfile_prefix=None,
submission_prefix=None):
"""Convert 3D detection results to kitti format for evaluation and test
submission.
Args:
net_outputs (list[np.ndarray]): List of array storing the
inferenced bounding boxes and scores.
class_names (list[String]): A list of class names.
pklfile_prefix (str): The prefix of pkl file.
submission_prefix (str): The prefix of submission file.
Returns:
list[dict]: A list of dictionaries with the kitti format.
"""
assert len(net_outputs) == len(self.data_infos), \
'invalid list length of network outputs'
if submission_prefix is not None:
mmcv.mkdir_or_exist(submission_prefix)
det_annos = []
print('\nConverting prediction to KITTI format')
for idx, pred_dicts in enumerate(
mmcv.track_iter_progress(net_outputs)):
annos = []
info = self.data_infos[idx]
sample_idx = info['image']['image_idx']
image_shape = info['image']['image_shape'][:2]
box_dict = self.convert_valid_bboxes(pred_dicts, info)
anno = {
'name': [],
'truncated': [],
'occluded': [],
'alpha': [],
'bbox': [],
'dimensions': [],
'location': [],
'rotation_y': [],
'score': []
}
if len(box_dict['bbox']) > 0:
box_2d_preds = box_dict['bbox']
box_preds = box_dict['box3d_camera']
scores = box_dict['scores']
box_preds_lidar = box_dict['box3d_lidar']
label_preds = box_dict['label_preds']
for box, box_lidar, bbox, score, label in zip(
box_preds, box_preds_lidar, box_2d_preds, scores,
label_preds):
bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])
bbox[:2] = np.maximum(bbox[:2], [0, 0])
anno['name'].append(class_names[int(label)])
anno['truncated'].append(0.0)
anno['occluded'].append(0)
anno['alpha'].append(
-np.arctan2(-box_lidar[1], box_lidar[0]) + box[6])
anno['bbox'].append(bbox)
anno['dimensions'].append(box[3:6])
anno['location'].append(box[:3])
anno['rotation_y'].append(box[6])
anno['score'].append(score)
anno = {k: np.stack(v) for k, v in anno.items()}
annos.append(anno)
else:
anno = {
'name': np.array([]),
'truncated': np.array([]),
'occluded': np.array([]),
'alpha': np.array([]),
'bbox': np.zeros([0, 4]),
'dimensions': np.zeros([0, 3]),
'location': np.zeros([0, 3]),
'rotation_y': np.array([]),
'score': np.array([]),
}
annos.append(anno)
if submission_prefix is not None:
curr_file = f'{submission_prefix}/{sample_idx:06d}.txt'
with open(curr_file, 'w') as f:
bbox = anno['bbox']
loc = anno['location']
dims = anno['dimensions'] # lhw -> hwl
for idx in range(len(bbox)):
print(
'{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} '
'{:.4f} {:.4f} {:.4f} '
'{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.format(
anno['name'][idx], anno['alpha'][idx],
bbox[idx][0], bbox[idx][1], bbox[idx][2],
bbox[idx][3], dims[idx][1], dims[idx][2],
dims[idx][0], loc[idx][0], loc[idx][1],
loc[idx][2], anno['rotation_y'][idx],
anno['score'][idx]),
file=f)
annos[-1]['sample_idx'] = np.array(
[sample_idx] * len(annos[-1]['score']), dtype=np.int64)
det_annos += annos
if pklfile_prefix is not None:
if not pklfile_prefix.endswith(('.pkl', '.pickle')):
out = f'{pklfile_prefix}.pkl'
mmcv.dump(det_annos, out)
print(f'Result is saved to {out}.')
return det_annos
def bbox2result_kitti2d(self,
net_outputs,
class_names,
pklfile_prefix=None,
submission_prefix=None):
"""Convert 2D detection results to kitti format for evaluation and test
submission.
Args:
net_outputs (list[np.ndarray]): List of array storing the
inferenced bounding boxes and scores.
class_names (list[String]): A list of class names.
pklfile_prefix (str): The prefix of pkl file.
submission_prefix (str): The prefix of submission file.
Returns:
list[dict]: A list of dictionaries have the kitti format
"""
assert len(net_outputs) == len(self.data_infos), \
'invalid list length of network outputs'
det_annos = []
print('\nConverting prediction to KITTI format')
for i, bboxes_per_sample in enumerate(
mmcv.track_iter_progress(net_outputs)):
annos = []
anno = dict(
name=[],
truncated=[],
occluded=[],
alpha=[],
bbox=[],
dimensions=[],
location=[],
rotation_y=[],
score=[])
sample_idx = self.data_infos[i]['image']['image_idx']
num_example = 0
for label in range(len(bboxes_per_sample)):
bbox = bboxes_per_sample[label]
for i in range(bbox.shape[0]):
anno['name'].append(class_names[int(label)])
anno['truncated'].append(0.0)
anno['occluded'].append(0)
anno['alpha'].append(0.0)
anno['bbox'].append(bbox[i, :4])
# set dimensions (height, width, length) to zero
anno['dimensions'].append(
np.zeros(shape=[3], dtype=np.float32))
# set the 3D translation to (-1000, -1000, -1000)
anno['location'].append(
np.ones(shape=[3], dtype=np.float32) * (-1000.0))
anno['rotation_y'].append(0.0)
anno['score'].append(bbox[i, 4])
num_example += 1
if num_example == 0:
annos.append(
dict(
name=np.array([]),
truncated=np.array([]),
occluded=np.array([]),
alpha=np.array([]),
bbox=np.zeros([0, 4]),
dimensions=np.zeros([0, 3]),
location=np.zeros([0, 3]),
rotation_y=np.array([]),
score=np.array([]),
))
else:
anno = {k: np.stack(v) for k, v in anno.items()}
annos.append(anno)
annos[-1]['sample_idx'] = np.array(
[sample_idx] * num_example, dtype=np.int64)
det_annos += annos
if pklfile_prefix is not None:
# save file in pkl format
pklfile_path = (
pklfile_prefix[:-4] if pklfile_prefix.endswith(
('.pkl', '.pickle')) else pklfile_prefix)
mmcv.dump(det_annos, pklfile_path)
if submission_prefix is not None:
# save file in submission format
mmcv.mkdir_or_exist(submission_prefix)
print(f'Saving KITTI submission to {submission_prefix}')
for i, anno in enumerate(det_annos):
sample_idx = self.data_infos[i]['image']['image_idx']
cur_det_file = f'{submission_prefix}/{sample_idx:06d}.txt'
with open(cur_det_file, 'w') as f:
bbox = anno['bbox']
loc = anno['location']
dims = anno['dimensions'][::-1] # lhw -> hwl
for idx in range(len(bbox)):
print(
'{} -1 -1 {:4f} {:4f} {:4f} {:4f} {:4f} {:4f} '
'{:4f} {:4f} {:4f} {:4f} {:4f} {:4f} {:4f}'.format(
anno['name'][idx],
anno['alpha'][idx],
*bbox[idx], # 4 float
*dims[idx], # 3 float
*loc[idx], # 3 float
anno['rotation_y'][idx],
anno['score'][idx]),
file=f,
)
print(f'Result is saved to {submission_prefix}')
return det_annos
def convert_valid_bboxes(self, box_dict, info):
"""Convert the predicted boxes into valid ones.
Args:
box_dict (dict): Box dictionaries to be converted.
- boxes_3d (:obj:`LiDARInstance3DBoxes`): 3D bounding boxes.
- scores_3d (torch.Tensor): Scores of boxes.
- labels_3d (torch.Tensor): Class labels of boxes.
info (dict): Data info.
Returns:
dict: Valid predicted boxes.
- bbox (np.ndarray): 2D bounding boxes.
- box3d_camera (np.ndarray): 3D bounding boxes in
camera coordinate.
- box3d_lidar (np.ndarray): 3D bounding boxes in
LiDAR coordinate.
- scores (np.ndarray): Scores of boxes.
- label_preds (np.ndarray): Class label predictions.
- sample_idx (int): Sample index.
"""
# TODO: refactor this function
box_preds = box_dict['boxes_3d']
scores = box_dict['scores_3d']
labels = box_dict['labels_3d']
sample_idx = info['image']['image_idx']
box_preds.limit_yaw(offset=0.5, period=np.pi * 2)
if len(box_preds) == 0:
return dict(
bbox=np.zeros([0, 4]),
box3d_camera=np.zeros([0, 7]),
box3d_lidar=np.zeros([0, 7]),
scores=np.zeros([0]),
label_preds=np.zeros([0, 4]),
sample_idx=sample_idx)
rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
P2 = info['calib']['P2'].astype(np.float32)
img_shape = info['image']['image_shape']
P2 = box_preds.tensor.new_tensor(P2)
box_preds_camera = box_preds.convert_to(Box3DMode.CAM, rect @ Trv2c)
box_corners = box_preds_camera.corners
box_corners_in_image = points_cam2img(box_corners, P2)
# box_corners_in_image: [N, 8, 2]
minxy = torch.min(box_corners_in_image, dim=1)[0]
maxxy = torch.max(box_corners_in_image, dim=1)[0]
box_2d_preds = torch.cat([minxy, maxxy], dim=1)
# Post-processing
# check box_preds_camera
image_shape = box_preds.tensor.new_tensor(img_shape)
valid_cam_inds = ((box_2d_preds[:, 0] < image_shape[1]) &
(box_2d_preds[:, 1] < image_shape[0]) &
(box_2d_preds[:, 2] > 0) & (box_2d_preds[:, 3] > 0))
# check box_preds
limit_range = box_preds.tensor.new_tensor(self.pcd_limit_range)
valid_pcd_inds = ((box_preds.center > limit_range[:3]) &
(box_preds.center < limit_range[3:]))
valid_inds = valid_cam_inds & valid_pcd_inds.all(-1)
if valid_inds.sum() > 0:
return dict(
bbox=box_2d_preds[valid_inds, :].numpy(),
box3d_camera=box_preds_camera[valid_inds].tensor.numpy(),
box3d_lidar=box_preds[valid_inds].tensor.numpy(),
scores=scores[valid_inds].numpy(),
label_preds=labels[valid_inds].numpy(),
sample_idx=sample_idx)
else:
return dict(
bbox=np.zeros([0, 4]),
box3d_camera=np.zeros([0, 7]),
box3d_lidar=np.zeros([0, 7]),
scores=np.zeros([0]),
label_preds=np.zeros([0, 4]),
sample_idx=sample_idx)
def _build_default_pipeline(self):
"""Build the default pipeline for this dataset."""
pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=dict(backend='disk')),
dict(
type='DefaultFormatBundle3D',
class_names=self.CLASSES,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
if self.modality['use_camera']:
pipeline.insert(0, dict(type='LoadImageFromFile'))
return Compose(pipeline)
def show(self, results, out_dir, show=True, pipeline=None):
"""Results visualization.
Args:
results (list[dict]): List of bounding boxes results.
out_dir (str): Output directory of visualization result.
show (bool): Whether to visualize the results online.
Default: False.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
"""
assert out_dir is not None, 'Expect out_dir, got none.'
pipeline = self._get_pipeline(pipeline)
for i, result in enumerate(results):
if 'pts_bbox' in result.keys():
result = result['pts_bbox']
data_info = self.data_infos[i]
pts_path = data_info['point_cloud']['velodyne_path']
file_name = osp.split(pts_path)[-1].split('.')[0]
points, img_metas, img = self._extract_data(
i, pipeline, ['points', 'img_metas', 'img'])
points = points.numpy()
# for now we convert points into depth mode
points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,
Coord3DMode.DEPTH)
gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
show_gt_bboxes = Box3DMode.convert(gt_bboxes, Box3DMode.LIDAR,
Box3DMode.DEPTH)
pred_bboxes = result['boxes_3d'].tensor.numpy()
show_pred_bboxes = Box3DMode.convert(pred_bboxes, Box3DMode.LIDAR,
Box3DMode.DEPTH)
show_result(points, show_gt_bboxes, show_pred_bboxes, out_dir,
file_name, show)
# multi-modality visualization
if self.modality['use_camera'] and 'lidar2img' in img_metas.keys():
img = img.numpy()
# need to transpose channel to first dim
img = img.transpose(1, 2, 0)
show_pred_bboxes = LiDARInstance3DBoxes(
pred_bboxes, origin=(0.5, 0.5, 0))
show_gt_bboxes = LiDARInstance3DBoxes(
gt_bboxes, origin=(0.5, 0.5, 0))
show_multi_modality_result(
img,
show_gt_bboxes,
show_pred_bboxes,
img_metas['lidar2img'],
out_dir,
file_name,
box_mode='lidar',
show=show)
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import tempfile
from os import path as osp
import mmcv
import numpy as np
import torch
from mmcv.utils import print_log
from ..core.bbox import Box3DMode, CameraInstance3DBoxes, points_cam2img
from .builder import DATASETS
from .nuscenes_mono_dataset import NuScenesMonoDataset
@DATASETS.register_module()
class KittiMonoDataset(NuScenesMonoDataset):
"""Monocular 3D detection on KITTI Dataset.
Args:
data_root (str): Path of dataset root.
info_file (str): Path of info file.
load_interval (int, optional): Interval of loading the dataset. It is
used to uniformly sample the dataset. Defaults to 1.
with_velocity (bool, optional): Whether include velocity prediction
into the experiments. Defaults to False.
eval_version (str, optional): Configuration version of evaluation.
Defaults to None.
version (str, optional): Dataset version. Defaults to None.
kwargs (dict): Other arguments are the same of NuScenesMonoDataset.
"""
CLASSES = ('Pedestrian', 'Cyclist', 'Car')
def __init__(self,
data_root,
info_file,
ann_file,
pipeline,
load_interval=1,
with_velocity=False,
eval_version=None,
version=None,
**kwargs):
super().__init__(
data_root=data_root,
ann_file=ann_file,
pipeline=pipeline,
load_interval=load_interval,
with_velocity=with_velocity,
eval_version=eval_version,
version=version,
**kwargs)
self.anno_infos = mmcv.load(info_file)
self.bbox_code_size = 7
def _parse_ann_info(self, img_info, ann_info):
"""Parse bbox and mask annotation.
Args:
ann_info (list[dict]): Annotation info of an image.
with_mask (bool): Whether to parse mask annotations.
Returns:
dict: A dict containing the following keys: bboxes, bboxes_ignore,
labels, masks, seg_map. "masks" are raw annotations and not
decoded into binary masks.
"""
gt_bboxes = []
gt_labels = []
gt_bboxes_ignore = []
gt_masks_ann = []
gt_bboxes_cam3d = []
centers2d = []
depths = []
for i, ann in enumerate(ann_info):
if ann.get('ignore', False):
continue
x1, y1, w, h = ann['bbox']
inter_w = max(0, min(x1 + w, img_info['width']) - max(x1, 0))
inter_h = max(0, min(y1 + h, img_info['height']) - max(y1, 0))
if inter_w * inter_h == 0:
continue
if ann['area'] <= 0 or w < 1 or h < 1:
continue
if ann['category_id'] not in self.cat_ids:
continue
bbox = [x1, y1, x1 + w, y1 + h]
if ann.get('iscrowd', False):
gt_bboxes_ignore.append(bbox)
else:
gt_bboxes.append(bbox)
gt_labels.append(self.cat2label[ann['category_id']])
gt_masks_ann.append(ann.get('segmentation', None))
# 3D annotations in camera coordinates
bbox_cam3d = np.array(ann['bbox_cam3d']).reshape(-1, )
gt_bboxes_cam3d.append(bbox_cam3d)
# 2.5D annotations in camera coordinates
center2d = ann['center2d'][:2]
depth = ann['center2d'][2]
centers2d.append(center2d)
depths.append(depth)
if gt_bboxes:
gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
gt_labels = np.array(gt_labels, dtype=np.int64)
else:
gt_bboxes = np.zeros((0, 4), dtype=np.float32)
gt_labels = np.array([], dtype=np.int64)
if gt_bboxes_cam3d:
gt_bboxes_cam3d = np.array(gt_bboxes_cam3d, dtype=np.float32)
centers2d = np.array(centers2d, dtype=np.float32)
depths = np.array(depths, dtype=np.float32)
else:
gt_bboxes_cam3d = np.zeros((0, self.bbox_code_size),
dtype=np.float32)
centers2d = np.zeros((0, 2), dtype=np.float32)
depths = np.zeros((0), dtype=np.float32)
gt_bboxes_cam3d = CameraInstance3DBoxes(
gt_bboxes_cam3d,
box_dim=gt_bboxes_cam3d.shape[-1],
origin=(0.5, 0.5, 0.5))
gt_labels_3d = copy.deepcopy(gt_labels)
if gt_bboxes_ignore:
gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
else:
gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
seg_map = img_info['filename'].replace('jpg', 'png')
ann = dict(
bboxes=gt_bboxes,
labels=gt_labels,
gt_bboxes_3d=gt_bboxes_cam3d,
gt_labels_3d=gt_labels_3d,
centers2d=centers2d,
depths=depths,
bboxes_ignore=gt_bboxes_ignore,
masks=gt_masks_ann,
seg_map=seg_map)
return ann
def format_results(self,
outputs,
pklfile_prefix=None,
submission_prefix=None):
"""Format the results to pkl file.
Args:
outputs (list[dict]): Testing results of the dataset.
pklfile_prefix (str): The prefix of pkl files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
submission_prefix (str): The prefix of submitted files. It
includes the file path and the prefix of filename, e.g.,
"a/b/prefix". If not specified, a temp file will be created.
Default: None.
Returns:
tuple: (result_files, tmp_dir), result_files is a dict containing
the json filepaths, tmp_dir is the temporal directory created
for saving json files when jsonfile_prefix is not specified.
"""
if pklfile_prefix is None:
tmp_dir = tempfile.TemporaryDirectory()
pklfile_prefix = osp.join(tmp_dir.name, 'results')
else:
tmp_dir = None
if not isinstance(outputs[0], dict):
result_files = self.bbox2result_kitti2d(outputs, self.CLASSES,
pklfile_prefix,
submission_prefix)
elif 'pts_bbox' in outputs[0] or 'img_bbox' in outputs[0] or \
'img_bbox2d' in outputs[0]:
result_files = dict()
for name in outputs[0]:
results_ = [out[name] for out in outputs]
pklfile_prefix_ = pklfile_prefix + name
if submission_prefix is not None:
submission_prefix_ = submission_prefix + name
else:
submission_prefix_ = None
if '2d' in name:
result_files_ = self.bbox2result_kitti2d(
results_, self.CLASSES, pklfile_prefix_,
submission_prefix_)
else:
result_files_ = self.bbox2result_kitti(
results_, self.CLASSES, pklfile_prefix_,
submission_prefix_)
result_files[name] = result_files_
else:
result_files = self.bbox2result_kitti(outputs, self.CLASSES,
pklfile_prefix,
submission_prefix)
return result_files, tmp_dir
def evaluate(self,
results,
metric=None,
logger=None,
pklfile_prefix=None,
submission_prefix=None,
show=False,
out_dir=None,
pipeline=None):
"""Evaluation in KITTI protocol.
Args:
results (list[dict]): Testing results of the dataset.
metric (str | list[str], optional): Metrics to be evaluated.
Defaults to None.
logger (logging.Logger | str, optional): Logger used for printing
related information during evaluation. Default: None.
pklfile_prefix (str, optional): The prefix of pkl files, including
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
submission_prefix (str, optional): The prefix of submission data.
If not specified, the submission data will not be generated.
show (bool, optional): Whether to visualize.
Default: False.
out_dir (str, optional): Path to save the visualization results.
Default: None.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
Returns:
dict[str, float]: Results of each evaluation metric.
"""
result_files, tmp_dir = self.format_results(results, pklfile_prefix)
from mmdet3d.core.evaluation import kitti_eval
gt_annos = [info['annos'] for info in self.anno_infos]
if isinstance(result_files, dict):
ap_dict = dict()
for name, result_files_ in result_files.items():
eval_types = ['bbox', 'bev', '3d']
if '2d' in name:
eval_types = ['bbox']
ap_result_str, ap_dict_ = kitti_eval(
gt_annos,
result_files_,
self.CLASSES,
eval_types=eval_types)
for ap_type, ap in ap_dict_.items():
ap_dict[f'{name}/{ap_type}'] = float('{:.4f}'.format(ap))
print_log(
f'Results of {name}:\n' + ap_result_str, logger=logger)
else:
if metric == 'img_bbox2d':
ap_result_str, ap_dict = kitti_eval(
gt_annos, result_files, self.CLASSES, eval_types=['bbox'])
else:
ap_result_str, ap_dict = kitti_eval(gt_annos, result_files,
self.CLASSES)
print_log('\n' + ap_result_str, logger=logger)
if tmp_dir is not None:
tmp_dir.cleanup()
if show or out_dir:
self.show(results, out_dir, show=show, pipeline=pipeline)
return ap_dict
def bbox2result_kitti(self,
net_outputs,
class_names,
pklfile_prefix=None,
submission_prefix=None):
"""Convert 3D detection results to kitti format for evaluation and test
submission.
Args:
net_outputs (list[np.ndarray]): List of array storing the
inferenced bounding boxes and scores.
class_names (list[String]): A list of class names.
pklfile_prefix (str): The prefix of pkl file.
submission_prefix (str): The prefix of submission file.
Returns:
list[dict]: A list of dictionaries with the kitti format.
"""
assert len(net_outputs) == len(self.anno_infos)
if submission_prefix is not None:
mmcv.mkdir_or_exist(submission_prefix)
det_annos = []
print('\nConverting prediction to KITTI format')
for idx, pred_dicts in enumerate(
mmcv.track_iter_progress(net_outputs)):
annos = []
info = self.anno_infos[idx]
sample_idx = info['image']['image_idx']
image_shape = info['image']['image_shape'][:2]
box_dict = self.convert_valid_bboxes(pred_dicts, info)
anno = {
'name': [],
'truncated': [],
'occluded': [],
'alpha': [],
'bbox': [],
'dimensions': [],
'location': [],
'rotation_y': [],
'score': []
}
if len(box_dict['bbox']) > 0:
box_2d_preds = box_dict['bbox']
box_preds = box_dict['box3d_camera']
scores = box_dict['scores']
box_preds_lidar = box_dict['box3d_lidar']
label_preds = box_dict['label_preds']
for box, box_lidar, bbox, score, label in zip(
box_preds, box_preds_lidar, box_2d_preds, scores,
label_preds):
bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])
bbox[:2] = np.maximum(bbox[:2], [0, 0])
anno['name'].append(class_names[int(label)])
anno['truncated'].append(0.0)
anno['occluded'].append(0)
anno['alpha'].append(-np.arctan2(box[0], box[2]) + box[6])
anno['bbox'].append(bbox)
anno['dimensions'].append(box[3:6])
anno['location'].append(box[:3])
anno['rotation_y'].append(box[6])
anno['score'].append(score)
anno = {k: np.stack(v) for k, v in anno.items()}
annos.append(anno)
else:
anno = {
'name': np.array([]),
'truncated': np.array([]),
'occluded': np.array([]),
'alpha': np.array([]),
'bbox': np.zeros([0, 4]),
'dimensions': np.zeros([0, 3]),
'location': np.zeros([0, 3]),
'rotation_y': np.array([]),
'score': np.array([]),
}
annos.append(anno)
if submission_prefix is not None:
curr_file = f'{submission_prefix}/{sample_idx:06d}.txt'
with open(curr_file, 'w') as f:
bbox = anno['bbox']
loc = anno['location']
dims = anno['dimensions'] # lhw -> hwl
for idx in range(len(bbox)):
print(
'{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} '
'{:.4f} {:.4f} {:.4f} '
'{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.format(
anno['name'][idx], anno['alpha'][idx],
bbox[idx][0], bbox[idx][1], bbox[idx][2],
bbox[idx][3], dims[idx][1], dims[idx][2],
dims[idx][0], loc[idx][0], loc[idx][1],
loc[idx][2], anno['rotation_y'][idx],
anno['score'][idx]),
file=f)
annos[-1]['sample_idx'] = np.array(
[sample_idx] * len(annos[-1]['score']), dtype=np.int64)
det_annos += annos
if pklfile_prefix is not None:
if not pklfile_prefix.endswith(('.pkl', '.pickle')):
out = f'{pklfile_prefix}.pkl'
mmcv.dump(det_annos, out)
print('Result is saved to %s' % out)
return det_annos
def bbox2result_kitti2d(self,
net_outputs,
class_names,
pklfile_prefix=None,
submission_prefix=None):
"""Convert 2D detection results to kitti format for evaluation and test
submission.
Args:
net_outputs (list[np.ndarray]): List of array storing the
inferenced bounding boxes and scores.
class_names (list[String]): A list of class names.
pklfile_prefix (str): The prefix of pkl file.
submission_prefix (str): The prefix of submission file.
Returns:
list[dict]: A list of dictionaries have the kitti format
"""
assert len(net_outputs) == len(self.anno_infos)
det_annos = []
print('\nConverting prediction to KITTI format')
for i, bboxes_per_sample in enumerate(
mmcv.track_iter_progress(net_outputs)):
annos = []
anno = dict(
name=[],
truncated=[],
occluded=[],
alpha=[],
bbox=[],
dimensions=[],
location=[],
rotation_y=[],
score=[])
sample_idx = self.anno_infos[i]['image']['image_idx']
num_example = 0
for label in range(len(bboxes_per_sample)):
bbox = bboxes_per_sample[label]
for i in range(bbox.shape[0]):
anno['name'].append(class_names[int(label)])
anno['truncated'].append(0.0)
anno['occluded'].append(0)
anno['alpha'].append(-10)
anno['bbox'].append(bbox[i, :4])
# set dimensions (height, width, length) to zero
anno['dimensions'].append(
np.zeros(shape=[3], dtype=np.float32))
# set the 3D translation to (-1000, -1000, -1000)
anno['location'].append(
np.ones(shape=[3], dtype=np.float32) * (-1000.0))
anno['rotation_y'].append(0.0)
anno['score'].append(bbox[i, 4])
num_example += 1
if num_example == 0:
annos.append(
dict(
name=np.array([]),
truncated=np.array([]),
occluded=np.array([]),
alpha=np.array([]),
bbox=np.zeros([0, 4]),
dimensions=np.zeros([0, 3]),
location=np.zeros([0, 3]),
rotation_y=np.array([]),
score=np.array([]),
))
else:
anno = {k: np.stack(v) for k, v in anno.items()}
annos.append(anno)
annos[-1]['sample_idx'] = np.array(
[sample_idx] * num_example, dtype=np.int64)
det_annos += annos
if pklfile_prefix is not None:
if not pklfile_prefix.endswith(('.pkl', '.pickle')):
out = f'{pklfile_prefix}.pkl'
mmcv.dump(det_annos, out)
print('Result is saved to %s' % out)
if submission_prefix is not None:
# save file in submission format
mmcv.mkdir_or_exist(submission_prefix)
print(f'Saving KITTI submission to {submission_prefix}')
for i, anno in enumerate(det_annos):
sample_idx = self.anno_infos[i]['image']['image_idx']
cur_det_file = f'{submission_prefix}/{sample_idx:06d}.txt'
with open(cur_det_file, 'w') as f:
bbox = anno['bbox']
loc = anno['location']
dims = anno['dimensions'][::-1] # lhw -> hwl
for idx in range(len(bbox)):
print(
'{} -1 -1 {:4f} {:4f} {:4f} {:4f} {:4f} {:4f} '
'{:4f} {:4f} {:4f} {:4f} {:4f} {:4f} {:4f}'.format(
anno['name'][idx],
anno['alpha'][idx],
*bbox[idx], # 4 float
*dims[idx], # 3 float
*loc[idx], # 3 float
anno['rotation_y'][idx],
anno['score'][idx]),
file=f,
)
print(f'Result is saved to {submission_prefix}')
return det_annos
def convert_valid_bboxes(self, box_dict, info):
"""Convert the predicted boxes into valid ones.
Args:
box_dict (dict): Box dictionaries to be converted.
- boxes_3d (:obj:`CameraInstance3DBoxes`): 3D bounding boxes.
- scores_3d (torch.Tensor): Scores of boxes.
- labels_3d (torch.Tensor): Class labels of boxes.
info (dict): Data info.
Returns:
dict: Valid predicted boxes.
- bbox (np.ndarray): 2D bounding boxes.
- box3d_camera (np.ndarray): 3D bounding boxes in
camera coordinate.
- scores (np.ndarray): Scores of boxes.
- label_preds (np.ndarray): Class label predictions.
- sample_idx (int): Sample index.
"""
box_preds = box_dict['boxes_3d']
scores = box_dict['scores_3d']
labels = box_dict['labels_3d']
sample_idx = info['image']['image_idx']
if len(box_preds) == 0:
return dict(
bbox=np.zeros([0, 4]),
box3d_camera=np.zeros([0, 7]),
scores=np.zeros([0]),
label_preds=np.zeros([0, 4]),
sample_idx=sample_idx)
rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
P2 = info['calib']['P2'].astype(np.float32)
img_shape = info['image']['image_shape']
P2 = box_preds.tensor.new_tensor(P2)
box_preds_camera = box_preds
box_preds_lidar = box_preds.convert_to(Box3DMode.LIDAR,
np.linalg.inv(rect @ Trv2c))
box_corners = box_preds_camera.corners
box_corners_in_image = points_cam2img(box_corners, P2)
# box_corners_in_image: [N, 8, 2]
minxy = torch.min(box_corners_in_image, dim=1)[0]
maxxy = torch.max(box_corners_in_image, dim=1)[0]
box_2d_preds = torch.cat([minxy, maxxy], dim=1)
# Post-processing
# check box_preds_camera
image_shape = box_preds.tensor.new_tensor(img_shape)
valid_cam_inds = ((box_2d_preds[:, 0] < image_shape[1]) &
(box_2d_preds[:, 1] < image_shape[0]) &
(box_2d_preds[:, 2] > 0) & (box_2d_preds[:, 3] > 0))
# check box_preds
valid_inds = valid_cam_inds
if valid_inds.sum() > 0:
return dict(
bbox=box_2d_preds[valid_inds, :].numpy(),
box3d_camera=box_preds_camera[valid_inds].tensor.numpy(),
box3d_lidar=box_preds_lidar[valid_inds].tensor.numpy(),
scores=scores[valid_inds].numpy(),
label_preds=labels[valid_inds].numpy(),
sample_idx=sample_idx)
else:
return dict(
bbox=np.zeros([0, 4]),
box3d_camera=np.zeros([0, 7]),
box3d_lidar=np.zeros([0, 7]),
scores=np.zeros([0]),
label_preds=np.zeros([0, 4]),
sample_idx=sample_idx)
# Copyright (c) OpenMMLab. All rights reserved.
import os
import tempfile
from os import path as osp
import mmcv
import numpy as np
import pandas as pd
from lyft_dataset_sdk.lyftdataset import LyftDataset as Lyft
from lyft_dataset_sdk.utils.data_classes import Box as LyftBox
from pyquaternion import Quaternion
from mmdet3d.core.evaluation.lyft_eval import lyft_eval
from ..core import show_result
from ..core.bbox import Box3DMode, Coord3DMode, LiDARInstance3DBoxes
from .builder import DATASETS
from .custom_3d import Custom3DDataset
from .pipelines import Compose
@DATASETS.register_module()
class LyftDataset(Custom3DDataset):
r"""Lyft Dataset.
This class serves as the API for experiments on the Lyft Dataset.
Please refer to
`<https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/data>`_
for data downloading.
Args:
ann_file (str): Path of annotation file.
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
data_root (str): Path of dataset root.
classes (tuple[str], optional): Classes used in the dataset.
Defaults to None.
load_interval (int, optional): Interval of loading the dataset. It is
used to uniformly sample the dataset. Defaults to 1.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to None.
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
""" # noqa: E501
NameMapping = {
'bicycle': 'bicycle',
'bus': 'bus',
'car': 'car',
'emergency_vehicle': 'emergency_vehicle',
'motorcycle': 'motorcycle',
'other_vehicle': 'other_vehicle',
'pedestrian': 'pedestrian',
'truck': 'truck',
'animal': 'animal'
}
DefaultAttribute = {
'car': 'is_stationary',
'truck': 'is_stationary',
'bus': 'is_stationary',
'emergency_vehicle': 'is_stationary',
'other_vehicle': 'is_stationary',
'motorcycle': 'is_stationary',
'bicycle': 'is_stationary',
'pedestrian': 'is_stationary',
'animal': 'is_stationary'
}
CLASSES = ('car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle',
'motorcycle', 'bicycle', 'pedestrian', 'animal')
def __init__(self,
ann_file,
pipeline=None,
data_root=None,
classes=None,
load_interval=1,
modality=None,
box_type_3d='LiDAR',
filter_empty_gt=True,
test_mode=False,
**kwargs):
self.load_interval = load_interval
super().__init__(
data_root=data_root,
ann_file=ann_file,
pipeline=pipeline,
classes=classes,
modality=modality,
box_type_3d=box_type_3d,
filter_empty_gt=filter_empty_gt,
test_mode=test_mode,
**kwargs)
if self.modality is None:
self.modality = dict(
use_camera=False,
use_lidar=True,
use_radar=False,
use_map=False,
use_external=False,
)
def load_annotations(self, ann_file):
"""Load annotations from ann_file.
Args:
ann_file (str): Path of the annotation file.
Returns:
list[dict]: List of annotations sorted by timestamps.
"""
# loading data from a file-like object needs file format
data = mmcv.load(ann_file, file_format='pkl')
data_infos = list(sorted(data['infos'], key=lambda e: e['timestamp']))
data_infos = data_infos[::self.load_interval]
self.metadata = data['metadata']
self.version = self.metadata['version']
return data_infos
def get_data_info(self, index):
"""Get data info according to the given index.
Args:
index (int): Index of the sample data to get.
Returns:
dict: Data information that will be passed to the data
preprocessing pipelines. It includes the following keys:
- sample_idx (str): sample index
- pts_filename (str): filename of point clouds
- sweeps (list[dict]): infos of sweeps
- timestamp (float): sample timestamp
- img_filename (str, optional): image filename
- lidar2img (list[np.ndarray], optional): transformations
from lidar to different cameras
- ann_info (dict): annotation info
"""
info = self.data_infos[index]
# standard protocol modified from SECOND.Pytorch
input_dict = dict(
sample_idx=info['token'],
pts_filename=info['lidar_path'],
sweeps=info['sweeps'],
timestamp=info['timestamp'] / 1e6,
)
if self.modality['use_camera']:
image_paths = []
lidar2img_rts = []
for cam_type, cam_info in info['cams'].items():
image_paths.append(cam_info['data_path'])
# obtain lidar to image transformation matrix
lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation'])
lidar2cam_t = cam_info[
'sensor2lidar_translation'] @ lidar2cam_r.T
lidar2cam_rt = np.eye(4)
lidar2cam_rt[:3, :3] = lidar2cam_r.T
lidar2cam_rt[3, :3] = -lidar2cam_t
intrinsic = cam_info['cam_intrinsic']
viewpad = np.eye(4)
viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic
lidar2img_rt = (viewpad @ lidar2cam_rt.T)
lidar2img_rts.append(lidar2img_rt)
input_dict.update(
dict(
img_filename=image_paths,
lidar2img=lidar2img_rts,
))
if not self.test_mode:
annos = self.get_ann_info(index)
input_dict['ann_info'] = annos
return input_dict
def get_ann_info(self, index):
"""Get annotation info according to the given index.
Args:
index (int): Index of the annotation data to get.
Returns:
dict: Annotation information consists of the following keys:
- gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`):
3D ground truth bboxes.
- gt_labels_3d (np.ndarray): Labels of ground truths.
- gt_names (list[str]): Class names of ground truths.
"""
info = self.data_infos[index]
gt_bboxes_3d = info['gt_boxes']
gt_names_3d = info['gt_names']
gt_labels_3d = []
for cat in gt_names_3d:
if cat in self.CLASSES:
gt_labels_3d.append(self.CLASSES.index(cat))
else:
gt_labels_3d.append(-1)
gt_labels_3d = np.array(gt_labels_3d)
if 'gt_shape' in info:
gt_shape = info['gt_shape']
gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_shape], axis=-1)
# the lyft box center is [0.5, 0.5, 0.5], we change it to be
# the same as KITTI (0.5, 0.5, 0)
gt_bboxes_3d = LiDARInstance3DBoxes(
gt_bboxes_3d,
box_dim=gt_bboxes_3d.shape[-1],
origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d,
gt_labels_3d=gt_labels_3d,
)
return anns_results
def _format_bbox(self, results, jsonfile_prefix=None):
"""Convert the results to the standard format.
Args:
results (list[dict]): Testing results of the dataset.
jsonfile_prefix (str): The prefix of the output jsonfile.
You can specify the output directory/filename by
modifying the jsonfile_prefix. Default: None.
Returns:
str: Path of the output json file.
"""
lyft_annos = {}
mapped_class_names = self.CLASSES
print('Start to convert detection format...')
for sample_id, det in enumerate(mmcv.track_iter_progress(results)):
annos = []
boxes = output_to_lyft_box(det)
sample_token = self.data_infos[sample_id]['token']
boxes = lidar_lyft_box_to_global(self.data_infos[sample_id], boxes)
for i, box in enumerate(boxes):
name = mapped_class_names[box.label]
lyft_anno = dict(
sample_token=sample_token,
translation=box.center.tolist(),
size=box.wlh.tolist(),
rotation=box.orientation.elements.tolist(),
name=name,
score=box.score)
annos.append(lyft_anno)
lyft_annos[sample_token] = annos
lyft_submissions = {
'meta': self.modality,
'results': lyft_annos,
}
mmcv.mkdir_or_exist(jsonfile_prefix)
res_path = osp.join(jsonfile_prefix, 'results_lyft.json')
print('Results writes to', res_path)
mmcv.dump(lyft_submissions, res_path)
return res_path
def _evaluate_single(self,
result_path,
logger=None,
metric='bbox',
result_name='pts_bbox'):
"""Evaluation for a single model in Lyft protocol.
Args:
result_path (str): Path of the result file.
logger (logging.Logger | str, optional): Logger used for printing
related information during evaluation. Default: None.
metric (str, optional): Metric name used for evaluation.
Default: 'bbox'.
result_name (str, optional): Result name in the metric prefix.
Default: 'pts_bbox'.
Returns:
dict: Dictionary of evaluation details.
"""
output_dir = osp.join(*osp.split(result_path)[:-1])
lyft = Lyft(
data_path=osp.join(self.data_root, self.version),
json_path=osp.join(self.data_root, self.version, self.version),
verbose=True)
eval_set_map = {
'v1.01-train': 'val',
}
metrics = lyft_eval(lyft, self.data_root, result_path,
eval_set_map[self.version], output_dir, logger)
# record metrics
detail = dict()
metric_prefix = f'{result_name}_Lyft'
for i, name in enumerate(metrics['class_names']):
AP = float(metrics['mAPs_cate'][i])
detail[f'{metric_prefix}/{name}_AP'] = AP
detail[f'{metric_prefix}/mAP'] = metrics['Final mAP']
return detail
def format_results(self, results, jsonfile_prefix=None, csv_savepath=None):
"""Format the results to json (standard format for COCO evaluation).
Args:
results (list[dict]): Testing results of the dataset.
jsonfile_prefix (str): The prefix of json files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
csv_savepath (str): The path for saving csv files.
It includes the file path and the csv filename,
e.g., "a/b/filename.csv". If not specified,
the result will not be converted to csv file.
Returns:
tuple: Returns (result_files, tmp_dir), where `result_files` is a
dict containing the json filepaths, `tmp_dir` is the temporal
directory created for saving json files when
`jsonfile_prefix` is not specified.
"""
assert isinstance(results, list), 'results must be a list'
assert len(results) == len(self), (
'The length of results is not equal to the dataset len: {} != {}'.
format(len(results), len(self)))
if jsonfile_prefix is None:
tmp_dir = tempfile.TemporaryDirectory()
jsonfile_prefix = osp.join(tmp_dir.name, 'results')
else:
tmp_dir = None
# currently the output prediction results could be in two formats
# 1. list of dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...)
# 2. list of dict('pts_bbox' or 'img_bbox':
# dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...))
# this is a workaround to enable evaluation of both formats on Lyft
# refer to https://github.com/open-mmlab/mmdetection3d/issues/449
if not ('pts_bbox' in results[0] or 'img_bbox' in results[0]):
result_files = self._format_bbox(results, jsonfile_prefix)
else:
# should take the inner dict out of 'pts_bbox' or 'img_bbox' dict
result_files = dict()
for name in results[0]:
print(f'\nFormating bboxes of {name}')
results_ = [out[name] for out in results]
tmp_file_ = osp.join(jsonfile_prefix, name)
result_files.update(
{name: self._format_bbox(results_, tmp_file_)})
if csv_savepath is not None:
self.json2csv(result_files['pts_bbox'], csv_savepath)
return result_files, tmp_dir
def evaluate(self,
results,
metric='bbox',
logger=None,
jsonfile_prefix=None,
csv_savepath=None,
result_names=['pts_bbox'],
show=False,
out_dir=None,
pipeline=None):
"""Evaluation in Lyft protocol.
Args:
results (list[dict]): Testing results of the dataset.
metric (str | list[str], optional): Metrics to be evaluated.
Default: 'bbox'.
logger (logging.Logger | str, optional): Logger used for printing
related information during evaluation. Default: None.
jsonfile_prefix (str, optional): The prefix of json files including
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
csv_savepath (str, optional): The path for saving csv files.
It includes the file path and the csv filename,
e.g., "a/b/filename.csv". If not specified,
the result will not be converted to csv file.
result_names (list[str], optional): Result names in the
metric prefix. Default: ['pts_bbox'].
show (bool, optional): Whether to visualize.
Default: False.
out_dir (str, optional): Path to save the visualization results.
Default: None.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
Returns:
dict[str, float]: Evaluation results.
"""
result_files, tmp_dir = self.format_results(results, jsonfile_prefix,
csv_savepath)
if isinstance(result_files, dict):
results_dict = dict()
for name in result_names:
print(f'Evaluating bboxes of {name}')
ret_dict = self._evaluate_single(result_files[name])
results_dict.update(ret_dict)
elif isinstance(result_files, str):
results_dict = self._evaluate_single(result_files)
if tmp_dir is not None:
tmp_dir.cleanup()
if show or out_dir:
self.show(results, out_dir, show=show, pipeline=pipeline)
return results_dict
def _build_default_pipeline(self):
"""Build the default pipeline for this dataset."""
pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=dict(backend='disk')),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=dict(backend='disk')),
dict(
type='DefaultFormatBundle3D',
class_names=self.CLASSES,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
return Compose(pipeline)
def show(self, results, out_dir, show=False, pipeline=None):
"""Results visualization.
Args:
results (list[dict]): List of bounding boxes results.
out_dir (str): Output directory of visualization result.
show (bool): Whether to visualize the results online.
Default: False.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
"""
assert out_dir is not None, 'Expect out_dir, got none.'
pipeline = self._get_pipeline(pipeline)
for i, result in enumerate(results):
if 'pts_bbox' in result.keys():
result = result['pts_bbox']
data_info = self.data_infos[i]
pts_path = data_info['lidar_path']
file_name = osp.split(pts_path)[-1].split('.')[0]
points = self._extract_data(i, pipeline, 'points').numpy()
points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,
Coord3DMode.DEPTH)
inds = result['scores_3d'] > 0.1
gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
show_gt_bboxes = Box3DMode.convert(gt_bboxes, Box3DMode.LIDAR,
Box3DMode.DEPTH)
pred_bboxes = result['boxes_3d'][inds].tensor.numpy()
show_pred_bboxes = Box3DMode.convert(pred_bboxes, Box3DMode.LIDAR,
Box3DMode.DEPTH)
show_result(points, show_gt_bboxes, show_pred_bboxes, out_dir,
file_name, show)
def json2csv(self, json_path, csv_savepath):
"""Convert the json file to csv format for submission.
Args:
json_path (str): Path of the result json file.
csv_savepath (str): Path to save the csv file.
"""
results = mmcv.load(json_path)['results']
sample_list_path = osp.join(self.data_root, 'sample_submission.csv')
data = pd.read_csv(sample_list_path)
Id_list = list(data['Id'])
pred_list = list(data['PredictionString'])
cnt = 0
print('Converting the json to csv...')
for token in results.keys():
cnt += 1
predictions = results[token]
prediction_str = ''
for i in range(len(predictions)):
prediction_str += \
str(predictions[i]['score']) + ' ' + \
str(predictions[i]['translation'][0]) + ' ' + \
str(predictions[i]['translation'][1]) + ' ' + \
str(predictions[i]['translation'][2]) + ' ' + \
str(predictions[i]['size'][0]) + ' ' + \
str(predictions[i]['size'][1]) + ' ' + \
str(predictions[i]['size'][2]) + ' ' + \
str(Quaternion(list(predictions[i]['rotation']))
.yaw_pitch_roll[0]) + ' ' + \
predictions[i]['name'] + ' '
prediction_str = prediction_str[:-1]
idx = Id_list.index(token)
pred_list[idx] = prediction_str
df = pd.DataFrame({'Id': Id_list, 'PredictionString': pred_list})
mmcv.mkdir_or_exist(os.path.dirname(csv_savepath))
df.to_csv(csv_savepath, index=False)
def output_to_lyft_box(detection):
"""Convert the output to the box class in the Lyft.
Args:
detection (dict): Detection results.
Returns:
list[:obj:`LyftBox`]: List of standard LyftBoxes.
"""
box3d = detection['boxes_3d']
scores = detection['scores_3d'].numpy()
labels = detection['labels_3d'].numpy()
box_gravity_center = box3d.gravity_center.numpy()
box_dims = box3d.dims.numpy()
box_yaw = box3d.yaw.numpy()
# our LiDAR coordinate system -> Lyft box coordinate system
lyft_box_dims = box_dims[:, [1, 0, 2]]
box_list = []
for i in range(len(box3d)):
quat = Quaternion(axis=[0, 0, 1], radians=box_yaw[i])
box = LyftBox(
box_gravity_center[i],
lyft_box_dims[i],
quat,
label=labels[i],
score=scores[i])
box_list.append(box)
return box_list
def lidar_lyft_box_to_global(info, boxes):
"""Convert the box from ego to global coordinate.
Args:
info (dict): Info for a specific sample data, including the
calibration information.
boxes (list[:obj:`LyftBox`]): List of predicted LyftBoxes.
Returns:
list: List of standard LyftBoxes in the global
coordinate.
"""
box_list = []
for box in boxes:
# Move box to ego vehicle coord system
box.rotate(Quaternion(info['lidar2ego_rotation']))
box.translate(np.array(info['lidar2ego_translation']))
# Move box to global coord system
box.rotate(Quaternion(info['ego2global_rotation']))
box.translate(np.array(info['ego2global_translation']))
box_list.append(box)
return box_list
# Copyright (c) OpenMMLab. All rights reserved.
import tempfile
from os import path as osp
import mmcv
import numpy as np
import pyquaternion
from nuscenes.utils.data_classes import Box as NuScenesBox
from ..core import show_result
from ..core.bbox import Box3DMode, Coord3DMode, LiDARInstance3DBoxes
from .builder import DATASETS
from .custom_3d import Custom3DDataset
from .pipelines import Compose
@DATASETS.register_module()
class NuScenesDataset(Custom3DDataset):
r"""NuScenes Dataset.
This class serves as the API for experiments on the NuScenes Dataset.
Please refer to `NuScenes Dataset <https://www.nuscenes.org/download>`_
for data downloading.
Args:
ann_file (str): Path of annotation file.
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
data_root (str): Path of dataset root.
classes (tuple[str], optional): Classes used in the dataset.
Defaults to None.
load_interval (int, optional): Interval of loading the dataset. It is
used to uniformly sample the dataset. Defaults to 1.
with_velocity (bool, optional): Whether include velocity prediction
into the experiments. Defaults to True.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to None.
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes.
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
eval_version (bool, optional): Configuration version of evaluation.
Defaults to 'detection_cvpr_2019'.
use_valid_flag (bool, optional): Whether to use `use_valid_flag` key
in the info file as mask to filter gt_boxes and gt_names.
Defaults to False.
"""
NameMapping = {
'movable_object.barrier': 'barrier',
'vehicle.bicycle': 'bicycle',
'vehicle.bus.bendy': 'bus',
'vehicle.bus.rigid': 'bus',
'vehicle.car': 'car',
'vehicle.construction': 'construction_vehicle',
'vehicle.motorcycle': 'motorcycle',
'human.pedestrian.adult': 'pedestrian',
'human.pedestrian.child': 'pedestrian',
'human.pedestrian.construction_worker': 'pedestrian',
'human.pedestrian.police_officer': 'pedestrian',
'movable_object.trafficcone': 'traffic_cone',
'vehicle.trailer': 'trailer',
'vehicle.truck': 'truck'
}
DefaultAttribute = {
'car': 'vehicle.parked',
'pedestrian': 'pedestrian.moving',
'trailer': 'vehicle.parked',
'truck': 'vehicle.parked',
'bus': 'vehicle.moving',
'motorcycle': 'cycle.without_rider',
'construction_vehicle': 'vehicle.parked',
'bicycle': 'cycle.without_rider',
'barrier': '',
'traffic_cone': '',
}
AttrMapping = {
'cycle.with_rider': 0,
'cycle.without_rider': 1,
'pedestrian.moving': 2,
'pedestrian.standing': 3,
'pedestrian.sitting_lying_down': 4,
'vehicle.moving': 5,
'vehicle.parked': 6,
'vehicle.stopped': 7,
}
AttrMapping_rev = [
'cycle.with_rider',
'cycle.without_rider',
'pedestrian.moving',
'pedestrian.standing',
'pedestrian.sitting_lying_down',
'vehicle.moving',
'vehicle.parked',
'vehicle.stopped',
]
# https://github.com/nutonomy/nuscenes-devkit/blob/57889ff20678577025326cfc24e57424a829be0a/python-sdk/nuscenes/eval/detection/evaluate.py#L222 # noqa
ErrNameMapping = {
'trans_err': 'mATE',
'scale_err': 'mASE',
'orient_err': 'mAOE',
'vel_err': 'mAVE',
'attr_err': 'mAAE'
}
CLASSES = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
'barrier')
def __init__(self,
ann_file,
pipeline=None,
data_root=None,
classes=None,
load_interval=1,
with_velocity=True,
modality=None,
box_type_3d='LiDAR',
filter_empty_gt=True,
test_mode=False,
eval_version='detection_cvpr_2019',
use_valid_flag=False):
self.load_interval = load_interval
self.use_valid_flag = use_valid_flag
super().__init__(
data_root=data_root,
ann_file=ann_file,
pipeline=pipeline,
classes=classes,
modality=modality,
box_type_3d=box_type_3d,
filter_empty_gt=filter_empty_gt,
test_mode=test_mode)
self.with_velocity = with_velocity
self.eval_version = eval_version
from nuscenes.eval.detection.config import config_factory
self.eval_detection_configs = config_factory(self.eval_version)
if self.modality is None:
self.modality = dict(
use_camera=False,
use_lidar=True,
use_radar=False,
use_map=False,
use_external=False,
)
def get_cat_ids(self, idx):
"""Get category distribution of single scene.
Args:
idx (int): Index of the data_info.
Returns:
dict[list]: for each category, if the current scene
contains such boxes, store a list containing idx,
otherwise, store empty list.
"""
info = self.data_infos[idx]
if self.use_valid_flag:
mask = info['valid_flag']
gt_names = set(info['gt_names'][mask])
else:
gt_names = set(info['gt_names'])
cat_ids = []
for name in gt_names:
if name in self.CLASSES:
cat_ids.append(self.cat2id[name])
return cat_ids
def load_annotations(self, ann_file):
"""Load annotations from ann_file.
Args:
ann_file (str): Path of the annotation file.
Returns:
list[dict]: List of annotations sorted by timestamps.
"""
data = mmcv.load(ann_file, file_format='pkl')
data_infos = list(sorted(data['infos'], key=lambda e: e['timestamp']))
data_infos = data_infos[::self.load_interval]
self.metadata = data['metadata']
self.version = self.metadata['version']
return data_infos
def get_data_info(self, index):
"""Get data info according to the given index.
Args:
index (int): Index of the sample data to get.
Returns:
dict: Data information that will be passed to the data
preprocessing pipelines. It includes the following keys:
- sample_idx (str): Sample index.
- pts_filename (str): Filename of point clouds.
- sweeps (list[dict]): Infos of sweeps.
- timestamp (float): Sample timestamp.
- img_filename (str, optional): Image filename.
- lidar2img (list[np.ndarray], optional): Transformations
from lidar to different cameras.
- ann_info (dict): Annotation info.
"""
info = self.data_infos[index]
# standard protocol modified from SECOND.Pytorch
input_dict = dict(
sample_idx=info['token'],
pts_filename=info['lidar_path'],
sweeps=info['sweeps'],
timestamp=info['timestamp'] / 1e6,
)
if self.modality['use_camera']:
image_paths = []
lidar2img_rts = []
for cam_type, cam_info in info['cams'].items():
image_paths.append(cam_info['data_path'])
# obtain lidar to image transformation matrix
lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation'])
lidar2cam_t = cam_info[
'sensor2lidar_translation'] @ lidar2cam_r.T
lidar2cam_rt = np.eye(4)
lidar2cam_rt[:3, :3] = lidar2cam_r.T
lidar2cam_rt[3, :3] = -lidar2cam_t
intrinsic = cam_info['cam_intrinsic']
viewpad = np.eye(4)
viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic
lidar2img_rt = (viewpad @ lidar2cam_rt.T)
lidar2img_rts.append(lidar2img_rt)
input_dict.update(
dict(
img_filename=image_paths,
lidar2img=lidar2img_rts,
))
if not self.test_mode:
annos = self.get_ann_info(index)
input_dict['ann_info'] = annos
return input_dict
def get_ann_info(self, index):
"""Get annotation info according to the given index.
Args:
index (int): Index of the annotation data to get.
Returns:
dict: Annotation information consists of the following keys:
- gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`):
3D ground truth bboxes
- gt_labels_3d (np.ndarray): Labels of ground truths.
- gt_names (list[str]): Class names of ground truths.
"""
info = self.data_infos[index]
# filter out bbox containing no points
if self.use_valid_flag:
mask = info['valid_flag']
else:
mask = info['num_lidar_pts'] > 0
gt_bboxes_3d = info['gt_boxes'][mask]
gt_names_3d = info['gt_names'][mask]
gt_labels_3d = []
for cat in gt_names_3d:
if cat in self.CLASSES:
gt_labels_3d.append(self.CLASSES.index(cat))
else:
gt_labels_3d.append(-1)
gt_labels_3d = np.array(gt_labels_3d)
if self.with_velocity:
gt_velocity = info['gt_velocity'][mask]
nan_mask = np.isnan(gt_velocity[:, 0])
gt_velocity[nan_mask] = [0.0, 0.0]
gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity], axis=-1)
# the nuscenes box center is [0.5, 0.5, 0.5], we change it to be
# the same as KITTI (0.5, 0.5, 0)
gt_bboxes_3d = LiDARInstance3DBoxes(
gt_bboxes_3d,
box_dim=gt_bboxes_3d.shape[-1],
origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d,
gt_labels_3d=gt_labels_3d,
gt_names=gt_names_3d)
return anns_results
def _format_bbox(self, results, jsonfile_prefix=None):
"""Convert the results to the standard format.
Args:
results (list[dict]): Testing results of the dataset.
jsonfile_prefix (str): The prefix of the output jsonfile.
You can specify the output directory/filename by
modifying the jsonfile_prefix. Default: None.
Returns:
str: Path of the output json file.
"""
nusc_annos = {}
mapped_class_names = self.CLASSES
print('Start to convert detection format...')
for sample_id, det in enumerate(mmcv.track_iter_progress(results)):
annos = []
boxes = output_to_nusc_box(det, self.with_velocity)
sample_token = self.data_infos[sample_id]['token']
boxes = lidar_nusc_box_to_global(self.data_infos[sample_id], boxes,
mapped_class_names,
self.eval_detection_configs,
self.eval_version)
for i, box in enumerate(boxes):
name = mapped_class_names[box.label]
if np.sqrt(box.velocity[0]**2 + box.velocity[1]**2) > 0.2:
if name in [
'car',
'construction_vehicle',
'bus',
'truck',
'trailer',
]:
attr = 'vehicle.moving'
elif name in ['bicycle', 'motorcycle']:
attr = 'cycle.with_rider'
else:
attr = NuScenesDataset.DefaultAttribute[name]
else:
if name in ['pedestrian']:
attr = 'pedestrian.standing'
elif name in ['bus']:
attr = 'vehicle.stopped'
else:
attr = NuScenesDataset.DefaultAttribute[name]
nusc_anno = dict(
sample_token=sample_token,
translation=box.center.tolist(),
size=box.wlh.tolist(),
rotation=box.orientation.elements.tolist(),
velocity=box.velocity[:2].tolist(),
detection_name=name,
detection_score=box.score,
attribute_name=attr)
annos.append(nusc_anno)
nusc_annos[sample_token] = annos
nusc_submissions = {
'meta': self.modality,
'results': nusc_annos,
}
mmcv.mkdir_or_exist(jsonfile_prefix)
res_path = osp.join(jsonfile_prefix, 'results_nusc.json')
print('Results writes to', res_path)
mmcv.dump(nusc_submissions, res_path)
return res_path
def _evaluate_single(self,
result_path,
logger=None,
metric='bbox',
result_name='pts_bbox'):
"""Evaluation for a single model in nuScenes protocol.
Args:
result_path (str): Path of the result file.
logger (logging.Logger | str, optional): Logger used for printing
related information during evaluation. Default: None.
metric (str, optional): Metric name used for evaluation.
Default: 'bbox'.
result_name (str, optional): Result name in the metric prefix.
Default: 'pts_bbox'.
Returns:
dict: Dictionary of evaluation details.
"""
from nuscenes import NuScenes
from nuscenes.eval.detection.evaluate import NuScenesEval
output_dir = osp.join(*osp.split(result_path)[:-1])
nusc = NuScenes(
version=self.version, dataroot=self.data_root, verbose=False)
eval_set_map = {
'v1.0-mini': 'mini_val',
'v1.0-trainval': 'val',
}
nusc_eval = NuScenesEval(
nusc,
config=self.eval_detection_configs,
result_path=result_path,
eval_set=eval_set_map[self.version],
output_dir=output_dir,
verbose=False)
nusc_eval.main(render_curves=False)
# record metrics
metrics = mmcv.load(osp.join(output_dir, 'metrics_summary.json'))
detail = dict()
metric_prefix = f'{result_name}_NuScenes'
for name in self.CLASSES:
for k, v in metrics['label_aps'][name].items():
val = float('{:.4f}'.format(v))
detail['{}/{}_AP_dist_{}'.format(metric_prefix, name, k)] = val
for k, v in metrics['label_tp_errors'][name].items():
val = float('{:.4f}'.format(v))
detail['{}/{}_{}'.format(metric_prefix, name, k)] = val
for k, v in metrics['tp_errors'].items():
val = float('{:.4f}'.format(v))
detail['{}/{}'.format(metric_prefix,
self.ErrNameMapping[k])] = val
detail['{}/NDS'.format(metric_prefix)] = metrics['nd_score']
detail['{}/mAP'.format(metric_prefix)] = metrics['mean_ap']
return detail
def format_results(self, results, jsonfile_prefix=None):
"""Format the results to json (standard format for COCO evaluation).
Args:
results (list[dict]): Testing results of the dataset.
jsonfile_prefix (str): The prefix of json files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
Returns:
tuple: Returns (result_files, tmp_dir), where `result_files` is a
dict containing the json filepaths, `tmp_dir` is the temporal
directory created for saving json files when
`jsonfile_prefix` is not specified.
"""
assert isinstance(results, list), 'results must be a list'
assert len(results) == len(self), (
'The length of results is not equal to the dataset len: {} != {}'.
format(len(results), len(self)))
if jsonfile_prefix is None:
tmp_dir = tempfile.TemporaryDirectory()
jsonfile_prefix = osp.join(tmp_dir.name, 'results')
else:
tmp_dir = None
# currently the output prediction results could be in two formats
# 1. list of dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...)
# 2. list of dict('pts_bbox' or 'img_bbox':
# dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...))
# this is a workaround to enable evaluation of both formats on nuScenes
# refer to https://github.com/open-mmlab/mmdetection3d/issues/449
if not ('pts_bbox' in results[0] or 'img_bbox' in results[0]):
result_files = self._format_bbox(results, jsonfile_prefix)
else:
# should take the inner dict out of 'pts_bbox' or 'img_bbox' dict
result_files = dict()
for name in results[0]:
print(f'\nFormating bboxes of {name}')
results_ = [out[name] for out in results]
tmp_file_ = osp.join(jsonfile_prefix, name)
result_files.update(
{name: self._format_bbox(results_, tmp_file_)})
return result_files, tmp_dir
def evaluate(self,
results,
metric='bbox',
logger=None,
jsonfile_prefix=None,
result_names=['pts_bbox'],
show=False,
out_dir=None,
pipeline=None):
"""Evaluation in nuScenes protocol.
Args:
results (list[dict]): Testing results of the dataset.
metric (str | list[str], optional): Metrics to be evaluated.
Default: 'bbox'.
logger (logging.Logger | str, optional): Logger used for printing
related information during evaluation. Default: None.
jsonfile_prefix (str, optional): The prefix of json files including
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
show (bool, optional): Whether to visualize.
Default: False.
out_dir (str, optional): Path to save the visualization results.
Default: None.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
Returns:
dict[str, float]: Results of each evaluation metric.
"""
result_files, tmp_dir = self.format_results(results, jsonfile_prefix)
if isinstance(result_files, dict):
results_dict = dict()
for name in result_names:
print('Evaluating bboxes of {}'.format(name))
ret_dict = self._evaluate_single(result_files[name])
results_dict.update(ret_dict)
elif isinstance(result_files, str):
results_dict = self._evaluate_single(result_files)
if tmp_dir is not None:
tmp_dir.cleanup()
if show or out_dir:
self.show(results, out_dir, show=show, pipeline=pipeline)
return results_dict
def _build_default_pipeline(self):
"""Build the default pipeline for this dataset."""
pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=dict(backend='disk')),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=dict(backend='disk')),
dict(
type='DefaultFormatBundle3D',
class_names=self.CLASSES,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
return Compose(pipeline)
def show(self, results, out_dir, show=False, pipeline=None):
"""Results visualization.
Args:
results (list[dict]): List of bounding boxes results.
out_dir (str): Output directory of visualization result.
show (bool): Whether to visualize the results online.
Default: False.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
"""
assert out_dir is not None, 'Expect out_dir, got none.'
pipeline = self._get_pipeline(pipeline)
for i, result in enumerate(results):
if 'pts_bbox' in result.keys():
result = result['pts_bbox']
data_info = self.data_infos[i]
pts_path = data_info['lidar_path']
file_name = osp.split(pts_path)[-1].split('.')[0]
points = self._extract_data(i, pipeline, 'points').numpy()
# for now we convert points into depth mode
points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,
Coord3DMode.DEPTH)
inds = result['scores_3d'] > 0.1
gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
show_gt_bboxes = Box3DMode.convert(gt_bboxes, Box3DMode.LIDAR,
Box3DMode.DEPTH)
pred_bboxes = result['boxes_3d'][inds].tensor.numpy()
show_pred_bboxes = Box3DMode.convert(pred_bboxes, Box3DMode.LIDAR,
Box3DMode.DEPTH)
show_result(points, show_gt_bboxes, show_pred_bboxes, out_dir,
file_name, show)
def output_to_nusc_box(detection, with_velocity=True):
"""Convert the output to the box class in the nuScenes.
Args:
detection (dict): Detection results.
- boxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox.
- scores_3d (torch.Tensor): Detection scores.
- labels_3d (torch.Tensor): Predicted box labels.
Returns:
list[:obj:`NuScenesBox`]: List of standard NuScenesBoxes.
"""
box3d = detection['boxes_3d']
scores = detection['scores_3d'].numpy()
labels = detection['labels_3d'].numpy()
box_gravity_center = box3d.gravity_center.numpy()
box_dims = box3d.dims.numpy()
box_yaw = box3d.yaw.numpy()
# our LiDAR coordinate system -> nuScenes box coordinate system
nus_box_dims = box_dims[:, [1, 0, 2]]
box_list = []
for i in range(len(box3d)):
quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box_yaw[i])
if with_velocity:
velocity = (*box3d.tensor[i, 7:9], 0.0)
else:
velocity = (0, 0, 0)
# velo_val = np.linalg.norm(box3d[i, 7:9])
# velo_ori = box3d[i, 6]
# velocity = (
# velo_val * np.cos(velo_ori), velo_val * np.sin(velo_ori), 0.0)
box = NuScenesBox(
box_gravity_center[i],
nus_box_dims[i],
quat,
label=labels[i],
score=scores[i],
velocity=velocity)
box_list.append(box)
return box_list
def lidar_nusc_box_to_global(info,
boxes,
classes,
eval_configs,
eval_version='detection_cvpr_2019'):
"""Convert the box from ego to global coordinate.
Args:
info (dict): Info for a specific sample data, including the
calibration information.
boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
classes (list[str]): Mapped classes in the evaluation.
eval_configs (object): Evaluation configuration object.
eval_version (str, optional): Evaluation version.
Default: 'detection_cvpr_2019'
Returns:
list: List of standard NuScenesBoxes in the global
coordinate.
"""
box_list = []
for box in boxes:
# Move box to ego vehicle coord system
box.rotate(pyquaternion.Quaternion(info['lidar2ego_rotation']))
box.translate(np.array(info['lidar2ego_translation']))
# filter det in ego.
cls_range_map = eval_configs.class_range
radius = np.linalg.norm(box.center[:2], 2)
det_range = cls_range_map[classes[box.label]]
if radius > det_range:
continue
# Move box to global coord system
box.rotate(pyquaternion.Quaternion(info['ego2global_rotation']))
box.translate(np.array(info['ego2global_translation']))
box_list.append(box)
return box_list
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment