Commit b107238d authored by zhangwenwei's avatar zhangwenwei
Browse files

Merge branch 'clean-iou' into 'master'

clean iou calculation

See merge request open-mmlab/mmdet.3d!84
parents b2c43ffd 8c4c9aee
from . import box_torch_ops
from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner
from .coders import DeltaXYZWLHRBBoxCoder from .coders import DeltaXYZWLHRBBoxCoder
# from .bbox_target import bbox_target # from .bbox_target import bbox_target
...@@ -9,10 +8,8 @@ from .samplers import (BaseSampler, CombinedSampler, ...@@ -9,10 +8,8 @@ from .samplers import (BaseSampler, CombinedSampler,
PseudoSampler, RandomSampler, SamplingResult) PseudoSampler, RandomSampler, SamplingResult)
from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes, from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes,
DepthInstance3DBoxes, LiDARInstance3DBoxes, DepthInstance3DBoxes, LiDARInstance3DBoxes,
xywhr2xyxyr) limit_period, points_cam2img, xywhr2xyxyr)
from .transforms import (bbox3d2result, bbox3d2roi, bbox3d_mapping_back, from .transforms import bbox3d2result, bbox3d2roi, bbox3d_mapping_back
box3d_to_corner3d_upright_depth,
boxes3d_to_bev_torch_lidar)
from .assign_sampling import ( # isort:skip, avoid recursive imports from .assign_sampling import ( # isort:skip, avoid recursive imports
build_bbox_coder, # temporally settings build_bbox_coder, # temporally settings
...@@ -22,11 +19,10 @@ __all__ = [ ...@@ -22,11 +19,10 @@ __all__ = [
'BaseAssigner', 'MaxIoUAssigner', 'AssignResult', 'BaseSampler', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult', 'BaseSampler',
'PseudoSampler', 'RandomSampler', 'InstanceBalancedPosSampler', 'PseudoSampler', 'RandomSampler', 'InstanceBalancedPosSampler',
'IoUBalancedNegSampler', 'CombinedSampler', 'SamplingResult', 'IoUBalancedNegSampler', 'CombinedSampler', 'SamplingResult',
'build_assigner', 'build_sampler', 'assign_and_sample', 'box_torch_ops', 'build_assigner', 'build_sampler', 'assign_and_sample', 'build_bbox_coder',
'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'boxes3d_to_bev_torch_lidar', 'DeltaXYZWLHRBBoxCoder', 'BboxOverlapsNearest3D', 'BboxOverlaps3D',
'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d', 'bbox_overlaps_nearest_3d', 'bbox_overlaps_3d', 'Box3DMode',
'bbox_overlaps_3d', 'Box3DMode', 'LiDARInstance3DBoxes', 'LiDARInstance3DBoxes', 'CameraInstance3DBoxes', 'bbox3d2roi',
'CameraInstance3DBoxes', 'bbox3d2roi', 'bbox3d2result', 'bbox3d2result', 'DepthInstance3DBoxes', 'BaseInstance3DBoxes',
'box3d_to_corner3d_upright_depth', 'DepthInstance3DBoxes', 'bbox3d_mapping_back', 'xywhr2xyxyr', 'limit_period', 'points_cam2img'
'BaseInstance3DBoxes', 'bbox3d_mapping_back', 'xywhr2xyxyr'
] ]
# TODO: clean the functions in this file and move the APIs into box structures
# in the future
import numba import numba
import numpy as np import numpy as np
...@@ -248,7 +251,7 @@ def rotation_points_single_angle(points, angle, axis=0): ...@@ -248,7 +251,7 @@ def rotation_points_single_angle(points, angle, axis=0):
return points @ rot_mat_T, rot_mat_T return points @ rot_mat_T, rot_mat_T
def project_to_image(points_3d, proj_mat): def points_cam2img(points_3d, proj_mat):
points_shape = list(points_3d.shape) points_shape = list(points_3d.shape)
points_shape[-1] = 1 points_shape[-1] = 1
points_4 = np.concatenate([points_3d, np.zeros(points_shape)], axis=-1) points_4 = np.concatenate([points_3d, np.zeros(points_shape)], axis=-1)
...@@ -260,7 +263,7 @@ def project_to_image(points_3d, proj_mat): ...@@ -260,7 +263,7 @@ def project_to_image(points_3d, proj_mat):
def box3d_to_bbox(box3d, rect, Trv2c, P2): def box3d_to_bbox(box3d, rect, Trv2c, P2):
box_corners = center_to_corner_box3d( box_corners = center_to_corner_box3d(
box3d[:, :3], box3d[:, 3:6], box3d[:, 6], [0.5, 1.0, 0.5], axis=1) box3d[:, :3], box3d[:, 3:6], box3d[:, 6], [0.5, 1.0, 0.5], axis=1)
box_corners_in_image = project_to_image(box_corners, P2) box_corners_in_image = points_cam2img(box_corners, P2)
# box_corners_in_image: [N, 8, 2] # box_corners_in_image: [N, 8, 2]
minxy = np.min(box_corners_in_image, axis=1) minxy = np.min(box_corners_in_image, axis=1)
maxxy = np.max(box_corners_in_image, axis=1) maxxy = np.max(box_corners_in_image, axis=1)
......
import numpy as np
import torch
def limit_period(val, offset=0.5, period=np.pi):
    """Limit ``val`` into the range ``[-offset * period, (1 - offset) * period)``."""
    num_periods = torch.floor(val / period + offset)
    return val - num_periods * period
def corners_nd(dims, origin=0.5):
    """Generate relative box corners from per-dimension lengths and an origin.

    Args:
        dims (torch.Tensor): Length per dim with shape [N, ndim].
        origin (list or array or float): Origin point relative to the
            smallest corner of the box.

    Returns:
        torch.Tensor: Corners of boxes in shape [N, 2 ** ndim, ndim].
            Point layout example: (2d) x0y0, x0y1, x1y1, x1y0;
            (3d) x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1,
            x1y1z1, x1y1z0, where x0 < x1, y0 < y1, z0 < z1.
    """
    ndim = int(dims.shape[1])
    # Enumerate all 2**ndim binary corner indices, e.g. for 2d:
    # (0,0), (0,1), (1,0), (1,1).
    binary_corners = np.stack(
        np.unravel_index(np.arange(2**ndim), [2] * ndim), axis=1)
    corners_norm = torch.from_numpy(binary_corners).to(
        device=dims.device, dtype=dims.dtype)
    # Reorder so 2d corners run clockwise starting at the minimum point,
    # and 3d corners follow the conventional box layout.
    if ndim == 2:
        corners_norm = corners_norm[[0, 1, 3, 2]]
    elif ndim == 3:
        corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
    # Shift by the origin, then scale each unit corner by the box dims.
    corners_norm = corners_norm - dims.new_tensor(origin)
    return dims.reshape([-1, 1, ndim]) * corners_norm.reshape(
        [1, 2**ndim, ndim])
def rotation_3d_in_axis(points, angles, axis=0):
    """Rotate batched points around a single coordinate axis.

    Args:
        points (torch.Tensor): Points with shape [N, point_size, 3].
        angles (torch.Tensor): Rotation angles with shape [N].
        axis (int): Axis to rotate around; 0, 1, 2 or -1 (same as 2).

    Returns:
        torch.Tensor: Rotated points, same shape as ``points``.

    Raises:
        ValueError: If ``axis`` is not one of the supported values.
    """
    rot_sin = torch.sin(angles)
    rot_cos = torch.cos(angles)
    ones = torch.ones_like(rot_cos)
    zeros = torch.zeros_like(rot_cos)
    # Rows of the (transposed) rotation matrix for the requested axis.
    if axis == 1:
        rows = ([rot_cos, zeros, -rot_sin],
                [zeros, ones, zeros],
                [rot_sin, zeros, rot_cos])
    elif axis == 2 or axis == -1:
        rows = ([rot_cos, -rot_sin, zeros],
                [rot_sin, rot_cos, zeros],
                [zeros, zeros, ones])
    elif axis == 0:
        rows = ([zeros, rot_cos, -rot_sin],
                [zeros, rot_sin, rot_cos],
                [ones, zeros, zeros])
    else:
        raise ValueError('axis should in range')
    rot_mat_T = torch.stack([torch.stack(row) for row in rows])
    # Batched matmul: rot_mat_T has shape (3, 3, N).
    return torch.einsum('aij,jka->aik', (points, rot_mat_T))
def center_to_corner_box3d(centers,
                           dims,
                           angles,
                           origin=(0.5, 1.0, 0.5),
                           axis=1):
    """Convert KITTI locations, dimensions and angles to 3D corners.

    Args:
        centers (torch.Tensor): Locations in kitti label file
            with the shape of [N, 3].
        dims (torch.Tensor): Dimensions in kitti label
            file with the shape of [N, 3].
        angles (torch.Tensor): Rotation_y in kitti
            label file with the shape of [N].
        origin (list or array or float): Origin point relative to the
            smallest corner. Use (0.5, 1.0, 0.5) in camera and
            (0.5, 0.5, 0) in lidar.
        axis (int): Rotation axis. 1 for camera and 2 for lidar.

    Returns:
        torch.Tensor: Corners with the shape of [N, 8, 3].
    """
    # 'length' in kitti format is in x axis.
    # yzx(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(wlh)(lidar)
    # center in kitti format is [0.5, 1.0, 0.5] in xyz.
    corners = corners_nd(dims, origin=origin)  # [N, 8, 3]
    corners = rotation_3d_in_axis(corners, angles, axis=axis)
    # Translate the rotated relative corners to the box locations.
    return corners + centers.view(-1, 1, 3)
def lidar_to_camera(points, r_rect, velo2cam):
    """Project lidar points into the rectified camera frame.

    Args:
        points (torch.Tensor): [N, 3] lidar points.
        r_rect (torch.Tensor): [4, 4] rectification matrix.
        velo2cam (torch.Tensor): [4, 4] lidar-to-camera transform.

    Returns:
        torch.Tensor: [N, 3] points in camera coordinates.
    """
    pad = torch.ones(points.shape[0], 1).type_as(points)
    homogeneous = torch.cat([points, pad], dim=-1)
    camera_points = homogeneous @ (r_rect @ velo2cam).t()
    return camera_points[..., :3]
def box_lidar_to_camera(data, r_rect, velo2cam):
    """Convert boxes from lidar (xyz, w, l, h, r) to camera (xyz, l, h, w, r).

    Args:
        data (torch.Tensor): [N, 7+] boxes in lidar coordinates.
        r_rect (torch.Tensor): [4, 4] rectification matrix.
        velo2cam (torch.Tensor): [4, 4] lidar-to-camera transform.

    Returns:
        torch.Tensor: [N, 7] boxes in camera coordinates.
    """
    xyz = lidar_to_camera(data[..., 0:3], r_rect, velo2cam)
    w = data[..., 3:4]
    l = data[..., 4:5]
    h = data[..., 5:6]
    r = data[..., 6:7]
    # Dimension order changes from (w, l, h) to (l, h, w) in camera coords.
    return torch.cat([xyz, l, h, w, r], dim=-1)
def project_to_image(points_3d, proj_mat):
    """Project 3D camera-frame points onto the image plane.

    Args:
        points_3d (torch.Tensor): [..., 3] points in camera coordinates.
        proj_mat (torch.Tensor): Projection matrix; its transpose maps
            homogeneous points onto the image plane.

    Returns:
        torch.Tensor: [..., 2] projected image coordinates.
    """
    ones_shape = list(points_3d.shape[:-1]) + [1]
    # Homogeneous coordinates with a trailing one; new_ones yields better
    # results than the previous new_zeros-based implementation.
    points_hom = torch.cat(
        [points_3d, points_3d.new_ones(ones_shape)], dim=-1)
    points_img = torch.matmul(points_hom, proj_mat.t())
    # Perspective division by the depth component.
    return points_img[..., :2] / points_img[..., 2:3]
def rbbox2d_to_near_bbox(rbboxes):
    """Convert rotated bboxes to their nearest axis-aligned bboxes.

    Args:
        rbboxes (torch.Tensor): [N, 5(x, y, xdim, ydim, rad)] rotated bboxes.

    Returns:
        torch.Tensor: Bboxes with the shape of [N, 4(xmin, ymin, xmax, ymax)].
    """
    # Wrap rotations into [-pi/2, pi/2); boxes rotated by more than pi/4
    # are closer to 'lying', so their x/y dims are swapped.
    wrapped_rots = torch.abs(limit_period(rbboxes[..., -1], 0.5, np.pi))
    swap_dims = (wrapped_rots > np.pi / 4)[..., None]
    centered = torch.where(swap_dims, rbboxes[:, [0, 1, 3, 2]],
                           rbboxes[:, :4])
    return center_to_minmax_2d(centered[:, :2], centered[:, 2:])
def center_to_minmax_2d_0_5(centers, dims):
    """Convert center-format 2D boxes (origin at 0.5) to min-max format."""
    half = dims / 2
    return torch.cat([centers - half, centers + half], dim=-1)
def center_to_minmax_2d(centers, dims, origin=0.5):
    """Convert center-format 2D boxes to min-max format for any origin."""
    if origin == 0.5:
        # Fast path: symmetric origin needs no corner enumeration.
        return center_to_minmax_2d_0_5(centers, dims)
    corners = center_to_corner_box2d(centers, dims, origin=origin)
    # Corner 0 is the minimum point and corner 2 the maximum point.
    return corners[:, [0, 2]].reshape([-1, 4])
def center_to_corner_box2d(centers, dims, angles=None, origin=0.5):
    """Convert 2D center-format boxes to corner points.

    Format: center (xy), dims (xy), angles (clockwise when positive).

    Args:
        centers (torch.Tensor): Locations with shape [N, 2].
        dims (torch.Tensor): Dimensions with shape [N, 2].
        angles (torch.Tensor, optional): Rotation angles with shape [N].
        origin (list or array or float): Origin point relative to the
            smallest corner.

    Returns:
        torch.Tensor: Corners with the shape of [N, 4, 2].
    """
    # 'length' in kitti format is in x axis.
    # xyz(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(wlh)(lidar)
    # center in kitti format is [0.5, 1.0, 0.5] in xyz.
    corners = corners_nd(dims, origin=origin)  # [N, 4, 2]
    if angles is not None:
        corners = rotation_2d(corners, angles)
    # Translate the relative corners to the box centers.
    return corners + centers.reshape([-1, 1, 2])
def rotation_2d(points, angles):
    """Rotate 2D points around the origin, clockwise for positive angles.

    Args:
        points (torch.Tensor): Points with shape [N, point_size, 2].
        angles (torch.Tensor): Rotation angles with shape [N].

    Returns:
        torch.Tensor: Rotated points, same shape as ``points``.
    """
    rot_sin = torch.sin(angles)
    rot_cos = torch.cos(angles)
    # Fix: torch.stack requires a sequence of tensors, so the nested
    # python-list form torch.stack([[...], [...]]) raised a TypeError.
    # Build the (2, 2, N) transposed rotation matrix by stacking rows.
    rot_mat_T = torch.stack([
        torch.stack([rot_cos, -rot_sin]),
        torch.stack([rot_sin, rot_cos])
    ])
    return torch.einsum('aij,jka->aik', points, rot_mat_T)
def enlarge_box3d_lidar(boxes3d, extra_width):
    """Enlarge the length, width and height of input boxes.

    Args:
        boxes3d (torch.Tensor | np.ndarray): bottom_center boxes with
            shape [N, 7], (x, y, z, w, l, h, ry) in LiDAR coords.
        extra_width (float): a fixed amount added to each side.

    Returns:
        torch.Tensor | np.ndarray: enlarged boxes (input is left untouched).
    """
    if isinstance(boxes3d, np.ndarray):
        enlarged = boxes3d.copy()
    else:
        enlarged = boxes3d.clone()
    # Each dimension grows by extra_width on both sides.
    enlarged[:, 3:6] += extra_width * 2
    # Keep the geometric center fixed: push the bottom-center z down.
    enlarged[:, 2] -= extra_width
    return enlarged
def boxes3d_to_corners3d_lidar_torch(boxes3d, bottom_center=True):
    """Convert KITTI center-format LiDAR boxes to 8 corner points.

        7 -------- 4
       /|         /|
      6 -------- 5 .
      | |        | |
      . 3 -------- 0
      |/         |/
      2 -------- 1

    Args:
        boxes3d (FloatTensor): (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords,
            see the definition of ry in KITTI dataset.
        bottom_center (bool): whether z is on the bottom center of object.

    Returns:
        FloatTensor: box corners with shape (N, 8, 3).
    """
    boxes_num = boxes3d.shape[0]
    w, l, h = boxes3d[:, 3:4], boxes3d[:, 4:5], boxes3d[:, 5:6]
    ry = boxes3d[:, 6:7]
    zeros = boxes3d.new_zeros(boxes_num, 1)
    ones = boxes3d.new_ones(boxes_num, 1)
    # Corner offsets relative to the box center before rotation.
    x_corners = torch.cat(
        [w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2., w / 2.],
        dim=1)  # (N, 8)
    y_corners = torch.cat(
        [-l / 2., -l / 2., l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2.],
        dim=1)  # (N, 8)
    if bottom_center:
        # z origin at the bottom face: corners span [0, h].
        z_corners = torch.cat([zeros, zeros, zeros, zeros, h, h, h, h],
                              dim=1)  # (N, 8)
    else:
        # z origin at the geometric center: corners span [-h/2, h/2].
        z_corners = torch.cat([
            -h / 2., -h / 2., -h / 2., -h / 2., h / 2., h / 2., h / 2., h / 2.
        ],
                              dim=1)  # (N, 8)
    temp_corners = torch.cat(
        (x_corners.unsqueeze(dim=2), y_corners.unsqueeze(dim=2),
         z_corners.unsqueeze(dim=2)),
        dim=2)  # (N, 8, 3)
    # Per-box rotation matrix around the z axis, built row by row.
    cosa, sina = torch.cos(ry), torch.sin(ry)
    raw_1 = torch.cat([cosa, -sina, zeros], dim=1)  # (N, 3)
    raw_2 = torch.cat([sina, cosa, zeros], dim=1)  # (N, 3)
    raw_3 = torch.cat([zeros, zeros, ones], dim=1)  # (N, 3)
    R = torch.cat((raw_1.unsqueeze(dim=1), raw_2.unsqueeze(dim=1),
                   raw_3.unsqueeze(dim=1)),
                  dim=1)  # (N, 3, 3)
    # Rotate the relative corners of every box.
    rotated_corners = torch.matmul(temp_corners, R)  # (N, 8, 3)
    x_corners = rotated_corners[:, :, 0]
    y_corners = rotated_corners[:, :, 1]
    z_corners = rotated_corners[:, :, 2]
    # Translate the rotated corners to the box location.
    x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2]
    x = x_loc.view(-1, 1) + x_corners.view(-1, 8)
    y = y_loc.view(-1, 1) + y_corners.view(-1, 8)
    z = z_loc.view(-1, 1) + z_corners.view(-1, 8)
    corners = torch.cat((x.view(-1, 8, 1), y.view(-1, 8, 1), z.view(-1, 8, 1)),
                        dim=2)
    return corners
import torch
from mmdet3d.ops.iou3d import boxes_iou3d_gpu_camera, boxes_iou3d_gpu_lidar
from mmdet.core.bbox import bbox_overlaps from mmdet.core.bbox import bbox_overlaps
from mmdet.core.bbox.iou_calculators.builder import IOU_CALCULATORS from mmdet.core.bbox.iou_calculators.builder import IOU_CALCULATORS
from .. import box_torch_ops from ..structures import (CameraInstance3DBoxes, DepthInstance3DBoxes,
LiDARInstance3DBoxes)
@IOU_CALCULATORS.register_module() @IOU_CALCULATORS.register_module()
class BboxOverlapsNearest3D(object): class BboxOverlapsNearest3D(object):
"""Nearest 3D IoU Calculator""" """Nearest 3D IoU Calculator
Note:
This IoU calculator first finds the nearest 2D boxes in bird eye view
(BEV), and then calculate the 2D IoU using ``:meth:bbox_overlaps``.
Args:
coordinate (str): 'camera', 'lidar', or 'depth' coordinate system
"""
def __init__(self, coordinate='lidar'):
assert coordinate in ['camera', 'lidar', 'depth']
self.coordinate = coordinate
def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False): def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False):
return bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode, is_aligned) return bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode, is_aligned,
self.coordinate)
def __repr__(self): def __repr__(self):
repr_str = self.__class__.__name__ repr_str = self.__class__.__name__
repr_str += '(mode={}, is_aligned={})'.format(self.mode, repr_str += f'(coordinate={self.coordinate}'
self.is_aligned)
return repr_str return repr_str
...@@ -25,11 +35,11 @@ class BboxOverlaps3D(object): ...@@ -25,11 +35,11 @@ class BboxOverlaps3D(object):
"""3D IoU Calculator """3D IoU Calculator
Args: Args:
coordinate (str): 'camera' or 'lidar' coordinate system coordinate (str): 'camera', 'lidar', or 'depth' coordinate system
""" """
def __init__(self, coordinate): def __init__(self, coordinate):
assert coordinate in ['camera', 'lidar'] assert coordinate in ['camera', 'lidar', 'depth']
self.coordinate = coordinate self.coordinate = coordinate
def __call__(self, bboxes1, bboxes2, mode='iou'): def __call__(self, bboxes1, bboxes2, mode='iou'):
...@@ -37,35 +47,63 @@ class BboxOverlaps3D(object): ...@@ -37,35 +47,63 @@ class BboxOverlaps3D(object):
def __repr__(self): def __repr__(self):
repr_str = self.__class__.__name__ repr_str = self.__class__.__name__
repr_str += '(mode={}, is_aligned={})'.format(self.mode, repr_str += f'(coordinate={self.coordinate}'
self.is_aligned)
return repr_str return repr_str
def bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode='iou', is_aligned=False): def bbox_overlaps_nearest_3d(bboxes1,
bboxes2,
mode='iou',
is_aligned=False,
coordinate='lidar'):
"""Calculate nearest 3D IoU """Calculate nearest 3D IoU
Note:
This function first finds the nearest 2D boxes in bird eye view
(BEV), and then calculate the 2D IoU using ``:meth:bbox_overlaps``.
Ths IoU calculator ``:class:BboxOverlapsNearest3D`` uses this
function to calculate IoUs of boxes.
If ``is_aligned`` is ``False``, then it calculates the ious between
each bbox of bboxes1 and bboxes2, otherwise the ious between each
aligned pair of bboxes1 and bboxes2.
Args: Args:
bboxes1 (torch.Tensor): shape (N, 7+N) [x, y, z, h, w, l, ry, v]. bboxes1 (torch.Tensor): shape (N, 7+N) [x, y, z, h, w, l, ry, v].
bboxes2 (torch.Tensor): shape (M, 7+N) [x, y, z, h, w, l, ry, v]. bboxes2 (torch.Tensor): shape (M, 7+N) [x, y, z, h, w, l, ry, v].
mode (str): "iou" (intersection over union) or iof mode (str): "iou" (intersection over union) or iof
(intersection over foreground). (intersection over foreground).
is_aligned (bool): Whether the calculation is aligned
Return: Return:
torch.Tensor: Bbox overlaps results of bboxes1 and bboxes2 torch.Tensor: If ``is_aligned`` is ``True``, return ious between
with shape (M, N).(not support aligned mode currently). bboxes1 and bboxes2 with shape (M, N). If ``is_aligned`` is
``False``, return shape is M.
""" """
assert bboxes1.size(-1) >= 7 assert bboxes1.size(-1) >= 7
assert bboxes2.size(-1) >= 7 assert bboxes2.size(-1) >= 7
column_index1 = bboxes1.new_tensor([0, 1, 3, 4, 6], dtype=torch.long)
rbboxes1_bev = bboxes1.index_select(dim=-1, index=column_index1) if coordinate == 'camera':
rbboxes2_bev = bboxes2.index_select(dim=-1, index=column_index1) box_type = CameraInstance3DBoxes
elif coordinate == 'lidar':
box_type = LiDARInstance3DBoxes
elif coordinate == 'depth':
box_type = DepthInstance3DBoxes
else:
raise ValueError(
'"coordinate" should be in ["camera", "lidar", "depth"],'
f' got invalid {coordinate}')
bboxes1 = box_type(bboxes1, box_dim=bboxes1.shape[-1])
bboxes2 = box_type(bboxes2, box_dim=bboxes2.shape[-1])
# Change the bboxes to bev # Change the bboxes to bev
# box conversion and iou calculation in torch version on CUDA # box conversion and iou calculation in torch version on CUDA
# is 10x faster than that in numpy version # is 10x faster than that in numpy version
bboxes1_bev = box_torch_ops.rbbox2d_to_near_bbox(rbboxes1_bev) bboxes1_bev = bboxes1.nearest_bev
bboxes2_bev = box_torch_ops.rbbox2d_to_near_bbox(rbboxes2_bev) bboxes2_bev = bboxes2.nearest_bev
ret = bbox_overlaps( ret = bbox_overlaps(
bboxes1_bev, bboxes2_bev, mode=mode, is_aligned=is_aligned) bboxes1_bev, bboxes2_bev, mode=mode, is_aligned=is_aligned)
return ret return ret
...@@ -74,6 +112,11 @@ def bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode='iou', is_aligned=False): ...@@ -74,6 +112,11 @@ def bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode='iou', is_aligned=False):
def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'): def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'):
"""Calculate 3D IoU using cuda implementation """Calculate 3D IoU using cuda implementation
Note:
This function calculate the IoU of 3D boxes based on their volumes.
IoU calculator ``:class:BboxOverlaps3D`` uses this function to
calculate the actual IoUs of boxes.
Args: Args:
bboxes1 (torch.Tensor): shape (N, 7) [x, y, z, h, w, l, ry]. bboxes1 (torch.Tensor): shape (N, 7) [x, y, z, h, w, l, ry].
bboxes2 (torch.Tensor): shape (M, 7) [x, y, z, h, w, l, ry]. bboxes2 (torch.Tensor): shape (M, 7) [x, y, z, h, w, l, ry].
...@@ -83,19 +126,21 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'): ...@@ -83,19 +126,21 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'):
Return: Return:
torch.Tensor: Bbox overlaps results of bboxes1 and bboxes2 torch.Tensor: Bbox overlaps results of bboxes1 and bboxes2
with shape (M, N).(not support aligned mode currently). with shape (M, N) (aligned mode is not supported currently).
""" """
assert bboxes1.size(-1) == bboxes2.size(-1) == 7 assert bboxes1.size(-1) == bboxes2.size(-1) == 7
assert coordinate in ['camera', 'lidar']
rows = bboxes1.size(0)
cols = bboxes2.size(0)
if rows * cols == 0:
return bboxes1.new(rows, cols)
if coordinate == 'camera': if coordinate == 'camera':
return boxes_iou3d_gpu_camera(bboxes1, bboxes2, mode) box_type = CameraInstance3DBoxes
elif coordinate == 'lidar': elif coordinate == 'lidar':
return boxes_iou3d_gpu_lidar(bboxes1, bboxes2, mode) box_type = LiDARInstance3DBoxes
elif coordinate == 'depth':
box_type = DepthInstance3DBoxes
else: else:
raise NotImplementedError raise ValueError(
'"coordinate" should be in ["camera", "lidar", "depth"],'
f' got invalid {coordinate}')
bboxes1 = box_type(bboxes1, box_dim=bboxes1.shape[-1])
bboxes2 = box_type(bboxes2, box_dim=bboxes2.shape[-1])
return bboxes1.overlaps(bboxes1, bboxes2, mode=mode)
...@@ -3,9 +3,11 @@ from .box_3d_mode import Box3DMode ...@@ -3,9 +3,11 @@ from .box_3d_mode import Box3DMode
from .cam_box3d import CameraInstance3DBoxes from .cam_box3d import CameraInstance3DBoxes
from .depth_box3d import DepthInstance3DBoxes from .depth_box3d import DepthInstance3DBoxes
from .lidar_box3d import LiDARInstance3DBoxes from .lidar_box3d import LiDARInstance3DBoxes
from .utils import xywhr2xyxyr from .utils import (limit_period, points_cam2img, rotation_3d_in_axis,
xywhr2xyxyr)
__all__ = [ __all__ = [
'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes', 'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes',
'CameraInstance3DBoxes', 'DepthInstance3DBoxes', 'xywhr2xyxyr' 'CameraInstance3DBoxes', 'DepthInstance3DBoxes', 'xywhr2xyxyr',
'rotation_3d_in_axis', 'limit_period', 'points_cam2img'
] ]
...@@ -12,7 +12,7 @@ class BaseInstance3DBoxes(object): ...@@ -12,7 +12,7 @@ class BaseInstance3DBoxes(object):
Note: Note:
The box is bottom centered, i.e. the relative position of origin in The box is bottom centered, i.e. the relative position of origin in
the box is (0.5, 0.5, 0). the box is (0.5, 0.5, 0).
Args: Args:
tensor (torch.Tensor | np.ndarray | list): a Nxbox_dim matrix. tensor (torch.Tensor | np.ndarray | list): a Nxbox_dim matrix.
...@@ -424,6 +424,11 @@ class BaseInstance3DBoxes(object): ...@@ -424,6 +424,11 @@ class BaseInstance3DBoxes(object):
assert mode in ['iou', 'iof'] assert mode in ['iou', 'iof']
rows = len(boxes1)
cols = len(boxes2)
if rows * cols == 0:
return boxes1.tensor.new(rows, cols)
# height overlap # height overlap
overlaps_h = cls.height_overlaps(boxes1, boxes2) overlaps_h = cls.height_overlaps(boxes1, boxes2)
......
...@@ -72,3 +72,15 @@ def xywhr2xyxyr(boxes_xywhr): ...@@ -72,3 +72,15 @@ def xywhr2xyxyr(boxes_xywhr):
boxes[:, 3] = boxes_xywhr[:, 1] + half_h boxes[:, 3] = boxes_xywhr[:, 1] + half_h
boxes[:, 4] = boxes_xywhr[:, 4] boxes[:, 4] = boxes_xywhr[:, 4]
return boxes return boxes
def points_cam2img(points_3d, proj_mat):
    """Project points from camera coordinates onto the image plane.

    Args:
        points_3d (torch.Tensor): [..., 3] points in camera coordinates.
        proj_mat (torch.Tensor): Projection matrix; its transpose maps
            homogeneous points onto the image plane.

    Returns:
        torch.Tensor: [..., 2] image coordinates of the points.
    """
    ones_shape = list(points_3d.shape[:-1]) + [1]
    # Homogeneous coordinates with a trailing one; new_ones yields better
    # results than the previous new_zeros-based implementation.
    points_hom = torch.cat(
        [points_3d, points_3d.new_ones(ones_shape)], dim=-1)
    points_img = torch.matmul(points_hom, proj_mat.t())
    # Perspective division by the depth component.
    return points_img[..., :2] / points_img[..., 2:3]
...@@ -13,69 +13,6 @@ def bbox3d_mapping_back(bboxes, scale_factor, flip_horizontal, flip_vertical): ...@@ -13,69 +13,6 @@ def bbox3d_mapping_back(bboxes, scale_factor, flip_horizontal, flip_vertical):
return new_bboxes return new_bboxes
def transform_lidar_to_cam(boxes_lidar):
    """Transform boxes from lidar coords to cam coords.

    Only the axis convention is changed, not an exact camera transform.

    Args:
        boxes_lidar (torch.Tensor): (N, 3 or 7) [x, y, z, w, l, h, ry]
            in LiDAR coords.

    Returns:
        torch.Tensor: Boxes in camera coords.
    """
    boxes_cam = boxes_lidar.clone().detach()
    # Camera x = -lidar y, camera y = -lidar z, camera z = lidar x.
    x, y, z = boxes_lidar[:, 0], boxes_lidar[:, 1], boxes_lidar[:, 2]
    boxes_cam[:, 0] = -y
    boxes_cam[:, 1] = -z
    boxes_cam[:, 2] = x
    if boxes_cam.shape[1] > 3:
        # Dimension order (w, l, h) in lidar becomes (h, w, l) in camera.
        boxes_cam[:, 3] = boxes_lidar[:, 5]
        boxes_cam[:, 4] = boxes_lidar[:, 3]
        boxes_cam[:, 5] = boxes_lidar[:, 4]
    return boxes_cam
def boxes3d_to_bev_torch(boxes3d):
    """Transform 3d boxes to bird-eye-view boxes in camera coords.

    Args:
        boxes3d (torch.Tensor): 3d boxes in camera coords
            with the shape of [N, 7] (x, y, z, h, w, l, ry).

    Returns:
        torch.Tensor: Bev boxes with the shape of [N, 5]
            (x1, y1, x2, y2, ry).
    """
    # In camera coords the BEV plane is spanned by x (index 0) and z (index 2).
    cu = boxes3d[:, 0]
    cv = boxes3d[:, 2]
    half_l = boxes3d[:, 5] / 2
    half_w = boxes3d[:, 4] / 2
    return torch.stack(
        [cu - half_l, cv - half_w, cu + half_l, cv + half_w, boxes3d[:, 6]],
        dim=1)
def boxes3d_to_bev_torch_lidar(boxes3d):
    """Transform 3d boxes to bird-eye-view boxes in lidar coords.

    Args:
        boxes3d (torch.Tensor): 3d boxes in lidar coords
            with the shape of [N, 7] (x, y, z, h, w, l, ry).

    Returns:
        torch.Tensor: Bev boxes with the shape of [N, 5]
            (x1, y1, x2, y2, ry).
    """
    # In lidar coords the BEV plane is spanned by x (index 0) and y (index 1).
    cu = boxes3d[:, 0]
    cv = boxes3d[:, 1]
    half_l = boxes3d[:, 4] / 2
    half_w = boxes3d[:, 3] / 2
    return torch.stack(
        [cu - half_w, cv - half_l, cu + half_w, cv + half_l, boxes3d[:, 6]],
        dim=1)
def bbox3d2roi(bbox_list): def bbox3d2roi(bbox_list):
"""Convert a list of bboxes to roi format. """Convert a list of bboxes to roi format.
...@@ -113,88 +50,3 @@ def bbox3d2result(bboxes, scores, labels): ...@@ -113,88 +50,3 @@ def bbox3d2result(bboxes, scores, labels):
boxes_3d=bboxes.to('cpu'), boxes_3d=bboxes.to('cpu'),
scores_3d=scores.cpu(), scores_3d=scores.cpu(),
labels_3d=labels.cpu()) labels_3d=labels.cpu())
def upright_depth_to_lidar_torch(points=None,
                                 bboxes=None,
                                 to_bottom_center=False):
    """Convert points and boxes in upright depth coordinate to lidar.

    Args:
        points (None | torch.Tensor): points in upright depth coordinate.
        bboxes (None | torch.Tensor): bboxes in upright depth coordinate.
        to_bottom_center (bool): convert bboxes to bottom center.

    Returns:
        tuple: points and bboxes in lidar coordinate (None where the
            corresponding input was None).
    """
    points_lidar = None
    if points is not None:
        # Swap the x/y axes, then negate the new y axis.
        points_lidar = points.clone()[..., [1, 0, 2]]
        points_lidar[..., 1] *= -1

    bboxes_lidar = None
    if bboxes is not None:
        # Swap x/y of both centers and dims, then negate the new y axis.
        bboxes_lidar = bboxes.clone()[..., [1, 0, 2, 4, 3, 5, 6]]
        bboxes_lidar[..., 1] *= -1
        if to_bottom_center:
            # Move the gravity center down to the bottom center.
            bboxes_lidar[..., 2] -= 0.5 * bboxes_lidar[..., 5]
    return points_lidar, bboxes_lidar
def box3d_to_corner3d_upright_depth(boxes3d):
    """Convert box3d to corner3d in upright depth coordinate.

    Args:
        boxes3d (torch.Tensor): boxes with shape [n, 7]
            (x, y, z, l, w, h, ry) in upright depth coordinate.

    Returns:
        torch.Tensor: corners with shape [n, 8, 3] in upright
            depth coordinate.
    """
    boxes_num = boxes3d.shape[0]
    ry = boxes3d[:, 6:7]
    l, w, h = boxes3d[:, 3:4], boxes3d[:, 4:5], boxes3d[:, 5:6]
    zeros = boxes3d.new_zeros((boxes_num, 1))
    ones = boxes3d.new_ones((boxes_num, 1))
    # zeros = torch.cuda.FloatTensor(boxes_num, 1).fill_(0)
    # ones = torch.cuda.FloatTensor(boxes_num, 1).fill_(1)
    # Corner offsets relative to the box center before rotation.
    x_corners = torch.cat(
        [-l / 2., l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2., -l / 2.],
        dim=1)  # (N, 8)
    y_corners = torch.cat(
        [w / 2., w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2.],
        dim=1)  # (N, 8)
    z_corners = torch.cat(
        [h / 2., h / 2., h / 2., h / 2., -h / 2., -h / 2., -h / 2., -h / 2.],
        dim=1)  # (N, 8)
    temp_corners = torch.cat(
        (x_corners.unsqueeze(dim=2), y_corners.unsqueeze(dim=2),
         z_corners.unsqueeze(dim=2)),
        dim=2)  # (N, 8, 3)
    # Per-box rotation matrix around the z axis; note the negated angle,
    # which flips the rotation direction relative to the lidar variant.
    cosa, sina = torch.cos(-ry), torch.sin(-ry)
    raw_1 = torch.cat([cosa, -sina, zeros], dim=1)  # (N, 3)
    raw_2 = torch.cat([sina, cosa, zeros], dim=1)  # (N, 3)
    raw_3 = torch.cat([zeros, zeros, ones], dim=1)  # (N, 3)
    R = torch.cat((raw_1.unsqueeze(dim=1), raw_2.unsqueeze(dim=1),
                   raw_3.unsqueeze(dim=1)),
                  dim=1)  # (N, 3, 3)
    # Rotate the relative corners of every box.
    rotated_corners = torch.matmul(temp_corners, R)  # (N, 8, 3)
    x_corners = rotated_corners[:, :, 0]
    y_corners = rotated_corners[:, :, 1]
    z_corners = rotated_corners[:, :, 2]
    # Translate the rotated corners to the box location.
    x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2]
    x = x_loc.view(-1, 1) + x_corners.view(-1, 8)
    y = y_loc.view(-1, 1) + y_corners.view(-1, 8)
    z = z_loc.view(-1, 1) + z_corners.view(-1, 8)
    corners3d = torch.cat(
        (x.view(-1, 8, 1), y.view(-1, 8, 1), z.view(-1, 8, 1)), dim=2)
    return corners3d
...@@ -3,12 +3,21 @@ import numpy as np ...@@ -3,12 +3,21 @@ import numpy as np
class VoxelGenerator(object): class VoxelGenerator(object):
"""Voxel generator in numpy implementation"""
def __init__(self, def __init__(self,
voxel_size, voxel_size,
point_cloud_range, point_cloud_range,
max_num_points, max_num_points,
max_voxels=20000): max_voxels=20000):
"""
Args:
voxel_size (list[float]): Size of a single voxel
point_cloud_range (list[float]): Range of points
max_num_points (int): Maximum number of points in a single voxel
max_voxels (int, optional): Maximum number of voxels.
Defaults to 20000.
"""
point_cloud_range = np.array(point_cloud_range, dtype=np.float32) point_cloud_range = np.array(point_cloud_range, dtype=np.float32)
# [0, -40, -3, 70.4, 40, 1] # [0, -40, -3, 70.4, 40, 1]
voxel_size = np.array(voxel_size, dtype=np.float32) voxel_size = np.array(voxel_size, dtype=np.float32)
...@@ -55,24 +64,25 @@ def points_to_voxel(points, ...@@ -55,24 +64,25 @@ def points_to_voxel(points,
with jit and 3.2ghz cpu.(don't calculate other features) with jit and 3.2ghz cpu.(don't calculate other features)
Args: Args:
points: [N, ndim] float tensor. points[:, :3] contain xyz points and points (np.ndarray): [N, ndim]. points[:, :3] contain xyz points and
points[:, 3:] contain other information such as reflectivity. points[:, 3:] contain other information such as reflectivity.
voxel_size: [3] list/tuple or array, float. xyz, indicate voxel size voxel_size (list, tuple, np.ndarray): [3] xyz, indicate voxel size
coors_range: [6] list/tuple or array, float. indicate voxel range. coors_range: [6] list/tuple or array, float. indicate voxel range.
format: xyzxyz, minmax format: xyzxyz, minmax
max_points: int. indicate maximum points contained in a voxel. max_points (int): Indicate maximum points contained in a voxel.
reverse_index: boolean. indicate whether return reversed coordinates. reverse_index (bool): Whether return reversed coordinates.
if points has xyz format and reverse_index is True, output if points has xyz format and reverse_index is True, output
coordinates will be zyx format, but points in features always coordinates will be zyx format, but points in features always
xyz format. xyz format.
max_voxels: int. indicate maximum voxels this function create. max_voxels (int): Maximum number of voxels this function create.
for second, 20000 is a good choice. you should shuffle points for second, 20000 is a good choice. Points should be shuffled for
before call this function because max_voxels may drop some points. randomness before this function because max_voxels drops points.
Returns: Returns:
voxels: [M, max_points, ndim] float tensor. only contain points. tuple[np.ndarray]:
coordinates: [M, 3] int32 tensor. voxels: [M, max_points, ndim] float tensor. only contain points.
num_points_per_voxel: [M] int32 tensor. coordinates: [M, 3] int32 tensor.
num_points_per_voxel: [M] int32 tensor.
""" """
if not isinstance(voxel_size, np.ndarray): if not isinstance(voxel_size, np.ndarray):
voxel_size = np.array(voxel_size, dtype=points.dtype) voxel_size = np.array(voxel_size, dtype=points.dtype)
......
...@@ -9,7 +9,7 @@ import torch ...@@ -9,7 +9,7 @@ import torch
from mmcv.utils import print_log from mmcv.utils import print_log
from mmdet.datasets import DATASETS from mmdet.datasets import DATASETS
from ..core.bbox import Box3DMode, CameraInstance3DBoxes from ..core.bbox import Box3DMode, CameraInstance3DBoxes, points_cam2img
from .custom_3d import Custom3DDataset from .custom_3d import Custom3DDataset
...@@ -463,7 +463,6 @@ class KittiDataset(Custom3DDataset): ...@@ -463,7 +463,6 @@ class KittiDataset(Custom3DDataset):
label_preds=np.zeros([0, 4]), label_preds=np.zeros([0, 4]),
sample_idx=sample_idx) sample_idx=sample_idx)
from mmdet3d.core.bbox import box_torch_ops
rect = info['calib']['R0_rect'].astype(np.float32) rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32) Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
P2 = info['calib']['P2'].astype(np.float32) P2 = info['calib']['P2'].astype(np.float32)
...@@ -473,7 +472,7 @@ class KittiDataset(Custom3DDataset): ...@@ -473,7 +472,7 @@ class KittiDataset(Custom3DDataset):
box_preds_camera = box_preds.convert_to(Box3DMode.CAM, rect @ Trv2c) box_preds_camera = box_preds.convert_to(Box3DMode.CAM, rect @ Trv2c)
box_corners = box_preds_camera.corners box_corners = box_preds_camera.corners
box_corners_in_image = box_torch_ops.project_to_image(box_corners, P2) box_corners_in_image = points_cam2img(box_corners, P2)
# box_corners_in_image: [N, 8, 2] # box_corners_in_image: [N, 8, 2]
minxy = torch.min(box_corners_in_image, dim=1)[0] minxy = torch.min(box_corners_in_image, dim=1)[0]
maxxy = torch.max(box_corners_in_image, dim=1)[0] maxxy = torch.max(box_corners_in_image, dim=1)[0]
......
...@@ -3,9 +3,10 @@ import torch ...@@ -3,9 +3,10 @@ import torch
import torch.nn as nn import torch.nn as nn
from mmcv.cnn import bias_init_with_prob, normal_init from mmcv.cnn import bias_init_with_prob, normal_init
from mmdet3d.core import (PseudoSampler, box3d_multiclass_nms, box_torch_ops, from mmdet3d.core import (PseudoSampler, box3d_multiclass_nms,
boxes3d_to_bev_torch_lidar, build_anchor_generator, build_anchor_generator, build_assigner,
build_assigner, build_bbox_coder, build_sampler) build_bbox_coder, build_sampler, limit_period,
xywhr2xyxyr)
from mmdet.core import multi_apply from mmdet.core import multi_apply
from mmdet.models import HEADS from mmdet.models import HEADS
from ..builder import build_loss from ..builder import build_loss
...@@ -447,7 +448,8 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin): ...@@ -447,7 +448,8 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
mlvl_dir_scores.append(dir_cls_score) mlvl_dir_scores.append(dir_cls_score)
mlvl_bboxes = torch.cat(mlvl_bboxes) mlvl_bboxes = torch.cat(mlvl_bboxes)
mlvl_bboxes_for_nms = boxes3d_to_bev_torch_lidar(mlvl_bboxes) mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](
mlvl_bboxes, box_dim=self.box_code_size).bev)
mlvl_scores = torch.cat(mlvl_scores) mlvl_scores = torch.cat(mlvl_scores)
mlvl_dir_scores = torch.cat(mlvl_dir_scores) mlvl_dir_scores = torch.cat(mlvl_dir_scores)
...@@ -462,8 +464,8 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin): ...@@ -462,8 +464,8 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
cfg, mlvl_dir_scores) cfg, mlvl_dir_scores)
bboxes, scores, labels, dir_scores = results bboxes, scores, labels, dir_scores = results
if bboxes.shape[0] > 0: if bboxes.shape[0] > 0:
dir_rot = box_torch_ops.limit_period( dir_rot = limit_period(bboxes[..., 6] - self.dir_offset,
bboxes[..., 6] - self.dir_offset, self.dir_limit_offset, np.pi) self.dir_limit_offset, np.pi)
bboxes[..., 6] = ( bboxes[..., 6] = (
dir_rot + self.dir_offset + dir_rot + self.dir_offset +
np.pi * dir_scores.to(bboxes.dtype)) np.pi * dir_scores.to(bboxes.dtype))
......
...@@ -3,7 +3,7 @@ from __future__ import division ...@@ -3,7 +3,7 @@ from __future__ import division
import numpy as np import numpy as np
import torch import torch
from mmdet3d.core import box_torch_ops, boxes3d_to_bev_torch_lidar from mmdet3d.core import limit_period, xywhr2xyxyr
from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu
from mmdet.models import HEADS from mmdet.models import HEADS
from .anchor3d_head import Anchor3DHead from .anchor3d_head import Anchor3DHead
...@@ -172,7 +172,8 @@ class PartA2RPNHead(Anchor3DHead): ...@@ -172,7 +172,8 @@ class PartA2RPNHead(Anchor3DHead):
mlvl_dir_scores.append(dir_cls_score) mlvl_dir_scores.append(dir_cls_score)
mlvl_bboxes = torch.cat(mlvl_bboxes) mlvl_bboxes = torch.cat(mlvl_bboxes)
mlvl_bboxes_for_nms = boxes3d_to_bev_torch_lidar(mlvl_bboxes) mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](
mlvl_bboxes, box_dim=self.box_code_size).bev)
mlvl_max_scores = torch.cat(mlvl_max_scores) mlvl_max_scores = torch.cat(mlvl_max_scores)
mlvl_label_pred = torch.cat(mlvl_label_pred) mlvl_label_pred = torch.cat(mlvl_label_pred)
mlvl_dir_scores = torch.cat(mlvl_dir_scores) mlvl_dir_scores = torch.cat(mlvl_dir_scores)
...@@ -246,9 +247,8 @@ class PartA2RPNHead(Anchor3DHead): ...@@ -246,9 +247,8 @@ class PartA2RPNHead(Anchor3DHead):
labels.append(_mlvl_label_pred[selected]) labels.append(_mlvl_label_pred[selected])
cls_scores.append(_mlvl_cls_score[selected]) cls_scores.append(_mlvl_cls_score[selected])
dir_scores.append(_mlvl_dir_scores[selected]) dir_scores.append(_mlvl_dir_scores[selected])
dir_rot = box_torch_ops.limit_period( dir_rot = limit_period(bboxes[-1][..., 6] - self.dir_offset,
bboxes[-1][..., 6] - self.dir_offset, self.dir_limit_offset, self.dir_limit_offset, np.pi)
np.pi)
bboxes[-1][..., 6] = ( bboxes[-1][..., 6] = (
dir_rot + self.dir_offset + dir_rot + self.dir_offset +
np.pi * dir_scores[-1].to(bboxes[-1].dtype)) np.pi * dir_scores[-1].to(bboxes[-1].dtype))
......
import numpy as np import numpy as np
import torch import torch
from mmdet3d.core import box_torch_ops from mmdet3d.core import limit_period
from mmdet.core import images_to_levels, multi_apply from mmdet.core import images_to_levels, multi_apply
...@@ -270,7 +270,7 @@ def get_direction_target(anchors, ...@@ -270,7 +270,7 @@ def get_direction_target(anchors,
torch.Tensor: Encoded direction targets. torch.Tensor: Encoded direction targets.
""" """
rot_gt = reg_targets[..., 6] + anchors[..., 6] rot_gt = reg_targets[..., 6] + anchors[..., 6]
offset_rot = box_torch_ops.limit_period(rot_gt - dir_offset, 0, 2 * np.pi) offset_rot = limit_period(rot_gt - dir_offset, 0, 2 * np.pi)
dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / num_bins)).long() dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / num_bins)).long()
dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=num_bins - 1) dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=num_bins - 1)
if one_hot: if one_hot:
......
...@@ -4,12 +4,12 @@ import torch.nn as nn ...@@ -4,12 +4,12 @@ import torch.nn as nn
from mmcv.cnn import ConvModule, normal_init, xavier_init from mmcv.cnn import ConvModule, normal_init, xavier_init
import mmdet3d.ops.spconv as spconv import mmdet3d.ops.spconv as spconv
from mmdet3d.core import build_bbox_coder from mmdet3d.core import build_bbox_coder, xywhr2xyxyr
from mmdet3d.core.bbox import box_torch_ops from mmdet3d.core.bbox.structures import (LiDARInstance3DBoxes,
rotation_3d_in_axis)
from mmdet3d.models.builder import build_loss from mmdet3d.models.builder import build_loss
from mmdet3d.ops import make_sparse_convmodule from mmdet3d.ops import make_sparse_convmodule
from mmdet3d.ops.iou3d.iou3d_utils import (boxes3d_to_bev_torch_lidar, nms_gpu, from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu
nms_normal_gpu)
from mmdet.core import multi_apply from mmdet.core import multi_apply
from mmdet.models import HEADS from mmdet.models import HEADS
...@@ -335,7 +335,7 @@ class PartA2BboxHead(nn.Module): ...@@ -335,7 +335,7 @@ class PartA2BboxHead(nn.Module):
batch_anchors, batch_anchors,
pos_bbox_pred.view(-1, code_size)).view(-1, code_size) pos_bbox_pred.view(-1, code_size)).view(-1, code_size)
pred_boxes3d[..., 0:3] = box_torch_ops.rotation_3d_in_axis( pred_boxes3d[..., 0:3] = rotation_3d_in_axis(
pred_boxes3d[..., 0:3].unsqueeze(1), pred_boxes3d[..., 0:3].unsqueeze(1),
(pos_rois_rotation + np.pi / 2), (pos_rois_rotation + np.pi / 2),
axis=2).squeeze(1) axis=2).squeeze(1)
...@@ -412,7 +412,7 @@ class PartA2BboxHead(nn.Module): ...@@ -412,7 +412,7 @@ class PartA2BboxHead(nn.Module):
# canonical transformation # canonical transformation
pos_gt_bboxes_ct[..., 0:3] -= roi_center pos_gt_bboxes_ct[..., 0:3] -= roi_center
pos_gt_bboxes_ct[..., 6] -= roi_ry pos_gt_bboxes_ct[..., 6] -= roi_ry
pos_gt_bboxes_ct[..., 0:3] = box_torch_ops.rotation_3d_in_axis( pos_gt_bboxes_ct[..., 0:3] = rotation_3d_in_axis(
pos_gt_bboxes_ct[..., 0:3].unsqueeze(1), pos_gt_bboxes_ct[..., 0:3].unsqueeze(1),
-(roi_ry + np.pi / 2), -(roi_ry + np.pi / 2),
axis=2).squeeze(1) axis=2).squeeze(1)
...@@ -451,15 +451,17 @@ class PartA2BboxHead(nn.Module): ...@@ -451,15 +451,17 @@ class PartA2BboxHead(nn.Module):
""" """
assert pred_bbox3d.shape[0] == gt_bbox3d.shape[0] assert pred_bbox3d.shape[0] == gt_bbox3d.shape[0]
pred_box_corners = box_torch_ops.boxes3d_to_corners3d_lidar_torch( # This is a little bit hack here because we assume the box for
pred_bbox3d) # Part-A2 is in LiDAR coordinates
gt_box_corners = box_torch_ops.boxes3d_to_corners3d_lidar_torch( gt_boxes_structure = LiDARInstance3DBoxes(gt_bbox3d)
gt_bbox3d) pred_box_corners = LiDARInstance3DBoxes(pred_bbox3d).corners
gt_box_corners = gt_boxes_structure.corners
# This flip only changes the heading direction of GT boxes
gt_bbox3d_flip = gt_boxes_structure.clone()
gt_bbox3d_flip.tensor[:, 6] += np.pi
gt_box_corners_flip = gt_bbox3d_flip.corners
gt_bbox3d_flip = gt_bbox3d.clone()
gt_bbox3d_flip[:, 6] += np.pi
gt_box_corners_flip = box_torch_ops.boxes3d_to_corners3d_lidar_torch(
gt_bbox3d_flip)
corner_dist = torch.min( corner_dist = torch.min(
torch.norm(pred_box_corners - gt_box_corners, dim=2), torch.norm(pred_box_corners - gt_box_corners, dim=2),
torch.norm(pred_box_corners - gt_box_corners_flip, torch.norm(pred_box_corners - gt_box_corners_flip,
...@@ -504,7 +506,7 @@ class PartA2BboxHead(nn.Module): ...@@ -504,7 +506,7 @@ class PartA2BboxHead(nn.Module):
local_roi_boxes = roi_boxes.clone().detach() local_roi_boxes = roi_boxes.clone().detach()
local_roi_boxes[..., 0:3] = 0 local_roi_boxes[..., 0:3] = 0
rcnn_boxes3d = self.bbox_coder.decode(local_roi_boxes, bbox_pred) rcnn_boxes3d = self.bbox_coder.decode(local_roi_boxes, bbox_pred)
rcnn_boxes3d[..., 0:3] = box_torch_ops.rotation_3d_in_axis( rcnn_boxes3d[..., 0:3] = rotation_3d_in_axis(
rcnn_boxes3d[..., 0:3].unsqueeze(1), (roi_ry + np.pi / 2), rcnn_boxes3d[..., 0:3].unsqueeze(1), (roi_ry + np.pi / 2),
axis=2).squeeze(1) axis=2).squeeze(1)
rcnn_boxes3d[:, 0:3] += roi_xyz rcnn_boxes3d[:, 0:3] += roi_xyz
...@@ -519,6 +521,7 @@ class PartA2BboxHead(nn.Module): ...@@ -519,6 +521,7 @@ class PartA2BboxHead(nn.Module):
cur_rcnn_boxes3d = rcnn_boxes3d[roi_batch_id == batch_id] cur_rcnn_boxes3d = rcnn_boxes3d[roi_batch_id == batch_id]
selected = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d, selected = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d,
cfg.score_thr, cfg.nms_thr, cfg.score_thr, cfg.nms_thr,
img_metas[batch_id],
cfg.use_rotate_nms) cfg.use_rotate_nms)
selected_bboxes = cur_rcnn_boxes3d[selected] selected_bboxes = cur_rcnn_boxes3d[selected]
selected_label_preds = cur_class_labels[selected] selected_label_preds = cur_class_labels[selected]
...@@ -535,6 +538,7 @@ class PartA2BboxHead(nn.Module): ...@@ -535,6 +538,7 @@ class PartA2BboxHead(nn.Module):
box_preds, box_preds,
score_thr, score_thr,
nms_thr, nms_thr,
input_meta,
use_rotate_nms=True): use_rotate_nms=True):
if use_rotate_nms: if use_rotate_nms:
nms_func = nms_gpu nms_func = nms_gpu
...@@ -545,7 +549,8 @@ class PartA2BboxHead(nn.Module): ...@@ -545,7 +549,8 @@ class PartA2BboxHead(nn.Module):
1] == self.num_classes, f'box_probs shape: {str(box_probs.shape)}' 1] == self.num_classes, f'box_probs shape: {str(box_probs.shape)}'
selected_list = [] selected_list = []
selected_labels = [] selected_labels = []
boxes_for_nms = boxes3d_to_bev_torch_lidar(box_preds) boxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](
box_preds, self.bbox_coder.code_size).bev)
score_thresh = score_thr if isinstance( score_thresh = score_thr if isinstance(
score_thr, list) else [score_thr for x in range(self.num_classes)] score_thr, list) else [score_thr for x in range(self.num_classes)]
......
...@@ -2,7 +2,7 @@ import torch ...@@ -2,7 +2,7 @@ import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from mmdet3d.core.bbox import box_torch_ops from mmdet3d.core.bbox.structures import rotation_3d_in_axis
from mmdet3d.models.builder import build_loss from mmdet3d.models.builder import build_loss
from mmdet.core import multi_apply from mmdet.core import multi_apply
from mmdet.models import HEADS from mmdet.models import HEADS
...@@ -109,7 +109,7 @@ class PointwiseSemanticHead(nn.Module): ...@@ -109,7 +109,7 @@ class PointwiseSemanticHead(nn.Module):
continue continue
fg_voxels = voxel_centers[k_box_flag] fg_voxels = voxel_centers[k_box_flag]
transformed_voxels = fg_voxels - gt_bboxes_3d.bottom_center[k] transformed_voxels = fg_voxels - gt_bboxes_3d.bottom_center[k]
transformed_voxels = box_torch_ops.rotation_3d_in_axis( transformed_voxels = rotation_3d_in_axis(
transformed_voxels.unsqueeze(0), transformed_voxels.unsqueeze(0),
-gt_bboxes_3d.yaw[k].view(1), -gt_bboxes_3d.yaw[k].view(1),
axis=2) axis=2)
......
from .iou3d_utils import (boxes_iou3d_gpu_camera, boxes_iou3d_gpu_lidar, from .iou3d_utils import boxes_iou_bev, nms_gpu, nms_normal_gpu
boxes_iou_bev, nms_gpu, nms_normal_gpu)
__all__ = [ __all__ = ['boxes_iou_bev', 'nms_gpu', 'nms_normal_gpu']
'boxes_iou_bev', 'boxes_iou3d_gpu_camera', 'nms_gpu', 'nms_normal_gpu',
'boxes_iou3d_gpu_lidar'
]
...@@ -20,102 +20,6 @@ def boxes_iou_bev(boxes_a, boxes_b): ...@@ -20,102 +20,6 @@ def boxes_iou_bev(boxes_a, boxes_b):
return ans_iou return ans_iou
def boxes_iou3d_gpu_camera(boxes_a, boxes_b, mode='iou'):
"""Calculate 3d iou of boxes in camera coordinate
Args:
boxes_a (FloatTensor): (N, 7) [x, y, z, h, w, l, ry]
in LiDAR coordinate
boxes_b (FloatTensor): (M, 7) [x, y, z, h, w, l, ry]
mode (str): "iou" (intersection over union) or iof (intersection over
foreground).
Returns:
FloatTensor: (M, N)
"""
boxes_a_bev = boxes3d_to_bev_torch_camera(boxes_a)
boxes_b_bev = boxes3d_to_bev_torch_camera(boxes_b)
# bev overlap
overlaps_bev = torch.cuda.FloatTensor(
torch.Size((boxes_a.shape[0], boxes_b.shape[0]))).zero_() # (N, M)
iou3d_cuda.boxes_overlap_bev_gpu(boxes_a_bev.contiguous(),
boxes_b_bev.contiguous(), overlaps_bev)
# height overlap
boxes_a_height_min = (boxes_a[:, 1] - boxes_a[:, 3]).view(-1, 1)
boxes_a_height_max = boxes_a[:, 1].view(-1, 1)
boxes_b_height_min = (boxes_b[:, 1] - boxes_b[:, 3]).view(1, -1)
boxes_b_height_max = boxes_b[:, 1].view(1, -1)
max_of_min = torch.max(boxes_a_height_min, boxes_b_height_min)
min_of_max = torch.min(boxes_a_height_max, boxes_b_height_max)
overlaps_h = torch.clamp(min_of_max - max_of_min, min=0)
# 3d iou
overlaps_3d = overlaps_bev * overlaps_h
volume_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
volume_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)
if mode == 'iou':
# the clamp func is used to avoid division of 0
iou3d = overlaps_3d / torch.clamp(
volume_a + volume_b - overlaps_3d, min=1e-8)
else:
iou3d = overlaps_3d / torch.clamp(volume_a, min=1e-8)
return iou3d
def boxes_iou3d_gpu_lidar(boxes_a, boxes_b, mode='iou'):
"""Calculate 3d iou of boxes in lidar coordinate
Args:
boxes_a (FloatTensor): (N, 7) [x, y, z, w, l, h, ry]
in LiDAR coordinate
boxes_b (FloatTensor): (M, 7) [x, y, z, w, l, h, ry]
mode (str): "iou" (intersection over union) or iof (intersection over
foreground).
:Returns:
FloatTensor: (M, N)
"""
boxes_a_bev = boxes3d_to_bev_torch_lidar(boxes_a)
boxes_b_bev = boxes3d_to_bev_torch_lidar(boxes_b)
# height overlap
boxes_a_height_max = (boxes_a[:, 2] + boxes_a[:, 5]).view(-1, 1)
boxes_a_height_min = boxes_a[:, 2].view(-1, 1)
boxes_b_height_max = (boxes_b[:, 2] + boxes_b[:, 5]).view(1, -1)
boxes_b_height_min = boxes_b[:, 2].view(1, -1)
# bev overlap
overlaps_bev = boxes_a.new_zeros(
torch.Size((boxes_a.shape[0], boxes_b.shape[0]))) # (N, M)
iou3d_cuda.boxes_overlap_bev_gpu(boxes_a_bev.contiguous(),
boxes_b_bev.contiguous(), overlaps_bev)
max_of_min = torch.max(boxes_a_height_min, boxes_b_height_min)
min_of_max = torch.min(boxes_a_height_max, boxes_b_height_max)
overlaps_h = torch.clamp(min_of_max - max_of_min, min=0)
# 3d iou
overlaps_3d = overlaps_bev * overlaps_h
volume_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
volume_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)
if mode == 'iou':
# the clamp func is used to avoid division of 0
iou3d = overlaps_3d / torch.clamp(
volume_a + volume_b - overlaps_3d, min=1e-8)
else:
iou3d = overlaps_3d / torch.clamp(volume_a, min=1e-8)
return iou3d
def nms_gpu(boxes, scores, thresh): def nms_gpu(boxes, scores, thresh):
""" """
:param boxes: (N, 5) [x1, y1, x2, y2, ry] :param boxes: (N, 5) [x1, y1, x2, y2, ry]
...@@ -148,41 +52,3 @@ def nms_normal_gpu(boxes, scores, thresh): ...@@ -148,41 +52,3 @@ def nms_normal_gpu(boxes, scores, thresh):
keep = torch.LongTensor(boxes.size(0)) keep = torch.LongTensor(boxes.size(0))
num_out = iou3d_cuda.nms_normal_gpu(boxes, keep, thresh) num_out = iou3d_cuda.nms_normal_gpu(boxes, keep, thresh)
return order[keep[:num_out].cuda()].contiguous() return order[keep[:num_out].cuda()].contiguous()
def boxes3d_to_bev_torch_camera(boxes3d):
"""covert boxes3d to bev in in camera coords
Args:
boxes3d (FloartTensor): (N, 7) [x, y, z, h, w, l, ry] in camera coords
Return:
FloartTensor: (N, 5) [x1, y1, x2, y2, ry]
"""
boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))
cu, cv = boxes3d[:, 0], boxes3d[:, 2]
half_l, half_w = boxes3d[:, 5] / 2, boxes3d[:, 4] / 2
boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_l, cv - half_w
boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_l, cv + half_w
boxes_bev[:, 4] = boxes3d[:, 6]
return boxes_bev
def boxes3d_to_bev_torch_lidar(boxes3d):
"""covert boxes3d to bev in in LiDAR coords
Args:
boxes3d (FloartTensor): (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords
Returns:
FloartTensor: (N, 5) [x1, y1, x2, y2, ry]
"""
boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))
x, y = boxes3d[:, 0], boxes3d[:, 1]
half_l, half_w = boxes3d[:, 4] / 2, boxes3d[:, 3] / 2
boxes_bev[:, 0], boxes_bev[:, 1] = x - half_w, y - half_l
boxes_bev[:, 2], boxes_bev[:, 3] = x + half_w, y + half_l
boxes_bev[:, 4] = boxes3d[:, 6]
return boxes_bev
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
setup(
name='iou3d',
ext_modules=[
CUDAExtension(
'iou3d_cuda', [
'src/iou3d.cpp',
'src/iou3d_kernel.cu',
],
extra_compile_args={
'cxx': ['-g', '-I /usr/local/cuda/include'],
'nvcc': ['-O2']
})
],
cmdclass={'build_ext': BuildExtension})
...@@ -31,7 +31,8 @@ def points_in_boxes_gpu(points, boxes): ...@@ -31,7 +31,8 @@ def points_in_boxes_gpu(points, boxes):
def points_in_boxes_cpu(points, boxes): def points_in_boxes_cpu(points, boxes):
"""Find points that are in boxes (CPU) """Find points that are in boxes (CPU)
Note: Currently, the output of this function is different from that of Note:
Currently, the output of this function is different from that of
points_in_boxes_gpu. points_in_boxes_gpu.
Args: Args:
......
from mmcv.utils import Registry, build_from_cfg, print_log from mmcv.utils import Registry, build_from_cfg, print_log
from mmdet.utils import get_model_complexity_info from mmdet.utils import get_model_complexity_info, get_root_logger
from .collect_env import collect_env from .collect_env import collect_env
from .logger import get_root_logger
__all__ = [ __all__ = [
'Registry', 'build_from_cfg', 'get_model_complexity_info', 'Registry', 'build_from_cfg', 'get_model_complexity_info',
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment