"src/git@developer.sourcefind.cn:one/TransferBench.git" did not exist on "b54395481a3d89c87c26b2e592553f17dc461682"
Unverified commit e9f48a4f, authored by Danila Rukhovich, committed by GitHub
Browse files

[Enhance] Replace BEV IoU with 3D IoU (#1902)

* add iou3d

* revert deprecated python function

* fix lint

* replace bev iou/nms calls with 3d iou/nms calls
parent 7e6f4624
...@@ -24,6 +24,7 @@ We implement common CUDA ops used in detection, segmentation, etc. ...@@ -24,6 +24,7 @@ We implement common CUDA ops used in detection, segmentation, etc.
- MaskedConv - MaskedConv
- MinAreaPolygon - MinAreaPolygon
- NMS - NMS
- NMS3D
- PointsInPolygons - PointsInPolygons
- PSAMask - PSAMask
- RiRoIAlignRotated - RiRoIAlignRotated
......
...@@ -23,6 +23,7 @@ MMCV 提供了检测、分割等任务中常用的 CUDA 算子 ...@@ -23,6 +23,7 @@ MMCV 提供了检测、分割等任务中常用的 CUDA 算子
- MaskedConv - MaskedConv
- MinAreaPolygon - MinAreaPolygon
- NMS - NMS
- NMS3D
- PointsInPolygons - PointsInPolygons
- PSAMask - PSAMask
- RotatedFeatureAlign - RotatedFeatureAlign
......
...@@ -28,7 +28,8 @@ from .gather_points import gather_points ...@@ -28,7 +28,8 @@ from .gather_points import gather_points
from .group_points import GroupAll, QueryAndGroup, grouping_operation from .group_points import GroupAll, QueryAndGroup, grouping_operation
from .info import (get_compiler_version, get_compiling_cuda_version, from .info import (get_compiler_version, get_compiling_cuda_version,
get_onnxruntime_op_path) get_onnxruntime_op_path)
from .iou3d import boxes_iou_bev, nms_bev, nms_normal_bev from .iou3d import (boxes_iou3d, boxes_iou_bev, nms3d, nms3d_normal, nms_bev,
nms_normal_bev)
from .knn import knn from .knn import knn
from .masked_conv import MaskedConv2d, masked_conv2d from .masked_conv import MaskedConv2d, masked_conv2d
from .min_area_polygons import min_area_polygons from .min_area_polygons import min_area_polygons
...@@ -89,13 +90,14 @@ __all__ = [ ...@@ -89,13 +90,14 @@ __all__ = [
'three_interpolate', 'MultiScaleDeformableAttention', 'BorderAlign', 'three_interpolate', 'MultiScaleDeformableAttention', 'BorderAlign',
'border_align', 'gather_points', 'furthest_point_sample', 'border_align', 'gather_points', 'furthest_point_sample',
'furthest_point_sample_with_dist', 'PointsSampler', 'Correlation', 'furthest_point_sample_with_dist', 'PointsSampler', 'Correlation',
'boxes_iou_bev', 'nms_bev', 'nms_normal_bev', 'Voxelization', 'boxes_iou3d', 'boxes_iou_bev', 'nms_bev', 'nms_normal_bev', 'nms3d',
'voxelization', 'dynamic_scatter', 'DynamicScatter', 'RoIAwarePool3d', 'nms3d_normal', 'Voxelization', 'voxelization', 'dynamic_scatter',
'SparseConv2d', 'SparseConv3d', 'SparseConvTranspose2d', 'DynamicScatter', 'RoIAwarePool3d', 'SparseConv2d', 'SparseConv3d',
'SparseConvTranspose3d', 'SparseInverseConv2d', 'SparseInverseConv3d', 'SparseConvTranspose2d', 'SparseConvTranspose3d', 'SparseInverseConv2d',
'SubMConv2d', 'SubMConv3d', 'SparseModule', 'SparseSequential', 'SparseInverseConv3d', 'SubMConv2d', 'SubMConv3d', 'SparseModule',
'SparseMaxPool2d', 'SparseMaxPool3d', 'SparseConvTensor', 'scatter_nd', 'SparseSequential', 'SparseMaxPool2d', 'SparseMaxPool3d',
'points_in_boxes_part', 'points_in_boxes_cpu', 'points_in_boxes_all', 'SparseConvTensor', 'scatter_nd', 'points_in_boxes_part',
'points_in_polygons', 'min_area_polygons', 'active_rotated_filter', 'points_in_boxes_cpu', 'points_in_boxes_all', 'points_in_polygons',
'convex_iou', 'convex_giou', 'diff_iou_rotated_2d', 'diff_iou_rotated_3d' 'min_area_polygons', 'active_rotated_filter', 'convex_iou', 'convex_giou',
'diff_iou_rotated_2d', 'diff_iou_rotated_3d'
] ]
...@@ -50,21 +50,17 @@ __device__ int check_rect_cross(const Point &p1, const Point &p2, ...@@ -50,21 +50,17 @@ __device__ int check_rect_cross(const Point &p1, const Point &p2,
} }
__device__ inline int check_in_box2d(const float *box, const Point &p) { __device__ inline int check_in_box2d(const float *box, const Point &p) {
// params: box (5) [x1, y1, x2, y2, angle] // params: box (7) [x, y, z, dx, dy, dz, heading]
const float MARGIN = 1e-5; const float MARGIN = 1e-2;
float center_x = (box[0] + box[2]) / 2; float center_x = box[0], center_y = box[1];
float center_y = (box[1] + box[3]) / 2; // rotate the point in the opposite direction of box
float angle_cos = cos(-box[4]), float angle_cos = cos(-box[6]), angle_sin = sin(-box[6]);
angle_sin = float rot_x = (p.x - center_x) * angle_cos + (p.y - center_y) * (-angle_sin);
sin(-box[4]); // rotate the point in the opposite direction of box float rot_y = (p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos;
float rot_x =
(p.x - center_x) * angle_cos - (p.y - center_y) * angle_sin + center_x; return (fabs(rot_x) < box[3] / 2 + MARGIN &&
float rot_y = fabs(rot_y) < box[4] / 2 + MARGIN);
(p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos + center_y;
return (rot_x > box[0] - MARGIN && rot_x < box[2] + MARGIN &&
rot_y > box[1] - MARGIN && rot_y < box[3] + MARGIN);
} }
__device__ inline int intersection(const Point &p1, const Point &p0, __device__ inline int intersection(const Point &p1, const Point &p0,
...@@ -116,16 +112,19 @@ __device__ inline int point_cmp(const Point &a, const Point &b, ...@@ -116,16 +112,19 @@ __device__ inline int point_cmp(const Point &a, const Point &b,
} }
__device__ inline float box_overlap(const float *box_a, const float *box_b) { __device__ inline float box_overlap(const float *box_a, const float *box_b) {
// params: box_a (5) [x1, y1, x2, y2, angle] // params box_a: [x, y, z, dx, dy, dz, heading]
// params: box_b (5) [x1, y1, x2, y2, angle] // params box_b: [x, y, z, dx, dy, dz, heading]
float a_x1 = box_a[0], a_y1 = box_a[1], a_x2 = box_a[2], a_y2 = box_a[3], float a_angle = box_a[6], b_angle = box_b[6];
a_angle = box_a[4]; float a_dx_half = box_a[3] / 2, b_dx_half = box_b[3] / 2,
float b_x1 = box_b[0], b_y1 = box_b[1], b_x2 = box_b[2], b_y2 = box_b[3], a_dy_half = box_a[4] / 2, b_dy_half = box_b[4] / 2;
b_angle = box_b[4]; float a_x1 = box_a[0] - a_dx_half, a_y1 = box_a[1] - a_dy_half;
float a_x2 = box_a[0] + a_dx_half, a_y2 = box_a[1] + a_dy_half;
float b_x1 = box_b[0] - b_dx_half, b_y1 = box_b[1] - b_dy_half;
float b_x2 = box_b[0] + b_dx_half, b_y2 = box_b[1] + b_dy_half;
Point center_a((a_x1 + a_x2) / 2, (a_y1 + a_y2) / 2); Point center_a(box_a[0], box_a[1]);
Point center_b((b_x1 + b_x2) / 2, (b_y1 + b_y2) / 2); Point center_b(box_b[0], box_b[1]);
Point box_a_corners[5]; Point box_a_corners[5];
box_a_corners[0].set(a_x1, a_y1); box_a_corners[0].set(a_x1, a_y1);
...@@ -209,50 +208,36 @@ __device__ inline float box_overlap(const float *box_a, const float *box_b) { ...@@ -209,50 +208,36 @@ __device__ inline float box_overlap(const float *box_a, const float *box_b) {
} }
__device__ inline float iou_bev(const float *box_a, const float *box_b) { __device__ inline float iou_bev(const float *box_a, const float *box_b) {
// params: box_a (5) [x1, y1, x2, y2, angle] // params box_a: [x, y, z, dx, dy, dz, heading]
// params: box_b (5) [x1, y1, x2, y2, angle] // params box_b: [x, y, z, dx, dy, dz, heading]
float sa = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1]); float sa = box_a[3] * box_a[4];
float sb = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1]); float sb = box_b[3] * box_b[4];
float s_overlap = box_overlap(box_a, box_b); float s_overlap = box_overlap(box_a, box_b);
return s_overlap / fmaxf(sa + sb - s_overlap, EPS); return s_overlap / fmaxf(sa + sb - s_overlap, EPS);
} }
__global__ void iou3d_boxes_overlap_bev_forward_cuda_kernel( __global__ void iou3d_boxes_iou3d_forward_cuda_kernel(const int num_a,
const int num_a, const float *boxes_a, const int num_b, const float *boxes_a,
const float *boxes_b, float *ans_overlap) { const int num_b,
CUDA_2D_KERNEL_LOOP(b_idx, num_b, a_idx, num_a) { const float *boxes_b,
if (a_idx >= num_a || b_idx >= num_b) { float *ans_iou) {
return;
}
const float *cur_box_a = boxes_a + a_idx * 5;
const float *cur_box_b = boxes_b + b_idx * 5;
float s_overlap = box_overlap(cur_box_a, cur_box_b);
ans_overlap[a_idx * num_b + b_idx] = s_overlap;
}
}
__global__ void iou3d_boxes_iou_bev_forward_cuda_kernel(const int num_a,
const float *boxes_a,
const int num_b,
const float *boxes_b,
float *ans_iou) {
CUDA_2D_KERNEL_LOOP(b_idx, num_b, a_idx, num_a) { CUDA_2D_KERNEL_LOOP(b_idx, num_b, a_idx, num_a) {
if (a_idx >= num_a || b_idx >= num_b) { if (a_idx >= num_a || b_idx >= num_b) {
return; return;
} }
const float *cur_box_a = boxes_a + a_idx * 5; const float *cur_box_a = boxes_a + a_idx * 7;
const float *cur_box_b = boxes_b + b_idx * 5; const float *cur_box_b = boxes_b + b_idx * 7;
float cur_iou_bev = iou_bev(cur_box_a, cur_box_b); float cur_iou_bev = iou_bev(cur_box_a, cur_box_b);
ans_iou[a_idx * num_b + b_idx] = cur_iou_bev; ans_iou[a_idx * num_b + b_idx] = cur_iou_bev;
} }
} }
__global__ void nms_forward_cuda_kernel(const int boxes_num, __global__ void iou3d_nms3d_forward_cuda_kernel(const int boxes_num,
const float nms_overlap_thresh, const float nms_overlap_thresh,
const float *boxes, const float *boxes,
unsigned long long *mask) { unsigned long long *mask) {
// params: boxes (N, 5) [x1, y1, x2, y2, ry] // params: boxes (N, 7) [x, y, z, dx, dy, dz, heading]
// params: mask (N, N/THREADS_PER_BLOCK_NMS) // params: mask (N, N/THREADS_PER_BLOCK_NMS)
const int blocks = const int blocks =
(boxes_num + THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS; (boxes_num + THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS;
...@@ -264,25 +249,29 @@ __global__ void nms_forward_cuda_kernel(const int boxes_num, ...@@ -264,25 +249,29 @@ __global__ void nms_forward_cuda_kernel(const int boxes_num,
const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS, const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS,
THREADS_PER_BLOCK_NMS); THREADS_PER_BLOCK_NMS);
__shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 5]; __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 7];
if (threadIdx.x < col_size) { if (threadIdx.x < col_size) {
block_boxes[threadIdx.x * 5 + 0] = block_boxes[threadIdx.x * 7 + 0] =
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 0]; boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 0];
block_boxes[threadIdx.x * 5 + 1] = block_boxes[threadIdx.x * 7 + 1] =
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 1]; boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 1];
block_boxes[threadIdx.x * 5 + 2] = block_boxes[threadIdx.x * 7 + 2] =
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 2]; boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 2];
block_boxes[threadIdx.x * 5 + 3] = block_boxes[threadIdx.x * 7 + 3] =
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 3]; boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 3];
block_boxes[threadIdx.x * 5 + 4] = block_boxes[threadIdx.x * 7 + 4] =
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 4]; boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 4];
block_boxes[threadIdx.x * 7 + 5] =
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 5];
block_boxes[threadIdx.x * 7 + 6] =
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 6];
} }
__syncthreads(); __syncthreads();
if (threadIdx.x < row_size) { if (threadIdx.x < row_size) {
const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x; const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
const float *cur_box = boxes + cur_box_idx * 5; const float *cur_box = boxes + cur_box_idx * 7;
int i = 0; int i = 0;
unsigned long long t = 0; unsigned long long t = 0;
...@@ -291,7 +280,7 @@ __global__ void nms_forward_cuda_kernel(const int boxes_num, ...@@ -291,7 +280,7 @@ __global__ void nms_forward_cuda_kernel(const int boxes_num,
start = threadIdx.x + 1; start = threadIdx.x + 1;
} }
for (i = start; i < col_size; i++) { for (i = start; i < col_size; i++) {
if (iou_bev(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { if (iou_bev(cur_box, block_boxes + i * 7) > nms_overlap_thresh) {
t |= 1ULL << i; t |= 1ULL << i;
} }
} }
...@@ -303,20 +292,24 @@ __global__ void nms_forward_cuda_kernel(const int boxes_num, ...@@ -303,20 +292,24 @@ __global__ void nms_forward_cuda_kernel(const int boxes_num,
} }
__device__ inline float iou_normal(float const *const a, float const *const b) { __device__ inline float iou_normal(float const *const a, float const *const b) {
float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]); // params: a: [x, y, z, dx, dy, dz, heading]
float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]); // params: b: [x, y, z, dx, dy, dz, heading]
float left = fmaxf(a[0] - a[3] / 2, b[0] - b[3] / 2),
right = fminf(a[0] + a[3] / 2, b[0] + b[3] / 2);
float top = fmaxf(a[1] - a[4] / 2, b[1] - b[4] / 2),
bottom = fminf(a[1] + a[4] / 2, b[1] + b[4] / 2);
float width = fmaxf(right - left, 0.f), height = fmaxf(bottom - top, 0.f); float width = fmaxf(right - left, 0.f), height = fmaxf(bottom - top, 0.f);
float interS = width * height; float interS = width * height;
float Sa = (a[2] - a[0]) * (a[3] - a[1]); float Sa = a[3] * a[4];
float Sb = (b[2] - b[0]) * (b[3] - b[1]); float Sb = b[3] * b[4];
return interS / fmaxf(Sa + Sb - interS, EPS); return interS / fmaxf(Sa + Sb - interS, EPS);
} }
__global__ void nms_normal_forward_cuda_kernel(const int boxes_num, __global__ void iou3d_nms3d_normal_forward_cuda_kernel(
const float nms_overlap_thresh, const int boxes_num, const float nms_overlap_thresh, const float *boxes,
const float *boxes, unsigned long long *mask) {
unsigned long long *mask) { // params: boxes (N, 7) [x, y, z, dx, dy, dz, heading]
// params: boxes (N, 5) [x1, y1, x2, y2, ry]
// params: mask (N, N/THREADS_PER_BLOCK_NMS) // params: mask (N, N/THREADS_PER_BLOCK_NMS)
const int blocks = const int blocks =
...@@ -329,25 +322,29 @@ __global__ void nms_normal_forward_cuda_kernel(const int boxes_num, ...@@ -329,25 +322,29 @@ __global__ void nms_normal_forward_cuda_kernel(const int boxes_num,
const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS, const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS,
THREADS_PER_BLOCK_NMS); THREADS_PER_BLOCK_NMS);
__shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 5]; __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 7];
if (threadIdx.x < col_size) { if (threadIdx.x < col_size) {
block_boxes[threadIdx.x * 5 + 0] = block_boxes[threadIdx.x * 7 + 0] =
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 0]; boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 0];
block_boxes[threadIdx.x * 5 + 1] = block_boxes[threadIdx.x * 7 + 1] =
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 1]; boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 1];
block_boxes[threadIdx.x * 5 + 2] = block_boxes[threadIdx.x * 7 + 2] =
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 2]; boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 2];
block_boxes[threadIdx.x * 5 + 3] = block_boxes[threadIdx.x * 7 + 3] =
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 3]; boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 3];
block_boxes[threadIdx.x * 5 + 4] = block_boxes[threadIdx.x * 7 + 4] =
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 4]; boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 4];
block_boxes[threadIdx.x * 7 + 5] =
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 5];
block_boxes[threadIdx.x * 7 + 6] =
boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 6];
} }
__syncthreads(); __syncthreads();
if (threadIdx.x < row_size) { if (threadIdx.x < row_size) {
const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x; const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
const float *cur_box = boxes + cur_box_idx * 5; const float *cur_box = boxes + cur_box_idx * 7;
int i = 0; int i = 0;
unsigned long long t = 0; unsigned long long t = 0;
...@@ -356,7 +353,7 @@ __global__ void nms_normal_forward_cuda_kernel(const int boxes_num, ...@@ -356,7 +353,7 @@ __global__ void nms_normal_forward_cuda_kernel(const int boxes_num,
start = threadIdx.x + 1; start = threadIdx.x + 1;
} }
for (i = start; i < col_size; i++) { for (i = start; i < col_size; i++) {
if (iou_normal(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { if (iou_normal(cur_box, block_boxes + i * 7) > nms_overlap_thresh) {
t |= 1ULL << i; t |= 1ULL << i;
} }
} }
......
...@@ -564,73 +564,58 @@ REGISTER_DEVICE_IMPL(group_points_forward_impl, CUDA, ...@@ -564,73 +564,58 @@ REGISTER_DEVICE_IMPL(group_points_forward_impl, CUDA,
REGISTER_DEVICE_IMPL(group_points_backward_impl, CUDA, REGISTER_DEVICE_IMPL(group_points_backward_impl, CUDA,
group_points_backward_cuda); group_points_backward_cuda);
void IoU3DBoxesOverlapBevForwardCUDAKernelLauncher(const int num_a, void IoU3DBoxesIoU3DForwardCUDAKernelLauncher(const int num_a,
const Tensor boxes_a, const Tensor boxes_a,
const int num_b, const int num_b,
const Tensor boxes_b, const Tensor boxes_b,
Tensor ans_overlap); Tensor ans_iou);
void IoU3DBoxesIoUBevForwardCUDAKernelLauncher(const int num_a, void IoU3DNMS3DForwardCUDAKernelLauncher(const Tensor boxes,
const Tensor boxes_a, unsigned long long* mask,
const int num_b, int boxes_num,
const Tensor boxes_b, float nms_overlap_thresh);
Tensor ans_iou);
void IoU3DNMS3DNormalForwardCUDAKernelLauncher(const Tensor boxes,
void IoU3DNMSForwardCUDAKernelLauncher(const Tensor boxes, unsigned long long* mask,
unsigned long long* mask, int boxes_num, int boxes_num,
float nms_overlap_thresh); float nms_overlap_thresh);
void IoU3DNMSNormalForwardCUDAKernelLauncher(const Tensor boxes, void iou3d_boxes_iou3d_forward_cuda(const int num_a, const Tensor boxes_a,
unsigned long long* mask, const int num_b, const Tensor boxes_b,
int boxes_num, Tensor ans_iou) {
float nms_overlap_thresh); IoU3DBoxesIoU3DForwardCUDAKernelLauncher(num_a, boxes_a, num_b, boxes_b,
ans_iou);
void iou3d_boxes_overlap_bev_forward_cuda(const int num_a, const Tensor boxes_a,
const int num_b, const Tensor boxes_b,
Tensor ans_overlap) {
IoU3DBoxesOverlapBevForwardCUDAKernelLauncher(num_a, boxes_a, num_b, boxes_b,
ans_overlap);
}; };
void iou3d_boxes_iou_bev_forward_cuda(const int num_a, const Tensor boxes_a, void iou3d_nms3d_forward_cuda(const Tensor boxes, unsigned long long* mask,
const int num_b, const Tensor boxes_b, int boxes_num, float nms_overlap_thresh) {
Tensor ans_iou) { IoU3DNMS3DForwardCUDAKernelLauncher(boxes, mask, boxes_num,
IoU3DBoxesIoUBevForwardCUDAKernelLauncher(num_a, boxes_a, num_b, boxes_b, nms_overlap_thresh);
ans_iou);
}; };
void iou3d_nms_forward_cuda(const Tensor boxes, unsigned long long* mask, void iou3d_nms3d_normal_forward_cuda(const Tensor boxes,
int boxes_num, float nms_overlap_thresh) { unsigned long long* mask, int boxes_num,
IoU3DNMSForwardCUDAKernelLauncher(boxes, mask, boxes_num, nms_overlap_thresh); float nms_overlap_thresh) {
IoU3DNMS3DNormalForwardCUDAKernelLauncher(boxes, mask, boxes_num,
nms_overlap_thresh);
}; };
void iou3d_nms_normal_forward_cuda(const Tensor boxes, unsigned long long* mask, void iou3d_boxes_iou3d_forward_impl(const int num_a, const Tensor boxes_a,
int boxes_num, float nms_overlap_thresh) { const int num_b, const Tensor boxes_b,
IoU3DNMSNormalForwardCUDAKernelLauncher(boxes, mask, boxes_num, Tensor ans_iou);
nms_overlap_thresh);
};
void iou3d_boxes_overlap_bev_forward_impl(const int num_a, const Tensor boxes_a,
const int num_b, const Tensor boxes_b,
Tensor ans_overlap);
void iou3d_boxes_iou_bev_forward_impl(const int num_a, const Tensor boxes_a,
const int num_b, const Tensor boxes_b,
Tensor ans_iou);
void iou3d_nms_forward_impl(const Tensor boxes, unsigned long long* mask, void iou3d_nms3d_forward_impl(const Tensor boxes, unsigned long long* mask,
int boxes_num, float nms_overlap_thresh); int boxes_num, float nms_overlap_thresh);
void iou3d_nms_normal_forward_impl(const Tensor boxes, unsigned long long* mask, void iou3d_nms3d_normal_forward_impl(const Tensor boxes,
int boxes_num, float nms_overlap_thresh); unsigned long long* mask, int boxes_num,
float nms_overlap_thresh);
REGISTER_DEVICE_IMPL(iou3d_boxes_overlap_bev_forward_impl, CUDA, REGISTER_DEVICE_IMPL(iou3d_boxes_iou3d_forward_impl, CUDA,
iou3d_boxes_overlap_bev_forward_cuda); iou3d_boxes_iou3d_forward_cuda);
REGISTER_DEVICE_IMPL(iou3d_boxes_iou_bev_forward_impl, CUDA, REGISTER_DEVICE_IMPL(iou3d_nms3d_forward_impl, CUDA, iou3d_nms3d_forward_cuda);
iou3d_boxes_iou_bev_forward_cuda); REGISTER_DEVICE_IMPL(iou3d_nms3d_normal_forward_impl, CUDA,
REGISTER_DEVICE_IMPL(iou3d_nms_forward_impl, CUDA, iou3d_nms_forward_cuda); iou3d_nms3d_normal_forward_cuda);
REGISTER_DEVICE_IMPL(iou3d_nms_normal_forward_impl, CUDA,
iou3d_nms_normal_forward_cuda);
void KNNForwardCUDAKernelLauncher(int b, int n, int m, int nsample, void KNNForwardCUDAKernelLauncher(int b, int n, int m, int nsample,
const Tensor xyz, const Tensor new_xyz, const Tensor xyz, const Tensor new_xyz,
......
...@@ -12,11 +12,11 @@ All Rights Reserved 2019-2020. ...@@ -12,11 +12,11 @@ All Rights Reserved 2019-2020.
#include "iou3d_cuda_kernel.cuh" #include "iou3d_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp" #include "pytorch_cuda_helper.hpp"
void IoU3DBoxesOverlapBevForwardCUDAKernelLauncher(const int num_a, void IoU3DBoxesIoU3DForwardCUDAKernelLauncher(const int num_a,
const Tensor boxes_a, const Tensor boxes_a,
const int num_b, const int num_b,
const Tensor boxes_b, const Tensor boxes_b,
Tensor ans_overlap) { Tensor ans_iou) {
at::cuda::CUDAGuard device_guard(boxes_a.device()); at::cuda::CUDAGuard device_guard(boxes_a.device());
cudaStream_t stream = at::cuda::getCurrentCUDAStream(); cudaStream_t stream = at::cuda::getCurrentCUDAStream();
...@@ -25,36 +25,17 @@ void IoU3DBoxesOverlapBevForwardCUDAKernelLauncher(const int num_a, ...@@ -25,36 +25,17 @@ void IoU3DBoxesOverlapBevForwardCUDAKernelLauncher(const int num_a,
GET_BLOCKS(num_a, THREADS_PER_BLOCK_IOU3D)); GET_BLOCKS(num_a, THREADS_PER_BLOCK_IOU3D));
dim3 threads(THREADS_PER_BLOCK_IOU3D, THREADS_PER_BLOCK_IOU3D); dim3 threads(THREADS_PER_BLOCK_IOU3D, THREADS_PER_BLOCK_IOU3D);
iou3d_boxes_overlap_bev_forward_cuda_kernel<<<blocks, threads, 0, stream>>>( iou3d_boxes_iou3d_forward_cuda_kernel<<<blocks, threads, 0, stream>>>(
num_a, boxes_a.data_ptr<float>(), num_b, boxes_b.data_ptr<float>(),
ans_overlap.data_ptr<float>());
AT_CUDA_CHECK(cudaGetLastError());
}
void IoU3DBoxesIoUBevForwardCUDAKernelLauncher(const int num_a,
const Tensor boxes_a,
const int num_b,
const Tensor boxes_b,
Tensor ans_iou) {
at::cuda::CUDAGuard device_guard(boxes_a.device());
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
// blockIdx.x(col), blockIdx.y(row)
dim3 blocks(GET_BLOCKS(num_b, THREADS_PER_BLOCK_IOU3D),
GET_BLOCKS(num_a, THREADS_PER_BLOCK_IOU3D));
dim3 threads(THREADS_PER_BLOCK_IOU3D, THREADS_PER_BLOCK_IOU3D);
iou3d_boxes_iou_bev_forward_cuda_kernel<<<blocks, threads, 0, stream>>>(
num_a, boxes_a.data_ptr<float>(), num_b, boxes_b.data_ptr<float>(), num_a, boxes_a.data_ptr<float>(), num_b, boxes_b.data_ptr<float>(),
ans_iou.data_ptr<float>()); ans_iou.data_ptr<float>());
AT_CUDA_CHECK(cudaGetLastError()); AT_CUDA_CHECK(cudaGetLastError());
} }
void IoU3DNMSForwardCUDAKernelLauncher(const Tensor boxes, void IoU3DNMS3DForwardCUDAKernelLauncher(const Tensor boxes,
unsigned long long *mask, int boxes_num, unsigned long long *mask,
float nms_overlap_thresh) { int boxes_num,
float nms_overlap_thresh) {
at::cuda::CUDAGuard device_guard(boxes.device()); at::cuda::CUDAGuard device_guard(boxes.device());
cudaStream_t stream = at::cuda::getCurrentCUDAStream(); cudaStream_t stream = at::cuda::getCurrentCUDAStream();
...@@ -62,16 +43,16 @@ void IoU3DNMSForwardCUDAKernelLauncher(const Tensor boxes, ...@@ -62,16 +43,16 @@ void IoU3DNMSForwardCUDAKernelLauncher(const Tensor boxes,
GET_BLOCKS(boxes_num, THREADS_PER_BLOCK_NMS)); GET_BLOCKS(boxes_num, THREADS_PER_BLOCK_NMS));
dim3 threads(THREADS_PER_BLOCK_NMS); dim3 threads(THREADS_PER_BLOCK_NMS);
nms_forward_cuda_kernel<<<blocks, threads, 0, stream>>>( iou3d_nms3d_forward_cuda_kernel<<<blocks, threads, 0, stream>>>(
boxes_num, nms_overlap_thresh, boxes.data_ptr<float>(), mask); boxes_num, nms_overlap_thresh, boxes.data_ptr<float>(), mask);
AT_CUDA_CHECK(cudaGetLastError()); AT_CUDA_CHECK(cudaGetLastError());
} }
void IoU3DNMSNormalForwardCUDAKernelLauncher(const Tensor boxes, void IoU3DNMS3DNormalForwardCUDAKernelLauncher(const Tensor boxes,
unsigned long long *mask, unsigned long long *mask,
int boxes_num, int boxes_num,
float nms_overlap_thresh) { float nms_overlap_thresh) {
at::cuda::CUDAGuard device_guard(boxes.device()); at::cuda::CUDAGuard device_guard(boxes.device());
cudaStream_t stream = at::cuda::getCurrentCUDAStream(); cudaStream_t stream = at::cuda::getCurrentCUDAStream();
...@@ -79,7 +60,7 @@ void IoU3DNMSNormalForwardCUDAKernelLauncher(const Tensor boxes, ...@@ -79,7 +60,7 @@ void IoU3DNMSNormalForwardCUDAKernelLauncher(const Tensor boxes,
GET_BLOCKS(boxes_num, THREADS_PER_BLOCK_NMS)); GET_BLOCKS(boxes_num, THREADS_PER_BLOCK_NMS));
dim3 threads(THREADS_PER_BLOCK_NMS); dim3 threads(THREADS_PER_BLOCK_NMS);
nms_normal_forward_cuda_kernel<<<blocks, threads, 0, stream>>>( iou3d_nms3d_normal_forward_cuda_kernel<<<blocks, threads, 0, stream>>>(
boxes_num, nms_overlap_thresh, boxes.data_ptr<float>(), mask); boxes_num, nms_overlap_thresh, boxes.data_ptr<float>(), mask);
AT_CUDA_CHECK(cudaGetLastError()); AT_CUDA_CHECK(cudaGetLastError());
......
...@@ -12,59 +12,39 @@ All Rights Reserved 2019-2020. ...@@ -12,59 +12,39 @@ All Rights Reserved 2019-2020.
const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8; const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;
void iou3d_boxes_overlap_bev_forward_impl(const int num_a, const Tensor boxes_a, void iou3d_boxes_iou3d_forward_impl(const int num_a, const Tensor boxes_a,
const int num_b, const Tensor boxes_b, const int num_b, const Tensor boxes_b,
Tensor ans_overlap) { Tensor ans_iou) {
DISPATCH_DEVICE_IMPL(iou3d_boxes_overlap_bev_forward_impl, num_a, boxes_a, DISPATCH_DEVICE_IMPL(iou3d_boxes_iou3d_forward_impl, num_a, boxes_a, num_b,
num_b, boxes_b, ans_overlap);
}
void iou3d_boxes_iou_bev_forward_impl(const int num_a, const Tensor boxes_a,
const int num_b, const Tensor boxes_b,
Tensor ans_iou) {
DISPATCH_DEVICE_IMPL(iou3d_boxes_iou_bev_forward_impl, num_a, boxes_a, num_b,
boxes_b, ans_iou); boxes_b, ans_iou);
} }
void iou3d_nms_forward_impl(const Tensor boxes, unsigned long long *mask, void iou3d_nms3d_forward_impl(const Tensor boxes, unsigned long long *mask,
int boxes_num, float nms_overlap_thresh) { int boxes_num, float nms_overlap_thresh) {
DISPATCH_DEVICE_IMPL(iou3d_nms_forward_impl, boxes, mask, boxes_num, DISPATCH_DEVICE_IMPL(iou3d_nms3d_forward_impl, boxes, mask, boxes_num,
nms_overlap_thresh); nms_overlap_thresh);
} }
void iou3d_nms_normal_forward_impl(const Tensor boxes, unsigned long long *mask, void iou3d_nms3d_normal_forward_impl(const Tensor boxes,
int boxes_num, float nms_overlap_thresh) { unsigned long long *mask, int boxes_num,
DISPATCH_DEVICE_IMPL(iou3d_nms_normal_forward_impl, boxes, mask, boxes_num, float nms_overlap_thresh) {
DISPATCH_DEVICE_IMPL(iou3d_nms3d_normal_forward_impl, boxes, mask, boxes_num,
nms_overlap_thresh); nms_overlap_thresh);
} }
void iou3d_boxes_overlap_bev_forward(Tensor boxes_a, Tensor boxes_b, void iou3d_boxes_iou3d_forward(Tensor boxes_a, Tensor boxes_b, Tensor ans_iou) {
Tensor ans_overlap) { // params boxes: (N, 7) [x, y, z, dx, dy, dz, heading]
// params boxes_a: (N, 5) [x1, y1, x2, y2, ry]
// params boxes_b: (M, 5)
// params ans_overlap: (N, M)
int num_a = boxes_a.size(0);
int num_b = boxes_b.size(0);
iou3d_boxes_overlap_bev_forward_impl(num_a, boxes_a, num_b, boxes_b,
ans_overlap);
}
void iou3d_boxes_iou_bev_forward(Tensor boxes_a, Tensor boxes_b,
Tensor ans_iou) {
// params boxes_a: (N, 5) [x1, y1, x2, y2, ry]
// params boxes_b: (M, 5) // params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading]
// params ans_overlap: (N, M) // params ans_iou: (N, M)
int num_a = boxes_a.size(0); int num_a = boxes_a.size(0);
int num_b = boxes_b.size(0); int num_b = boxes_b.size(0);
iou3d_boxes_iou_bev_forward_impl(num_a, boxes_a, num_b, boxes_b, ans_iou); iou3d_boxes_iou3d_forward_impl(num_a, boxes_a, num_b, boxes_b, ans_iou);
} }
void iou3d_nms_forward(Tensor boxes, Tensor keep, Tensor keep_num, void iou3d_nms3d_forward(Tensor boxes, Tensor keep, Tensor keep_num,
float nms_overlap_thresh) { float nms_overlap_thresh) {
// params boxes: (N, 5) [x1, y1, x2, y2, ry] // params boxes: (N, 7) [x, y, z, dx, dy, dz, heading]
// params keep: (N) // params keep: (N)
CHECK_CONTIGUOUS(boxes); CHECK_CONTIGUOUS(boxes);
CHECK_CONTIGUOUS(keep); CHECK_CONTIGUOUS(keep);
...@@ -80,7 +60,7 @@ void iou3d_nms_forward(Tensor boxes, Tensor keep, Tensor keep_num, ...@@ -80,7 +60,7 @@ void iou3d_nms_forward(Tensor boxes, Tensor keep, Tensor keep_num,
at::empty({boxes_num, col_blocks}, boxes.options().dtype(at::kLong)); at::empty({boxes_num, col_blocks}, boxes.options().dtype(at::kLong));
unsigned long long *mask_data = unsigned long long *mask_data =
(unsigned long long *)mask.data_ptr<int64_t>(); (unsigned long long *)mask.data_ptr<int64_t>();
iou3d_nms_forward_impl(boxes, mask_data, boxes_num, nms_overlap_thresh); iou3d_nms3d_forward_impl(boxes, mask_data, boxes_num, nms_overlap_thresh);
at::Tensor mask_cpu = mask.to(at::kCPU); at::Tensor mask_cpu = mask.to(at::kCPU);
unsigned long long *mask_host = unsigned long long *mask_host =
...@@ -106,9 +86,9 @@ void iou3d_nms_forward(Tensor boxes, Tensor keep, Tensor keep_num, ...@@ -106,9 +86,9 @@ void iou3d_nms_forward(Tensor boxes, Tensor keep, Tensor keep_num,
} }
} }
void iou3d_nms_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num, void iou3d_nms3d_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num,
float nms_overlap_thresh) { float nms_overlap_thresh) {
// params boxes: (N, 5) [x1, y1, x2, y2, ry] // params boxes: (N, 7) [x, y, z, dx, dy, dz, heading]
// params keep: (N) // params keep: (N)
CHECK_CONTIGUOUS(boxes); CHECK_CONTIGUOUS(boxes);
...@@ -125,8 +105,8 @@ void iou3d_nms_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num, ...@@ -125,8 +105,8 @@ void iou3d_nms_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num,
at::empty({boxes_num, col_blocks}, boxes.options().dtype(at::kLong)); at::empty({boxes_num, col_blocks}, boxes.options().dtype(at::kLong));
unsigned long long *mask_data = unsigned long long *mask_data =
(unsigned long long *)mask.data_ptr<int64_t>(); (unsigned long long *)mask.data_ptr<int64_t>();
iou3d_nms_normal_forward_impl(boxes, mask_data, boxes_num, iou3d_nms3d_normal_forward_impl(boxes, mask_data, boxes_num,
nms_overlap_thresh); nms_overlap_thresh);
at::Tensor mask_cpu = mask.to(at::kCPU); at::Tensor mask_cpu = mask.to(at::kCPU);
unsigned long long *mask_host = unsigned long long *mask_host =
......
...@@ -115,17 +115,14 @@ void bbox_overlaps(const Tensor bboxes1, const Tensor bboxes2, Tensor ious, ...@@ -115,17 +115,14 @@ void bbox_overlaps(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
void knn_forward(Tensor xyz_tensor, Tensor new_xyz_tensor, Tensor idx_tensor, void knn_forward(Tensor xyz_tensor, Tensor new_xyz_tensor, Tensor idx_tensor,
Tensor dist2_tensor, int b, int n, int m, int nsample); Tensor dist2_tensor, int b, int n, int m, int nsample);
void iou3d_boxes_overlap_bev_forward(Tensor boxes_a, Tensor boxes_b,
Tensor ans_overlap);
void iou3d_boxes_iou_bev_forward(Tensor boxes_a, Tensor boxes_b, void iou3d_boxes_iou3d_forward(Tensor boxes_a, Tensor boxes_b, Tensor ans_iou);
Tensor ans_iou);
void iou3d_nms_forward(Tensor boxes, Tensor keep, Tensor keep_num, void iou3d_nms3d_forward(Tensor boxes, Tensor keep, Tensor keep_num,
float nms_overlap_thresh); float nms_overlap_thresh);
void iou3d_nms_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num, void iou3d_nms3d_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num,
float nms_overlap_thresh); float nms_overlap_thresh);
void furthest_point_sampling_forward(Tensor points_tensor, Tensor temp_tensor, void furthest_point_sampling_forward(Tensor points_tensor, Tensor temp_tensor,
Tensor idx_tensor, int b, int n, int m); Tensor idx_tensor, int b, int n, int m);
...@@ -535,17 +532,14 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { ...@@ -535,17 +532,14 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
py::arg("m"), py::arg("nsample"), py::arg("xyz_tensor"), py::arg("m"), py::arg("nsample"), py::arg("xyz_tensor"),
py::arg("new_xyz_tensor"), py::arg("idx_tensor"), py::arg("new_xyz_tensor"), py::arg("idx_tensor"),
py::arg("dist2_tensor")); py::arg("dist2_tensor"));
m.def("iou3d_boxes_overlap_bev_forward", &iou3d_boxes_overlap_bev_forward, m.def("iou3d_boxes_iou3d_forward", &iou3d_boxes_iou3d_forward,
"iou3d_boxes_overlap_bev_forward", py::arg("boxes_a"), "iou3d_boxes_iou3d_forward", py::arg("boxes_a"), py::arg("boxes_b"),
py::arg("boxes_b"), py::arg("ans_overlap"));
m.def("iou3d_boxes_iou_bev_forward", &iou3d_boxes_iou_bev_forward,
"iou3d_boxes_iou_bev_forward", py::arg("boxes_a"), py::arg("boxes_b"),
py::arg("ans_iou")); py::arg("ans_iou"));
m.def("iou3d_nms_forward", &iou3d_nms_forward, "iou3d_nms_forward", m.def("iou3d_nms3d_forward", &iou3d_nms3d_forward, "iou3d_nms3d_forward",
py::arg("boxes"), py::arg("keep"), py::arg("num_out"), py::arg("boxes"), py::arg("keep"), py::arg("num_out"),
py::arg("nms_overlap_thresh")); py::arg("nms_overlap_thresh"));
m.def("iou3d_nms_normal_forward", &iou3d_nms_normal_forward, m.def("iou3d_nms3d_normal_forward", &iou3d_nms3d_normal_forward,
"iou3d_nms_normal_forward", py::arg("boxes"), py::arg("keep"), "iou3d_nms3d_normal_forward", py::arg("boxes"), py::arg("keep"),
py::arg("num_out"), py::arg("nms_overlap_thresh")); py::arg("num_out"), py::arg("nms_overlap_thresh"));
m.def("furthest_point_sampling_forward", &furthest_point_sampling_forward, m.def("furthest_point_sampling_forward", &furthest_point_sampling_forward,
"furthest_point_sampling_forward", py::arg("points_tensor"), "furthest_point_sampling_forward", py::arg("points_tensor"),
......
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
import warnings
import torch import torch
from ..utils import ext_loader from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', [ ext_module = ext_loader.load_ext('_ext', [
'iou3d_boxes_iou_bev_forward', 'iou3d_nms_forward', 'iou3d_boxes_iou3d_forward', 'iou3d_nms3d_forward',
'iou3d_nms_normal_forward' 'iou3d_nms3d_normal_forward'
]) ])
def boxes_iou_bev(boxes_a, boxes_b): def boxes_iou3d(boxes_a, boxes_b):
"""Calculate boxes IoU in the Bird's Eye View. """Calculate boxes 3D IoU.
Args: Args:
boxes_a (torch.Tensor): Input boxes a with shape (M, 5). boxes_a (torch.Tensor): Input boxes a with shape (M, 7).
boxes_b (torch.Tensor): Input boxes b with shape (N, 5). boxes_b (torch.Tensor): Input boxes b with shape (N, 7).
Returns: Returns:
torch.Tensor: IoU result with shape (M, N). torch.Tensor: IoU result with shape (M, N).
...@@ -22,68 +24,160 @@ def boxes_iou_bev(boxes_a, boxes_b): ...@@ -22,68 +24,160 @@ def boxes_iou_bev(boxes_a, boxes_b):
ans_iou = boxes_a.new_zeros( ans_iou = boxes_a.new_zeros(
torch.Size((boxes_a.shape[0], boxes_b.shape[0]))) torch.Size((boxes_a.shape[0], boxes_b.shape[0])))
ext_module.iou3d_boxes_iou_bev_forward(boxes_a.contiguous(), ext_module.iou3d_boxes_iou3d_forward(boxes_a.contiguous(),
boxes_b.contiguous(), ans_iou) boxes_b.contiguous(), ans_iou)
return ans_iou return ans_iou
def nms3d(boxes, scores, iou_threshold):
    """3D NMS function GPU implementation (for 7-DoF 3D boxes).

    Boxes are reordered by descending score, handed to the
    ``iou3d_nms3d_forward`` extension op, and the positions it reports are
    mapped back to indices into the original (unsorted) ``boxes``.

    Args:
        boxes (torch.Tensor): Input boxes with the shape of (N, 7)
            ([x, y, z, dx, dy, dz, heading]).
        scores (torch.Tensor): Scores of boxes with the shape of (N,).
        iou_threshold (float): Overlap threshold of NMS.

    Returns:
        torch.Tensor: Indexes after NMS.
    """
    assert boxes.size(1) == 7, 'Input boxes shape should be (N, 7)'
    order = scores.sort(0, descending=True)[1]
    boxes = boxes[order].contiguous()

    # Output buffers use torch's default placement rather than the device
    # of `boxes`; the extension op is expected to write the kept positions
    # into `keep` and their count into `num_out`.
    keep = torch.zeros(boxes.size(0), dtype=torch.long)
    num_out = torch.zeros(size=(), dtype=torch.long)
    ext_module.iou3d_nms3d_forward(
        boxes, keep, num_out, nms_overlap_thresh=iou_threshold)
    # `.to` moves the positions onto the device of `boxes` without
    # hard-coding a CUDA transfer.
    keep = order[keep[:num_out].to(boxes.device)].contiguous()
    return keep
def nms3d_normal(boxes, scores, iou_threshold):
    """Normal 3D NMS function GPU implementation.

    The overlap of two boxes for IoU calculation is defined as the exact
    overlapping area of the two boxes WITH their yaw angle set to 0.

    Args:
        boxes (torch.Tensor): Input boxes with shape (N, 7)
            ([x, y, z, dx, dy, dz, heading]).
        scores (torch.Tensor): Scores of predicted boxes with shape (N,).
        iou_threshold (float): Overlap threshold of NMS.

    Returns:
        torch.Tensor: Remaining indices with scores in descending order.
    """
    assert boxes.shape[1] == 7, 'Input boxes shape should be (N, 7)'
    order = scores.sort(0, descending=True)[1]
    boxes = boxes[order].contiguous()

    # Output buffers use torch's default placement rather than the device
    # of `boxes`; the extension op is expected to write the kept positions
    # into `keep` and their count into `num_out`.
    keep = torch.zeros(boxes.size(0), dtype=torch.long)
    num_out = torch.zeros(size=(), dtype=torch.long)
    ext_module.iou3d_nms3d_normal_forward(
        boxes, keep, num_out, nms_overlap_thresh=iou_threshold)
    # Map positions in the sorted list back to indices of the input boxes;
    # `.to` avoids hard-coding a CUDA transfer.
    return order[keep[:num_out].to(boxes.device)].contiguous()
def _xyxyr2xywhr(boxes):
    """Convert [x1, y1, x2, y2, heading] box to [x, y, dx, dy, heading] box.

    The center is the midpoint of the two corners and the extents are the
    corner differences; the heading column is passed through unchanged.

    Args:
        boxes (torch.Tensor): Input boxes with shape (N, 5).

    Returns:
        torch.Tensor: Converted boxes with shape (N, 5).
    """
    warnings.warn(
        'This function is deprecated and will be removed in the future.',
        DeprecationWarning)
    x1, y1 = boxes[:, 0], boxes[:, 1]
    x2, y2 = boxes[:, 2], boxes[:, 3]
    heading = boxes[:, 4]
    return torch.stack(
        ((x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1, heading), dim=-1)
def boxes_iou_bev(boxes_a, boxes_b):
    """Calculate boxes IoU in the Bird's Eye View.

    Deprecated: this is a thin wrapper that converts both inputs from
    corner format to center format and forwards them to
    ``box_iou_rotated.box_iou_rotated``; call that function directly.

    Args:
        boxes_a (torch.Tensor): Input boxes a with shape (M, 5)
            ([x1, y1, x2, y2, ry]).
        boxes_b (torch.Tensor): Input boxes b with shape (N, 5)
            ([x1, y1, x2, y2, ry]).

    Returns:
        torch.Tensor: IoU result with shape (M, N).
    """
    # Imported at call time rather than at module import time.
    from .box_iou_rotated import box_iou_rotated

    # stacklevel=2 attributes the warning to the caller's line, which is
    # the code that actually needs to be migrated.
    warnings.warn(
        '`iou3d.boxes_iou_bev` is deprecated and will be removed in'
        ' the future. Please, use `box_iou_rotated.box_iou_rotated`.',
        DeprecationWarning,
        stacklevel=2)
    return box_iou_rotated(_xyxyr2xywhr(boxes_a), _xyxyr2xywhr(boxes_b))
def nms_bev(boxes, scores, thresh, pre_max_size=None, post_max_size=None): def nms_bev(boxes, scores, thresh, pre_max_size=None, post_max_size=None):
"""NMS function GPU implementation (for BEV boxes). The overlap of two """NMS function GPU implementation (for BEV boxes).
The overlap of two
boxes for IoU calculation is defined as the exact overlapping area of the boxes for IoU calculation is defined as the exact overlapping area of the
two boxes. In this function, one can also set ``pre_max_size`` and two boxes. In this function, one can also set ``pre_max_size`` and
``post_max_size``. ``post_max_size``.
Args: Args:
boxes (torch.Tensor): Input boxes with the shape of [N, 5] boxes (torch.Tensor): Input boxes with the shape of (N, 5)
([x1, y1, x2, y2, ry]). ([x1, y1, x2, y2, ry]).
scores (torch.Tensor): Scores of boxes with the shape of [N]. scores (torch.Tensor): Scores of boxes with the shape of (N,).
thresh (float): Overlap threshold of NMS. thresh (float): Overlap threshold of NMS.
pre_max_size (int, optional): Max size of boxes before NMS. pre_max_size (int, optional): Max size of boxes before NMS.
Default: None. Default: None.
post_max_size (int, optional): Max size of boxes after NMS. post_max_size (int, optional): Max size of boxes after NMS.
Default: None. Default: None.
Returns: Returns:
torch.Tensor: Indexes after NMS. torch.Tensor: Indexes after NMS.
""" """
assert boxes.size(1) == 5, 'Input boxes shape should be [N, 5]' from .nms import nms_rotated
warnings.warn(
'`iou3d.nms_bev` is deprecated and will be removed in'
' the future. Please, use `nms.nms_rotated`.', DeprecationWarning)
assert boxes.size(1) == 5, 'Input boxes shape should be (N, 5)'
order = scores.sort(0, descending=True)[1] order = scores.sort(0, descending=True)[1]
if pre_max_size is not None: if pre_max_size is not None:
order = order[:pre_max_size] order = order[:pre_max_size]
boxes = boxes[order].contiguous() boxes = _xyxyr2xywhr(boxes)[order]
scores = scores[order]
keep = nms_rotated(boxes, scores, thresh)[1]
keep = order[keep]
keep = torch.zeros(boxes.size(0), dtype=torch.long)
num_out = torch.zeros(size=(), dtype=torch.long)
ext_module.iou3d_nms_forward(
boxes, keep, num_out, nms_overlap_thresh=thresh)
keep = order[keep[:num_out].cuda(boxes.device)].contiguous()
if post_max_size is not None: if post_max_size is not None:
keep = keep[:post_max_size] keep = keep[:post_max_size]
return keep return keep
def nms_normal_bev(boxes, scores, thresh): def nms_normal_bev(boxes, scores, thresh):
"""Normal NMS function GPU implementation (for BEV boxes). The overlap of """Normal NMS function GPU implementation (for BEV boxes).
The overlap of
two boxes for IoU calculation is defined as the exact overlapping area of two boxes for IoU calculation is defined as the exact overlapping area of
the two boxes WITH their yaw angle set to 0. the two boxes WITH their yaw angle set to 0.
Args: Args:
boxes (torch.Tensor): Input boxes with shape (N, 5). boxes (torch.Tensor): Input boxes with shape (N, 5)
scores (torch.Tensor): Scores of predicted boxes with shape (N). ([x1, y1, x2, y2, ry]).
scores (torch.Tensor): Scores of predicted boxes with shape (N,).
thresh (float): Overlap threshold of NMS. thresh (float): Overlap threshold of NMS.
Returns: Returns:
torch.Tensor: Remaining indices with scores in descending order. torch.Tensor: Remaining indices with scores in descending order.
""" """
assert boxes.shape[1] == 5, 'Input boxes shape should be [N, 5]' from .nms import nms
order = scores.sort(0, descending=True)[1]
boxes = boxes[order].contiguous() warnings.warn(
'`iou3d.nms_normal_bev` is deprecated and will be removed in'
' the future. Please, use `nms.nms`.', DeprecationWarning)
assert boxes.shape[1] == 5, 'Input boxes shape should be (N, 5)'
keep = torch.zeros(boxes.size(0), dtype=torch.long) return nms(boxes[:, :-1], scores, thresh)[1]
num_out = torch.zeros(size=(), dtype=torch.long)
ext_module.iou3d_nms_normal_forward(
boxes, keep, num_out, nms_overlap_thresh=thresh)
return order[keep[:num_out].cuda(boxes.device)].contiguous()
...@@ -3,59 +3,84 @@ import numpy as np ...@@ -3,59 +3,84 @@ import numpy as np
import pytest import pytest
import torch import torch
from mmcv.ops import boxes_iou_bev, nms_bev, nms_normal_bev from mmcv.ops import boxes_iou3d, nms3d, nms3d_normal
@pytest.mark.skipif( @pytest.mark.skipif(
not torch.cuda.is_available(), reason='requires CUDA support') not torch.cuda.is_available(), reason='requires CUDA support')
def test_boxes_iou_bev(): def test_boxes_iou3d():
np_boxes1 = np.asarray( np_boxes1 = np.asarray([[1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 0.0],
[[1.0, 1.0, 3.0, 4.0, 0.5], [2.0, 2.0, 3.0, 4.0, 0.6], [2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0],
[7.0, 7.0, 8.0, 8.0, 0.4]], [3.0, 3.0, 3.0, 3.0, 2.0, 2.0, 0.0]],
dtype=np.float32) dtype=np.float32)
np_boxes2 = np.asarray( np_boxes2 = np.asarray([[1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 0.0],
[[0.0, 2.0, 2.0, 5.0, 0.3], [2.0, 1.0, 3.0, 3.0, 0.5], [1.0, 1.0, 1.0, 2.0, 2.0, 2.0, np.pi / 2],
[5.0, 5.0, 6.0, 7.0, 0.4]], [1.0, 1.0, 1.0, 2.0, 2.0, 2.0, np.pi / 4]],
dtype=np.float32) dtype=np.float32)
np_expect_ious = np.asarray( np_expect_ious = np.asarray([[1.0, 1.0, 1.0 / 2**0.5],
[[0.2621, 0.2948, 0.0000], [0.0549, 0.1587, 0.0000], [1.0 / 7, 1.0 / 7, 1.0 / 7], [0.0, 0.0, 0.0]],
[0.0000, 0.0000, 0.0000]], dtype=np.float32)
dtype=np.float32)
boxes1 = torch.from_numpy(np_boxes1).cuda() boxes1 = torch.from_numpy(np_boxes1).cuda()
boxes2 = torch.from_numpy(np_boxes2).cuda() boxes2 = torch.from_numpy(np_boxes2).cuda()
ious = boxes_iou_bev(boxes1, boxes2) ious = boxes_iou3d(boxes1, boxes2)
assert np.allclose(ious.cpu().numpy(), np_expect_ious, atol=1e-4) assert np.allclose(ious.cpu().numpy(), np_expect_ious, atol=1e-4)
@pytest.mark.skipif( @pytest.mark.skipif(
not torch.cuda.is_available(), reason='requires CUDA support') not torch.cuda.is_available(), reason='requires CUDA support')
def test_nms_bev(): def test_nms3d():
np_boxes = np.array( # test for 5 boxes
[[6.0, 3.0, 8.0, 7.0, 2.0], [3.0, 6.0, 9.0, 11.0, 1.0], np_boxes = np.asarray([[1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 0.0],
[3.0, 7.0, 10.0, 12.0, 1.0], [1.0, 4.0, 13.0, 7.0, 3.0]], [2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0],
dtype=np.float32) [3.0, 3.0, 3.0, 3.0, 2.0, 2.0, 0.3],
np_scores = np.array([0.6, 0.9, 0.7, 0.2], dtype=np.float32) [3.0, 3.0, 3.0, 3.0, 2.0, 2.0, 0.0],
[3.0, 3.2, 3.2, 3.0, 2.0, 2.0, 0.3]],
dtype=np.float32)
np_scores = np.array([0.6, 0.9, 0.1, 0.2, 0.15], dtype=np.float32)
np_inds = np.array([1, 0, 3]) np_inds = np.array([1, 0, 3])
boxes = torch.from_numpy(np_boxes) boxes = torch.from_numpy(np_boxes)
scores = torch.from_numpy(np_scores) scores = torch.from_numpy(np_scores)
inds = nms_bev(boxes.cuda(), scores.cuda(), thresh=0.3) inds = nms3d(boxes.cuda(), scores.cuda(), iou_threshold=0.3)
assert np.allclose(inds.cpu().numpy(), np_inds) assert np.allclose(inds.cpu().numpy(), np_inds)
# test for many boxes
np.random.seed(42)
np_boxes = np.random.rand(555, 7).astype(np.float32)
np_scores = np.random.rand(555).astype(np.float32)
boxes = torch.from_numpy(np_boxes)
scores = torch.from_numpy(np_scores)
inds = nms3d(boxes.cuda(), scores.cuda(), iou_threshold=0.3)
assert len(inds.cpu().numpy()) == 176
@pytest.mark.skipif( @pytest.mark.skipif(
not torch.cuda.is_available(), reason='requires CUDA support') not torch.cuda.is_available(), reason='requires CUDA support')
def test_nms_normal_bev(): def test_nms3d_normal():
np_boxes = np.array( # test for 5 boxes
[[6.0, 3.0, 8.0, 7.0, 2.0], [3.0, 6.0, 9.0, 11.0, 1.0], np_boxes = np.asarray([[1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 0.0],
[3.0, 7.0, 10.0, 12.0, 1.0], [1.0, 4.0, 13.0, 7.0, 3.0]], [2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0],
dtype=np.float32) [3.0, 3.0, 3.0, 3.0, 2.0, 2.0, 0.3],
np_scores = np.array([0.6, 0.9, 0.7, 0.2], dtype=np.float32) [3.0, 3.0, 3.0, 3.0, 2.0, 2.0, 0.0],
[3.0, 3.2, 3.2, 3.0, 2.0, 2.0, 0.3]],
dtype=np.float32)
np_scores = np.array([0.6, 0.9, 0.1, 0.2, 0.15], dtype=np.float32)
np_inds = np.array([1, 0, 3]) np_inds = np.array([1, 0, 3])
boxes = torch.from_numpy(np_boxes) boxes = torch.from_numpy(np_boxes)
scores = torch.from_numpy(np_scores) scores = torch.from_numpy(np_scores)
inds = nms_normal_bev(boxes.cuda(), scores.cuda(), thresh=0.3) inds = nms3d_normal(boxes.cuda(), scores.cuda(), iou_threshold=0.3)
assert np.allclose(inds.cpu().numpy(), np_inds) assert np.allclose(inds.cpu().numpy(), np_inds)
# test for many boxes
np.random.seed(42)
np_boxes = np.random.rand(555, 7).astype(np.float32)
np_scores = np.random.rand(555).astype(np.float32)
boxes = torch.from_numpy(np_boxes)
scores = torch.from_numpy(np_scores)
inds = nms3d_normal(boxes.cuda(), scores.cuda(), iou_threshold=0.3)
assert len(inds.cpu().numpy()) == 148
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment