Unverified Commit 0ce29733 authored by pc, committed by GitHub

add group_points, iou3d, roiaware_pool3d and voxelize in parrots (#1504)

parent 9cad97bc
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/group_points.cpp
#include "pytorch_cpp_helper.hpp"
#ifdef MMCV_WITH_CUDA
void GroupPointsForwardCUDAKernelLauncher(int b, int c, int n, int npoints,
int nsample, const Tensor points,
const Tensor idx, Tensor out);
void group_points_forward_cuda(int b, int c, int n, int npoints, int nsample,
const Tensor points, const Tensor idx,
Tensor out) {
GroupPointsForwardCUDAKernelLauncher(b, c, n, npoints, nsample, points, idx,
out);
};
void GroupPointsBackwardCUDAKernelLauncher(int b, int c, int n, int npoints,
int nsample, const Tensor grad_out,
const Tensor idx,
Tensor grad_points);
void group_points_backward_cuda(int b, int c, int n, int npoints, int nsample,
const Tensor grad_out, const Tensor idx,
Tensor grad_points) {
GroupPointsBackwardCUDAKernelLauncher(b, c, n, npoints, nsample, grad_out,
idx, grad_points);
};
#endif
void group_points_forward(Tensor points_tensor, Tensor idx_tensor,
Tensor out_tensor, int b, int c, int n, int npoints,
int nsample) {
if (points_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
group_points_forward_cuda(b, c, n, npoints, nsample, points_tensor,
idx_tensor, out_tensor);
#else
AT_ERROR("group_points is not compiled with GPU support");
#endif
} else {
AT_ERROR("group_points is not implemented on CPU");
}
}
void group_points_backward(Tensor grad_out_tensor, Tensor idx_tensor,
Tensor grad_points_tensor, int b, int c, int n,
int npoints, int nsample) {
if (grad_out_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
group_points_backward_cuda(b, c, n, npoints, nsample, grad_out_tensor,
idx_tensor, grad_points_tensor);
#else
AT_ERROR("group_points is not compiled with GPU support");
#endif
} else {
AT_ERROR("group_points is not implemented on CPU");
}
}
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
#include "group_points_pytorch.h"
using namespace parrots;
#ifdef MMCV_WITH_CUDA
void group_points_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
int b, c, n, npoints, nsample;
SSAttrs(attr)
.get<int>("b", b)
.get<int>("c", c)
.get<int>("n", n)
.get<int>("npoints", npoints)
.get<int>("nsample", nsample)
.done();
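// Note: b, c, n, npoints and nsample follow the PointNet++ grouping
// convention (batch size, feature channels, number of input points, number
// of grouped centers, samples gathered per center), so ins[0] is expected to
// be (B, C, N), ins[1] (B, npoints, nsample) and outs[0]
// (B, C, npoints, nsample).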
auto points_tensor = buildATensor(ctx, ins[0]);
auto idx_tensor = buildATensor(ctx, ins[1]);
auto out_tensor = buildATensor(ctx, outs[0]);
group_points_forward(points_tensor, idx_tensor, out_tensor, b, c, n, npoints,
nsample);
}
void group_points_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
int b, c, n, npoints, nsample;
SSAttrs(attr)
.get<int>("b", b)
.get<int>("c", c)
.get<int>("n", n)
.get<int>("npoints", npoints)
.get<int>("nsample", nsample)
.done();
auto grad_out_tensor = buildATensor(ctx, ins[0]);
auto idx_tensor = buildATensor(ctx, ins[1]);
auto grad_points_tensor = buildATensor(ctx, outs[0]);
group_points_backward(grad_out_tensor, idx_tensor, grad_points_tensor, b, c,
n, npoints, nsample);
}
PARROTS_EXTENSION_REGISTER(group_points_forward)
.attr("b")
.attr("c")
.attr("n")
.attr("npoints")
.attr("nsample")
.input(2)
.output(1)
.apply(group_points_forward_cuda_parrots)
.done();
PARROTS_EXTENSION_REGISTER(group_points_backward)
.attr("b")
.attr("c")
.attr("n")
.attr("npoints")
.attr("nsample")
.input(2)
.output(1)
.apply(group_points_backward_cuda_parrots)
.done();
#endif
// Copyright (c) OpenMMLab. All rights reserved
#ifndef GROUP_POINTS_PYTORCH_H
#define GROUP_POINTS_PYTORCH_H
#include <torch/extension.h>
using namespace at;
void group_points_forward(Tensor points_tensor, Tensor idx_tensor,
Tensor out_tensor, int b, int c, int n, int npoints,
int nsample);
void group_points_backward(Tensor grad_out_tensor, Tensor idx_tensor,
Tensor grad_points_tensor, int b, int c, int n,
int npoints, int nsample);
#endif // GROUP_POINTS_PYTORCH_H
// Modified from
// https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/iou3d_nms/src/iou3d_nms.cpp
/*
3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)
Written by Shaoshuai Shi
All Rights Reserved 2019-2020.
*/
#include "pytorch_cpp_helper.hpp"
const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;
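// Each unsigned long long in the NMS mask packs the suppression bits of 64
// boxes (sizeof(unsigned long long) * 8), so one mask row needs
// DIVUP(boxes_num, THREADS_PER_BLOCK_NMS) words.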
#ifdef MMCV_WITH_CUDA
#include <cuda.h>
#include <cuda_runtime_api.h>
#define CHECK_ERROR(state) \
{ gpuAssert((state), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line,
bool abort = true) {
if (code != cudaSuccess) {
fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file,
line);
if (abort) exit(code);
}
}
void IoU3DBoxesOverlapBevForwardCUDAKernelLauncher(const int num_a,
const Tensor boxes_a,
const int num_b,
const Tensor boxes_b,
Tensor ans_overlap);
void iou3d_boxes_overlap_bev_forward_cuda(const int num_a, const Tensor boxes_a,
const int num_b, const Tensor boxes_b,
Tensor ans_overlap) {
IoU3DBoxesOverlapBevForwardCUDAKernelLauncher(num_a, boxes_a, num_b, boxes_b,
ans_overlap);
};
void IoU3DBoxesIoUBevForwardCUDAKernelLauncher(const int num_a,
const Tensor boxes_a,
const int num_b,
const Tensor boxes_b,
Tensor ans_iou);
void iou3d_boxes_iou_bev_forward_cuda(const int num_a, const Tensor boxes_a,
const int num_b, const Tensor boxes_b,
Tensor ans_iou) {
IoU3DBoxesIoUBevForwardCUDAKernelLauncher(num_a, boxes_a, num_b, boxes_b,
ans_iou);
};
void IoU3DNMSForwardCUDAKernelLauncher(const Tensor boxes,
unsigned long long *mask, int boxes_num,
float nms_overlap_thresh);
void iou3d_nms_forward_cuda(const Tensor boxes, unsigned long long *mask,
int boxes_num, float nms_overlap_thresh) {
IoU3DNMSForwardCUDAKernelLauncher(boxes, mask, boxes_num, nms_overlap_thresh);
};
void IoU3DNMSNormalForwardCUDAKernelLauncher(const Tensor boxes,
unsigned long long *mask,
int boxes_num,
float nms_overlap_thresh);
void iou3d_nms_normal_forward_cuda(const Tensor boxes, unsigned long long *mask,
int boxes_num, float nms_overlap_thresh) {
IoU3DNMSNormalForwardCUDAKernelLauncher(boxes, mask, boxes_num,
nms_overlap_thresh);
};
#endif
void iou3d_boxes_overlap_bev_forward(Tensor boxes_a, Tensor boxes_b,
Tensor ans_overlap) {
// params boxes_a: (N, 5) [x1, y1, x2, y2, ry]
// params boxes_b: (M, 5)
// params ans_overlap: (N, M)
if (boxes_a.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(boxes_a);
CHECK_CUDA_INPUT(boxes_b);
CHECK_CUDA_INPUT(ans_overlap);
int num_a = boxes_a.size(0);
int num_b = boxes_b.size(0);
iou3d_boxes_overlap_bev_forward_cuda(num_a, boxes_a, num_b, boxes_b,
ans_overlap);
#else
AT_ERROR("iou3d_boxes_overlap_bev is not compiled with GPU support");
#endif
} else {
AT_ERROR("iou3d_boxes_overlap_bev is not implemented on CPU");
}
}
void iou3d_boxes_iou_bev_forward(Tensor boxes_a, Tensor boxes_b,
Tensor ans_iou) {
// params boxes_a: (N, 5) [x1, y1, x2, y2, ry]
// params boxes_b: (M, 5)
// params ans_iou: (N, M)
if (boxes_a.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(boxes_a);
CHECK_CUDA_INPUT(boxes_b);
CHECK_CUDA_INPUT(ans_iou);
int num_a = boxes_a.size(0);
int num_b = boxes_b.size(0);
iou3d_boxes_iou_bev_forward_cuda(num_a, boxes_a, num_b, boxes_b, ans_iou);
#else
AT_ERROR("iou3d_boxes_iou_bev is not compiled with GPU support");
#endif
} else {
AT_ERROR("iou3d_boxes_iou_bev is not implemented on CPU");
}
}
void iou3d_nms_forward(Tensor boxes, Tensor keep, Tensor keep_num,
float nms_overlap_thresh) {
// params boxes: (N, 5) [x1, y1, x2, y2, ry]
// params keep: (N)
if (boxes.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(boxes);
CHECK_CONTIGUOUS(keep);
int boxes_num = boxes.size(0);
int64_t *keep_data = keep.data_ptr<int64_t>();
int64_t *keep_num_data = keep_num.data_ptr<int64_t>();
const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
Tensor mask =
at::empty({boxes_num, col_blocks}, boxes.options().dtype(at::kLong));
unsigned long long *mask_data =
(unsigned long long *)mask.data_ptr<int64_t>();
iou3d_nms_forward_cuda(boxes, mask_data, boxes_num, nms_overlap_thresh);
at::Tensor mask_cpu = mask.to(at::kCPU);
unsigned long long *mask_host =
(unsigned long long *)mask_cpu.data_ptr<int64_t>();
std::vector<unsigned long long> remv_cpu(col_blocks);
memset(&remv_cpu[0], 0, sizeof(unsigned long long) * col_blocks);
int num_to_keep = 0;
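// Greedily decode the pairwise-overlap mask computed on the GPU: boxes are
// visited in input order (the caller is expected to pre-sort them by score);
// a box is kept only if no previously kept box has flagged it, and its own
// mask row is then OR-ed into remv_cpu to suppress later overlapping boxes.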
for (int i = 0; i < boxes_num; i++) {
int nblock = i / THREADS_PER_BLOCK_NMS;
int inblock = i % THREADS_PER_BLOCK_NMS;
if (!(remv_cpu[nblock] & (1ULL << inblock))) {
keep_data[num_to_keep++] = i;
unsigned long long *p = &mask_host[0] + i * col_blocks;
for (int j = nblock; j < col_blocks; j++) {
remv_cpu[j] |= p[j];
}
}
}
if (cudaSuccess != cudaGetLastError()) printf("Error!\n");
*keep_num_data = num_to_keep;
#else
AT_ERROR("iou3d_nms is not compiled with GPU support");
#endif
} else {
AT_ERROR("iou3d_nms is not implemented on CPU");
}
}
void iou3d_nms_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num,
float nms_overlap_thresh) {
// params boxes: (N, 5) [x1, y1, x2, y2, ry]
// params keep: (N)
if (boxes.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(boxes);
CHECK_CONTIGUOUS(keep);
int boxes_num = boxes.size(0);
int64_t *keep_data = keep.data_ptr<int64_t>();
int64_t *keep_num_data = keep_num.data_ptr<int64_t>();
const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
Tensor mask =
at::empty({boxes_num, col_blocks}, boxes.options().dtype(at::kLong));
unsigned long long *mask_data =
(unsigned long long *)mask.data_ptr<int64_t>();
iou3d_nms_normal_forward_cuda(boxes, mask_data, boxes_num,
nms_overlap_thresh);
at::Tensor mask_cpu = mask.to(at::kCPU);
unsigned long long *mask_host =
(unsigned long long *)mask_cpu.data_ptr<int64_t>();
std::vector<unsigned long long> remv_cpu(col_blocks);
memset(&remv_cpu[0], 0, sizeof(unsigned long long) * col_blocks);
int num_to_keep = 0;
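// Same greedy decoding of the GPU overlap mask as in iou3d_nms_forward above.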
for (int i = 0; i < boxes_num; i++) {
int nblock = i / THREADS_PER_BLOCK_NMS;
int inblock = i % THREADS_PER_BLOCK_NMS;
if (!(remv_cpu[nblock] & (1ULL << inblock))) {
keep_data[num_to_keep++] = i;
unsigned long long *p = &mask_host[0] + i * col_blocks;
for (int j = nblock; j < col_blocks; j++) {
remv_cpu[j] |= p[j];
}
}
}
if (cudaSuccess != cudaGetLastError()) printf("Error!\n");
*keep_num_data = num_to_keep;
#else
AT_ERROR("iou3d_nms_normal is not compiled with GPU support");
#endif
} else {
AT_ERROR("iou3d_nms_normal is not implemented on CPU");
}
}
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
#include "iou3d_pytorch.h"
using namespace parrots;
#ifdef MMCV_WITH_CUDA
void iou3d_boxes_iou_bev_forward_cuda_parrots(
CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
auto boxes_a = buildATensor(ctx, ins[0]);
auto boxes_b = buildATensor(ctx, ins[1]);
auto ans_iou = buildATensor(ctx, outs[0]);
iou3d_boxes_iou_bev_forward(boxes_a, boxes_b, ans_iou);
}
void iou3d_nms_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
float nms_overlap_thresh;
SSAttrs(attr).get<float>("nms_overlap_thresh", nms_overlap_thresh).done();
auto boxes = buildATensor(ctx, ins[0]);
auto keep = buildATensor(ctx, outs[0]);
auto keep_num = buildATensor(ctx, outs[1]);
iou3d_nms_forward(boxes, keep, keep_num, nms_overlap_thresh);
}
void iou3d_nms_normal_forward_cuda_parrots(CudaContext& ctx,
const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
float nms_overlap_thresh;
SSAttrs(attr).get<float>("nms_overlap_thresh", nms_overlap_thresh).done();
auto boxes = buildATensor(ctx, ins[0]);
auto keep = buildATensor(ctx, outs[0]);
auto keep_num = buildATensor(ctx, outs[1]);
iou3d_nms_normal_forward(boxes, keep, keep_num, nms_overlap_thresh);
}
PARROTS_EXTENSION_REGISTER(iou3d_boxes_iou_bev_forward)
.input(2)
.output(1)
.apply(iou3d_boxes_iou_bev_forward_cuda_parrots)
.done();
PARROTS_EXTENSION_REGISTER(iou3d_nms_forward)
.attr("nms_overlap_thresh")
.input(1)
.output(2)
.apply(iou3d_nms_forward_cuda_parrots)
.done();
PARROTS_EXTENSION_REGISTER(iou3d_nms_normal_forward)
.attr("nms_overlap_thresh")
.input(1)
.output(2)
.apply(iou3d_nms_normal_forward_cuda_parrots)
.done();
#endif
// Copyright (c) OpenMMLab. All rights reserved
#ifndef IOU_3D_PYTORCH_H
#define IOU_3D_PYTORCH_H
#include <torch/extension.h>
using namespace at;
void iou3d_boxes_iou_bev_forward(Tensor boxes_a, Tensor boxes_b,
Tensor ans_iou);
void iou3d_nms_forward(Tensor boxes, Tensor keep, Tensor keep_num,
float nms_overlap_thresh);
void iou3d_nms_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num,
float nms_overlap_thresh);
#endif // IOU_3D_PYTORCH_H
#include "pytorch_cpp_helper.hpp"
#ifdef MMCV_WITH_CUDA
void PointsInBoxesPartForwardCUDAKernelLauncher(int batch_size, int boxes_num,
int pts_num, const Tensor boxes,
const Tensor pts,
Tensor box_idx_of_points);
void points_in_boxes_part_forward_cuda(int batch_size, int boxes_num,
int pts_num, const Tensor boxes,
const Tensor pts,
Tensor box_idx_of_points) {
PointsInBoxesPartForwardCUDAKernelLauncher(batch_size, boxes_num, pts_num,
boxes, pts, box_idx_of_points);
};
void PointsInBoxesAllForwardCUDAKernelLauncher(int batch_size, int boxes_num,
int pts_num, const Tensor boxes,
const Tensor pts,
Tensor box_idx_of_points);
void points_in_boxes_all_forward_cuda(int batch_size, int boxes_num,
int pts_num, const Tensor boxes,
const Tensor pts,
Tensor box_idx_of_points) {
PointsInBoxesAllForwardCUDAKernelLauncher(batch_size, boxes_num, pts_num,
boxes, pts, box_idx_of_points);
};
#endif
void points_in_boxes_part_forward(Tensor boxes_tensor, Tensor pts_tensor,
Tensor box_idx_of_points_tensor) {
// params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR
// coordinate, z is the bottom center of each box
// params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate
// params boxes_idx_of_points: (B, npoints), default -1
if (pts_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(boxes_tensor);
CHECK_CUDA_INPUT(pts_tensor);
CHECK_CUDA_INPUT(box_idx_of_points_tensor);
int batch_size = boxes_tensor.size(0);
int boxes_num = boxes_tensor.size(1);
int pts_num = pts_tensor.size(1);
const float *boxes = boxes_tensor.data_ptr<float>();
const float *pts = pts_tensor.data_ptr<float>();
int *box_idx_of_points = box_idx_of_points_tensor.data_ptr<int>();
points_in_boxes_part_forward_cuda(batch_size, boxes_num, pts_num,
boxes_tensor, pts_tensor,
box_idx_of_points_tensor);
#else
AT_ERROR("points_in_boxes_part is not compiled with GPU support");
#endif
} else {
AT_ERROR("points_in_boxes_part is not implemented on CPU");
}
}
void points_in_boxes_all_forward(Tensor boxes_tensor, Tensor pts_tensor,
Tensor box_idx_of_points_tensor) {
// params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR
// coordinate, z is the bottom center
// params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate
// params boxes_idx_of_points: (B, npoints), default -1
if (pts_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(boxes_tensor);
CHECK_CUDA_INPUT(pts_tensor);
CHECK_CUDA_INPUT(box_idx_of_points_tensor);
int batch_size = boxes_tensor.size(0);
int boxes_num = boxes_tensor.size(1);
int pts_num = pts_tensor.size(1);
const float *boxes = boxes_tensor.data_ptr<float>();
const float *pts = pts_tensor.data_ptr<float>();
int *box_idx_of_points = box_idx_of_points_tensor.data_ptr<int>();
points_in_boxes_all_forward_cuda(batch_size, boxes_num, pts_num,
boxes_tensor, pts_tensor,
box_idx_of_points_tensor);
#else
AT_ERROR("points_in_boxes_all is not compiled with GPU support");
#endif
} else {
AT_ERROR("points_in_boxes_all is not implemented on CPU");
}
}
#include "pytorch_cpp_helper.hpp"
inline void lidar_to_local_coords_cpu(float shift_x, float shift_y, float rz,
float &local_x, float &local_y) {
float cosa = cos(-rz), sina = sin(-rz);
local_x = shift_x * cosa + shift_y * (-sina);
local_y = shift_x * sina + shift_y * cosa;
}
inline int check_pt_in_box3d_cpu(const float *pt, const float *box3d,
float &local_x, float &local_y) {
// param pt: (x, y, z)
// param box3d: (cx, cy, cz, x_size, y_size, z_size, rz) in LiDAR coordinate,
// cz is the bottom center
float x = pt[0], y = pt[1], z = pt[2];
float cx = box3d[0], cy = box3d[1], cz = box3d[2];
float x_size = box3d[3], y_size = box3d[4], z_size = box3d[5], rz = box3d[6];
cz += z_size /
2.0; // shift to the center since cz in box3d is the bottom center
if (fabsf(z - cz) > z_size / 2.0) return 0;
lidar_to_local_coords_cpu(x - cx, y - cy, rz, local_x, local_y);
float in_flag = (local_x > -x_size / 2.0) & (local_x < x_size / 2.0) &
(local_y > -y_size / 2.0) & (local_y < y_size / 2.0);
return in_flag;
}
void points_in_boxes_cpu_forward(Tensor boxes_tensor, Tensor pts_tensor,
Tensor pts_indices_tensor) {
// params boxes: (N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR
// coordinate, z is the bottom center; the boxes must not overlap each other
// params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
// params pts_indices: (N, npoints)
CHECK_CONTIGUOUS(boxes_tensor);
CHECK_CONTIGUOUS(pts_tensor);
CHECK_CONTIGUOUS(pts_indices_tensor);
int boxes_num = boxes_tensor.size(0);
int pts_num = pts_tensor.size(0);
const float *boxes = boxes_tensor.data_ptr<float>();
const float *pts = pts_tensor.data_ptr<float>();
int *pts_indices = pts_indices_tensor.data_ptr<int>();
float local_x = 0, local_y = 0;
for (int i = 0; i < boxes_num; i++) {
for (int j = 0; j < pts_num; j++) {
int cur_in_flag =
check_pt_in_box3d_cpu(pts + j * 3, boxes + i * 7, local_x, local_y);
pts_indices[i * pts_num + j] = cur_in_flag;
}
}
}
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
#include "points_in_boxes_pytorch.h"
using namespace parrots;
#ifdef MMCV_WITH_CUDA
void points_in_boxes_part_forward_cuda_parrots(
CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
auto boxes_tensor = buildATensor(ctx, ins[0]);
auto pts_tensor = buildATensor(ctx, ins[1]);
auto box_idx_of_points_tensor = buildATensor(ctx, outs[0]);
points_in_boxes_part_forward(boxes_tensor, pts_tensor,
box_idx_of_points_tensor);
}
void points_in_boxes_all_forward_cuda_parrots(
CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
auto boxes_tensor = buildATensor(ctx, ins[0]);
auto pts_tensor = buildATensor(ctx, ins[1]);
auto box_idx_of_points_tensor = buildATensor(ctx, outs[0]);
points_in_boxes_all_forward(boxes_tensor, pts_tensor,
box_idx_of_points_tensor);
}
PARROTS_EXTENSION_REGISTER(points_in_boxes_part_forward)
.input(2)
.output(1)
.apply(points_in_boxes_part_forward_cuda_parrots)
.done();
PARROTS_EXTENSION_REGISTER(points_in_boxes_all_forward)
.input(2)
.output(1)
.apply(points_in_boxes_all_forward_cuda_parrots)
.done();
#endif
void points_in_boxes_forward_cpu_parrots(HostContext& ctx,
const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
auto boxes_tensor = buildATensor(ctx, ins[0]);
auto pts_tensor = buildATensor(ctx, ins[1]);
auto pts_indices_tensor = buildATensor(ctx, outs[0]);
points_in_boxes_cpu_forward(boxes_tensor, pts_tensor, pts_indices_tensor);
}
PARROTS_EXTENSION_REGISTER(points_in_boxes_cpu_forward)
.input(2)
.output(1)
.apply(points_in_boxes_forward_cpu_parrots)
.done();
// Copyright (c) OpenMMLab. All rights reserved
#ifndef POINTS_IN_BOXES_PYTORCH_H
#define POINTS_IN_BOXES_PYTORCH_H
#include <torch/extension.h>
using namespace at;
void points_in_boxes_part_forward(Tensor boxes_tensor, Tensor pts_tensor,
Tensor box_idx_of_points_tensor);
void points_in_boxes_all_forward(Tensor boxes_tensor, Tensor pts_tensor,
Tensor box_idx_of_points_tensor);
void points_in_boxes_cpu_forward(Tensor boxes_tensor, Tensor pts_tensor,
Tensor pts_indices_tensor);
#endif // POINTS_IN_BOXES_PYTORCH_H
#include "pytorch_cpp_helper.hpp"
#ifdef MMCV_WITH_CUDA
void RoiawarePool3dForwardCUDAKernelLauncher(
int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x,
int out_y, int out_z, const Tensor rois, const Tensor pts,
const Tensor pts_feature, Tensor argmax, Tensor pts_idx_of_voxels,
Tensor pooled_features, int pool_method);
void roiaware_pool3d_forward_cuda(int boxes_num, int pts_num, int channels,
int max_pts_each_voxel, int out_x, int out_y,
int out_z, const Tensor rois,
const Tensor pts, const Tensor pts_feature,
Tensor argmax, Tensor pts_idx_of_voxels,
Tensor pooled_features, int pool_method) {
RoiawarePool3dForwardCUDAKernelLauncher(
boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
rois, pts, pts_feature, argmax, pts_idx_of_voxels, pooled_features,
pool_method);
};
void RoiawarePool3dBackwardCUDAKernelLauncher(
int boxes_num, int out_x, int out_y, int out_z, int channels,
int max_pts_each_voxel, const Tensor pts_idx_of_voxels, const Tensor argmax,
const Tensor grad_out, Tensor grad_in, int pool_method);
void roiaware_pool3d_backward_cuda(int boxes_num, int out_x, int out_y,
int out_z, int channels,
int max_pts_each_voxel,
const Tensor pts_idx_of_voxels,
const Tensor argmax, const Tensor grad_out,
Tensor grad_in, int pool_method) {
RoiawarePool3dBackwardCUDAKernelLauncher(
boxes_num, out_x, out_y, out_z, channels, max_pts_each_voxel,
pts_idx_of_voxels, argmax, grad_out, grad_in, pool_method);
};
#endif
void roiaware_pool3d_forward(Tensor rois, Tensor pts, Tensor pts_feature,
Tensor argmax, Tensor pts_idx_of_voxels,
Tensor pooled_features, int pool_method) {
// params rois: (N, 7) [x, y, z, x_size, y_size, z_size, ry] in LiDAR
// coordinate
// params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
// params pts_feature: (npoints, C)
// params argmax: (N, out_x, out_y, out_z, C)
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params pooled_features: (N, out_x, out_y, out_z, C)
// params pool_method: 0: max_pool 1: avg_pool
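// The op pools the per-point features into an (out_x, out_y, out_z) grid
// defined inside each rotated ROI; argmax (for max pooling) and
// pts_idx_of_voxels are filled here so the backward pass can route gradients
// back to the contributing points.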
if (pts.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(rois);
CHECK_CUDA_INPUT(pts);
CHECK_CUDA_INPUT(pts_feature);
CHECK_CUDA_INPUT(argmax);
CHECK_CUDA_INPUT(pts_idx_of_voxels);
CHECK_CUDA_INPUT(pooled_features);
int boxes_num = rois.size(0);
int pts_num = pts.size(0);
int channels = pts_feature.size(1);
int max_pts_each_voxel =
pts_idx_of_voxels.size(4); // index 0 is the counter
int out_x = pts_idx_of_voxels.size(1);
int out_y = pts_idx_of_voxels.size(2);
int out_z = pts_idx_of_voxels.size(3);
assert((out_x < 256) && (out_y < 256) &&
(out_z < 256)); // we encode index with 8bit
roiaware_pool3d_forward_cuda(boxes_num, pts_num, channels,
max_pts_each_voxel, out_x, out_y, out_z, rois,
pts, pts_feature, argmax, pts_idx_of_voxels,
pooled_features, pool_method);
#else
AT_ERROR("roiaware_pool3d is not compiled with GPU support");
#endif
} else {
AT_ERROR("roiaware_pool3d is not implemented on CPU");
}
}
void roiaware_pool3d_backward(Tensor pts_idx_of_voxels, Tensor argmax,
Tensor grad_out, Tensor grad_in,
int pool_method) {
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params argmax: (N, out_x, out_y, out_z, C)
// params grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value
// params pool_method: 0: max_pool 1: avg_pool
if (grad_in.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(pts_idx_of_voxels);
CHECK_CUDA_INPUT(argmax);
CHECK_CUDA_INPUT(grad_out);
CHECK_CUDA_INPUT(grad_in);
int boxes_num = pts_idx_of_voxels.size(0);
int out_x = pts_idx_of_voxels.size(1);
int out_y = pts_idx_of_voxels.size(2);
int out_z = pts_idx_of_voxels.size(3);
int max_pts_each_voxel =
pts_idx_of_voxels.size(4); // index 0 is the counter
int channels = grad_out.size(4);
roiaware_pool3d_backward_cuda(boxes_num, out_x, out_y, out_z, channels,
max_pts_each_voxel, pts_idx_of_voxels, argmax,
grad_out, grad_in, pool_method);
#else
AT_ERROR("roiaware_pool3d is not compiled with GPU support");
#endif
} else {
AT_ERROR("roiaware_pool3d is not implemented on CPU");
}
}
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
#include "roiaware_pool3d_pytorch.h"
using namespace parrots;
#ifdef MMCV_WITH_CUDA
void roiaware_pool3d_forward_cuda_parrots(CudaContext& ctx,
const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
int pool_method;
SSAttrs(attr).get<int>("pool_method", pool_method).done();
auto rois = buildATensor(ctx, ins[0]);
auto pts = buildATensor(ctx, ins[1]);
auto pts_feature = buildATensor(ctx, ins[2]);
auto argmax = buildATensor(ctx, outs[0]);
auto pts_idx_of_voxels = buildATensor(ctx, outs[1]);
auto pooled_features = buildATensor(ctx, outs[2]);
roiaware_pool3d_forward(rois, pts, pts_feature, argmax, pts_idx_of_voxels,
pooled_features, pool_method);
}
void roiaware_pool3d_backward_cuda_parrots(CudaContext& ctx,
const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
int pool_method;
SSAttrs(attr).get<int>("pool_method", pool_method).done();
auto pts_idx_of_voxels = buildATensor(ctx, ins[0]);
auto argmax = buildATensor(ctx, ins[1]);
auto grad_out = buildATensor(ctx, ins[2]);
auto grad_in = buildATensor(ctx, outs[0]);
roiaware_pool3d_backward(pts_idx_of_voxels, argmax, grad_out, grad_in,
pool_method);
}
PARROTS_EXTENSION_REGISTER(roiaware_pool3d_forward)
.attr("pool_method")
.input(3)
.output(3)
.apply(roiaware_pool3d_forward_cuda_parrots)
.done();
PARROTS_EXTENSION_REGISTER(roiaware_pool3d_backward)
.attr("pool_method")
.input(3)
.output(1)
.apply(roiaware_pool3d_backward_cuda_parrots)
.done();
#endif
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ROIAWARE_POOL3D_PYTORCH_H
#define ROIAWARE_POOL3D_PYTORCH_H
#include <torch/extension.h>
using namespace at;
void roiaware_pool3d_forward(Tensor rois, Tensor pts, Tensor pts_feature,
Tensor argmax, Tensor pts_idx_of_voxels,
Tensor pooled_features, int pool_method);
void roiaware_pool3d_backward(Tensor pts_idx_of_voxels, Tensor argmax,
Tensor grad_out, Tensor grad_in, int pool_method);
#endif // ROIAWARE_POOL3D_PYTORCH_H
// Copyright (c) OpenMMLab. All rights reserved.
#include "pytorch_cpp_helper.hpp"
#ifdef MMCV_WITH_CUDA
int HardVoxelizeForwardCUDAKernelLauncher(
const at::Tensor &points, at::Tensor &voxels, at::Tensor &coors,
at::Tensor &num_points_per_voxel, const std::vector<float> voxel_size,
const std::vector<float> coors_range, const int max_points,
const int max_voxels, const int NDim = 3);
int hard_voxelize_forward_cuda(const at::Tensor &points, at::Tensor &voxels,
at::Tensor &coors,
at::Tensor &num_points_per_voxel,
const std::vector<float> voxel_size,
const std::vector<float> coors_range,
const int max_points, const int max_voxels,
const int NDim = 3) {
return HardVoxelizeForwardCUDAKernelLauncher(
points, voxels, coors, num_points_per_voxel, voxel_size, coors_range,
max_points, max_voxels, NDim);
};
void DynamicVoxelizeForwardCUDAKernelLauncher(
const at::Tensor &points, at::Tensor &coors,
const std::vector<float> voxel_size, const std::vector<float> coors_range,
const int NDim = 3);
void dynamic_voxelize_forward_cuda(const at::Tensor &points, at::Tensor &coors,
const std::vector<float> voxel_size,
const std::vector<float> coors_range,
const int NDim = 3) {
DynamicVoxelizeForwardCUDAKernelLauncher(points, coors, voxel_size,
coors_range, NDim);
};
#endif
int hard_voxelize_forward_cpu(const at::Tensor &points, at::Tensor &voxels,
at::Tensor &coors,
at::Tensor &num_points_per_voxel,
const std::vector<float> voxel_size,
const std::vector<float> coors_range,
const int max_points, const int max_voxels,
const int NDim = 3);
void dynamic_voxelize_forward_cpu(const at::Tensor &points, at::Tensor &coors,
const std::vector<float> voxel_size,
const std::vector<float> coors_range,
const int NDim = 3);
void hard_voxelize_forward(const at::Tensor &points,
const at::Tensor &voxel_size,
const at::Tensor &coors_range, at::Tensor &voxels,
at::Tensor &coors, at::Tensor &num_points_per_voxel,
at::Tensor &voxel_num, const int max_points,
const int max_voxels, const int NDim = 3) {
int64_t *voxel_num_data = voxel_num.data_ptr<int64_t>();
std::vector<float> voxel_size_v(
voxel_size.data_ptr<float>(),
voxel_size.data_ptr<float>() + voxel_size.numel());
std::vector<float> coors_range_v(
coors_range.data_ptr<float>(),
coors_range.data_ptr<float>() + coors_range.numel());
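// Dispatch to the CUDA or CPU implementation; both return the number of
// voxels actually produced, which is written into voxel_num so the caller
// can trim the zero-padded voxels/coors outputs.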
if (points.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(points);
*voxel_num_data = hard_voxelize_forward_cuda(
points, voxels, coors, num_points_per_voxel, voxel_size_v,
coors_range_v, max_points, max_voxels, NDim);
#else
AT_ERROR("hard_voxelize is not compiled with GPU support");
#endif
} else {
*voxel_num_data = hard_voxelize_forward_cpu(
points, voxels, coors, num_points_per_voxel, voxel_size_v,
coors_range_v, max_points, max_voxels, NDim);
}
}
void dynamic_voxelize_forward(const at::Tensor &points,
const at::Tensor &voxel_size,
const at::Tensor &coors_range, at::Tensor &coors,
const int NDim = 3) {
std::vector<float> voxel_size_v(
voxel_size.data_ptr<float>(),
voxel_size.data_ptr<float>() + voxel_size.numel());
std::vector<float> coors_range_v(
coors_range.data_ptr<float>(),
coors_range.data_ptr<float>() + coors_range.numel());
if (points.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(points);
dynamic_voxelize_forward_cuda(points, coors, voxel_size_v, coors_range_v,
NDim);
#else
AT_ERROR("dynamic_voxelize is not compiled with GPU support");
#endif
} else {
dynamic_voxelize_forward_cpu(points, coors, voxel_size_v, coors_range_v,
NDim);
}
}
// Copyright (c) OpenMMLab. All rights reserved.
#include "pytorch_cpp_helper.hpp"
template <typename T, typename T_int>
void dynamic_voxelize_forward_cpu_kernel(
const torch::TensorAccessor<T, 2> points,
torch::TensorAccessor<T_int, 2> coors, const std::vector<float> voxel_size,
const std::vector<float> coors_range, const std::vector<int> grid_size,
const int num_points, const int num_features, const int NDim) {
const int ndim_minus_1 = NDim - 1;
bool failed = false;
// int coor[NDim];
int* coor = new int[NDim]();
int c;
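// For every point, quantize each coordinate into a voxel index; points
// falling outside coors_range get all their voxel coordinates set to -1.
// Note the write order is reversed (coor[NDim - 1 - j]), so coors is stored
// as (z, y, x) for 3-D input.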
for (int i = 0; i < num_points; ++i) {
failed = false;
for (int j = 0; j < NDim; ++j) {
c = floor((points[i][j] - coors_range[j]) / voxel_size[j]);
// necessary to remove points that fall out of range
if ((c < 0 || c >= grid_size[j])) {
failed = true;
break;
}
coor[ndim_minus_1 - j] = c;
}
if (failed)
memset(&coors[i][0], -1, NDim * sizeof(T_int));
else
memcpy(&coors[i][0], &coor[0], NDim * sizeof(T_int));
}
delete[] coor;
}
template <typename T, typename T_int>
void hard_voxelize_forward_cpu_kernel(
const torch::TensorAccessor<T, 2> points,
torch::TensorAccessor<T, 3> voxels, torch::TensorAccessor<T_int, 2> coors,
torch::TensorAccessor<T_int, 1> num_points_per_voxel,
torch::TensorAccessor<T_int, 3> coor_to_voxelidx, int& voxel_num,
const std::vector<float> voxel_size, const std::vector<float> coors_range,
const std::vector<int> grid_size, const int max_points,
const int max_voxels, const int num_points, const int num_features,
const int NDim) {
// declare a temp coors
at::Tensor temp_coors = at::zeros(
{num_points, NDim}, at::TensorOptions().dtype(at::kInt).device(at::kCPU));
// First use dynamic voxelization to get coors,
// then check max points/voxels constraints
dynamic_voxelize_forward_cpu_kernel<T, int>(
points, temp_coors.accessor<int, 2>(), voxel_size, coors_range, grid_size,
num_points, num_features, NDim);
int voxelidx, num;
auto coor = temp_coors.accessor<int, 2>();
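// coor_to_voxelidx is a dense (z, y, x) lookup grid initialized to -1: the
// first point landing in a cell allocates a new voxel (up to max_voxels),
// later points in the same cell are appended until max_points is reached and
// are dropped afterwards.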
for (int i = 0; i < num_points; ++i) {
// T_int* coor = temp_coors.data_ptr<int>() + i * NDim;
if (coor[i][0] == -1) continue;
voxelidx = coor_to_voxelidx[coor[i][0]][coor[i][1]][coor[i][2]];
// record voxel
if (voxelidx == -1) {
voxelidx = voxel_num;
if (max_voxels != -1 && voxel_num >= max_voxels) continue;
voxel_num += 1;
coor_to_voxelidx[coor[i][0]][coor[i][1]][coor[i][2]] = voxelidx;
memcpy(&coors[voxelidx][0], &coor[i][0], NDim * sizeof(T_int));
}
// put points into voxel
num = num_points_per_voxel[voxelidx];
if (max_points == -1 || num < max_points) {
memcpy(&voxels[voxelidx][num][0], &points[i][0],
num_features * sizeof(T));
num_points_per_voxel[voxelidx] += 1;
}
}
return;
}
void dynamic_voxelize_forward_cpu(const at::Tensor& points, at::Tensor& coors,
const std::vector<float> voxel_size,
const std::vector<float> coors_range,
const int NDim = 3) {
// check device
AT_ASSERTM(points.device().is_cpu(), "points must be a CPU tensor");
std::vector<int> grid_size(NDim);
const int num_points = points.size(0);
const int num_features = points.size(1);
for (int i = 0; i < NDim; ++i) {
grid_size[i] =
round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
}
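// coors_range is expected as [x_min, y_min, z_min, x_max, y_max, z_max]
// (the usual point_cloud_range layout), so coors_range[NDim + i] -
// coors_range[i] is the spatial extent along dimension i.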
// coors, num_points_per_voxel, coor_to_voxelidx are int Tensor
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
points.scalar_type(), "dynamic_voxelize_forward_cpu_kernel", [&] {
dynamic_voxelize_forward_cpu_kernel<scalar_t, int>(
points.accessor<scalar_t, 2>(), coors.accessor<int, 2>(),
voxel_size, coors_range, grid_size, num_points, num_features, NDim);
});
}
int hard_voxelize_forward_cpu(const at::Tensor& points, at::Tensor& voxels,
at::Tensor& coors,
at::Tensor& num_points_per_voxel,
const std::vector<float> voxel_size,
const std::vector<float> coors_range,
const int max_points, const int max_voxels,
const int NDim = 3) {
// the current version takes about 0.02s to 0.03s per frame on CPU
// check device
AT_ASSERTM(points.device().is_cpu(), "points must be a CPU tensor");
std::vector<int> grid_size(NDim);
const int num_points = points.size(0);
const int num_features = points.size(1);
for (int i = 0; i < NDim; ++i) {
grid_size[i] =
round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
}
// coors, num_points_per_voxel, coor_to_voxelidx are int Tensor
// printf("cpu coor_to_voxelidx size: [%d, %d, %d]\n", grid_size[2],
// grid_size[1], grid_size[0]);
at::Tensor coor_to_voxelidx =
-at::ones({grid_size[2], grid_size[1], grid_size[0]}, coors.options());
int voxel_num = 0;
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
points.scalar_type(), "hard_voxelize_forward_cpu_kernel", [&] {
hard_voxelize_forward_cpu_kernel<scalar_t, int>(
points.accessor<scalar_t, 2>(), voxels.accessor<scalar_t, 3>(),
coors.accessor<int, 2>(), num_points_per_voxel.accessor<int, 1>(),
coor_to_voxelidx.accessor<int, 3>(), voxel_num, voxel_size,
coors_range, grid_size, max_points, max_voxels, num_points,
num_features, NDim);
});
return voxel_num;
}
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
#include "voxelization_pytorch.h"
using namespace parrots;
#ifdef MMCV_WITH_CUDA
void hard_voxelize_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
int max_points, max_voxels, NDim;
SSAttrs(attr)
.get<int>("max_points", max_points)
.get<int>("max_voxels", max_voxels)
.get<int>("NDim", NDim)
.done();
const auto& points = buildATensor(ctx, ins[0]);
const auto& voxel_size = buildATensor(ctx, ins[1]);
const auto& coors_range = buildATensor(ctx, ins[2]);
auto voxels = buildATensor(ctx, outs[0]);
auto coors = buildATensor(ctx, outs[1]);
auto num_points_per_voxel = buildATensor(ctx, outs[2]);
auto voxel_num = buildATensor(ctx, outs[3]);
hard_voxelize_forward(points, voxel_size, coors_range, voxels, coors,
num_points_per_voxel, voxel_num, max_points, max_voxels,
NDim);
}
void dynamic_voxelize_forward_cuda_parrots(CudaContext& ctx,
const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
int NDim;
SSAttrs(attr).get<int>("NDim", NDim).done();
const auto& points = buildATensor(ctx, ins[0]);
const auto& voxel_size = buildATensor(ctx, ins[1]);
const auto& coors_range = buildATensor(ctx, ins[2]);
auto coors = buildATensor(ctx, outs[0]);
dynamic_voxelize_forward(points, voxel_size, coors_range, coors, NDim);
}
#endif
void hard_voxelize_forward_cpu_parrots(HostContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
int max_points, max_voxels, NDim;
SSAttrs(attr)
.get<int>("max_points", max_points)
.get<int>("max_voxels", max_voxels)
.get<int>("NDim", NDim)
.done();
const auto& points = buildATensor(ctx, ins[0]);
const auto& voxel_size = buildATensor(ctx, ins[1]);
const auto& coors_range = buildATensor(ctx, ins[2]);
auto voxels = buildATensor(ctx, outs[0]);
auto coors = buildATensor(ctx, outs[1]);
auto num_points_per_voxel = buildATensor(ctx, outs[2]);
auto voxel_num = buildATensor(ctx, outs[3]);
hard_voxelize_forward(points, voxel_size, coors_range, voxels, coors,
num_points_per_voxel, voxel_num, max_points, max_voxels,
NDim);
}
void dynamic_voxelize_forward_cpu_parrots(HostContext& ctx,
const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
int NDim;
SSAttrs(attr).get<int>("NDim", NDim).done();
const auto& points = buildATensor(ctx, ins[0]);
const auto& voxel_size = buildATensor(ctx, ins[1]);
const auto& coors_range = buildATensor(ctx, ins[2]);
auto coors = buildATensor(ctx, outs[0]);
dynamic_voxelize_forward(points, voxel_size, coors_range, coors, NDim);
}
PARROTS_EXTENSION_REGISTER(hard_voxelize_forward)
.attr("max_points")
.attr("max_voxels")
.attr("NDim")
.input(3)
.output(4)
.apply(hard_voxelize_forward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
.apply(hard_voxelize_forward_cuda_parrots)
#endif
.done();
PARROTS_EXTENSION_REGISTER(dynamic_voxelize_forward)
.attr("NDim")
.input(3)
.output(1)
.apply(dynamic_voxelize_forward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
.apply(dynamic_voxelize_forward_cuda_parrots)
#endif
.done();
// Copyright (c) OpenMMLab. All rights reserved
#ifndef VOXELIZATION_PYTORCH_H
#define VOXELIZATION_PYTORCH_H
#include <torch/extension.h>
using namespace at;
void hard_voxelize_forward(const at::Tensor &points,
const at::Tensor &voxel_size,
const at::Tensor &coors_range, at::Tensor &voxels,
at::Tensor &coors, at::Tensor &num_points_per_voxel,
at::Tensor &voxel_num, const int max_points,
const int max_voxels, const int NDim = 3);
void dynamic_voxelize_forward(const at::Tensor &points,
const at::Tensor &voxel_size,
const at::Tensor &coors_range, at::Tensor &coors,
const int NDim = 3);
#endif // VOXELIZATION_PYTORCH_H
@@ -27,9 +27,9 @@ void group_points_backward_cuda(int b, int c, int n, int npoints, int nsample,
 };
 #endif
-void group_points_forward(int b, int c, int n, int npoints, int nsample,
-                          Tensor points_tensor, Tensor idx_tensor,
-                          Tensor out_tensor) {
+void group_points_forward(Tensor points_tensor, Tensor idx_tensor,
+                          Tensor out_tensor, int b, int c, int n, int npoints,
+                          int nsample) {
   if (points_tensor.device().is_cuda()) {
 #ifdef MMCV_WITH_CUDA
     group_points_forward_cuda(b, c, n, npoints, nsample, points_tensor,
@@ -42,9 +42,9 @@ void group_points_forward(int b, int c, int n, int npoints, int nsample,
   }
 }
-void group_points_backward(int b, int c, int n, int npoints, int nsample,
-                           Tensor grad_out_tensor, Tensor idx_tensor,
-                           Tensor grad_points_tensor) {
+void group_points_backward(Tensor grad_out_tensor, Tensor idx_tensor,
+                           Tensor grad_points_tensor, int b, int c, int n,
+                           int npoints, int nsample) {
   if (grad_out_tensor.device().is_cuda()) {
 #ifdef MMCV_WITH_CUDA
     group_points_backward_cuda(b, c, n, npoints, nsample, grad_out_tensor,

@@ -120,7 +120,8 @@ void iou3d_boxes_iou_bev_forward(Tensor boxes_a, Tensor boxes_b,
   }
 }
-int iou3d_nms_forward(Tensor boxes, Tensor keep, float nms_overlap_thresh) {
+void iou3d_nms_forward(Tensor boxes, Tensor keep, Tensor keep_num,
+                       float nms_overlap_thresh) {
   // params boxes: (N, 5) [x1, y1, x2, y2, ry]
   // params keep: (N)
@@ -131,6 +132,7 @@ int iou3d_nms_forward(Tensor boxes, Tensor keep, float nms_overlap_thresh) {
     int boxes_num = boxes.size(0);
     int64_t *keep_data = keep.data_ptr<int64_t>();
+    int64_t *keep_num_data = keep_num.data_ptr<int64_t>();
     const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
@@ -163,8 +165,7 @@ int iou3d_nms_forward(Tensor boxes, Tensor keep, float nms_overlap_thresh) {
     }
     if (cudaSuccess != cudaGetLastError()) printf("Error!\n");
-    return num_to_keep;
+    *keep_num_data = num_to_keep;
 #else
   AT_ERROR("iou3d_nms is not compiled with GPU support");
@@ -174,8 +175,8 @@ int iou3d_nms_forward(Tensor boxes, Tensor keep, float nms_overlap_thresh) {
   }
 }
-int iou3d_nms_normal_forward(Tensor boxes, Tensor keep,
-                             float nms_overlap_thresh) {
+void iou3d_nms_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num,
+                              float nms_overlap_thresh) {
   // params boxes: (N, 5) [x1, y1, x2, y2, ry]
   // params keep: (N)
@@ -186,6 +187,7 @@ int iou3d_nms_normal_forward(Tensor boxes, Tensor keep,
     int boxes_num = boxes.size(0);
     int64_t *keep_data = keep.data_ptr<int64_t>();
+    int64_t *keep_num_data = keep_num.data_ptr<int64_t>();
     const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
@@ -219,7 +221,7 @@ int iou3d_nms_normal_forward(Tensor boxes, Tensor keep,
     if (cudaSuccess != cudaGetLastError()) printf("Error!\n");
-    return num_to_keep;
+    *keep_num_data = num_to_keep;
 #else
   AT_ERROR("iou3d_nms_normal is not compiled with GPU support");

@@ -65,13 +65,13 @@ void deform_roi_pool_backward(Tensor grad_output, Tensor input, Tensor rois,
                               int pooled_width, float spatial_scale,
                               int sampling_ratio, float gamma);
-void group_points_forward(int b, int c, int n, int npoints, int nsample,
-                          Tensor points_tensor, Tensor idx_tensor,
-                          Tensor out_tensor);
+void group_points_forward(Tensor points_tensor, Tensor idx_tensor,
+                          Tensor out_tensor, int b, int c, int n, int npoints,
+                          int nsample);
-void group_points_backward(int b, int c, int n, int npoints, int nsample,
-                           Tensor grad_out_tensor, Tensor idx_tensor,
-                           Tensor grad_points_tensor);
+void group_points_backward(Tensor grad_out_tensor, Tensor idx_tensor,
+                           Tensor grad_points_tensor, int b, int c, int n,
+                           int npoints, int nsample);
 void roipoint_pool3d_forward(Tensor xyz, Tensor boxes3d, Tensor pts_feature,
                              Tensor pooled_features, Tensor pooled_empty_flag);
@@ -119,9 +119,10 @@ void iou3d_boxes_overlap_bev_forward(Tensor boxes_a, Tensor boxes_b,
 void iou3d_boxes_iou_bev_forward(Tensor boxes_a, Tensor boxes_b,
                                  Tensor ans_iou);
-int iou3d_nms_forward(Tensor boxes, Tensor keep, float nms_overlap_thresh);
+int iou3d_nms_forward(Tensor boxes, Tensor keep, Tensor keep_num,
+                      float nms_overlap_thresh);
-int iou3d_nms_normal_forward(Tensor boxes, Tensor keep,
+int iou3d_nms_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num,
                              float nms_overlap_thresh);
 void furthest_point_sampling_forward(Tensor points_tensor, Tensor temp_tensor,
@@ -292,16 +293,16 @@ void dynamic_point_to_voxel_backward(torch::Tensor &grad_feats,
                                      const torch::Tensor &reduce_count,
                                      const std::string &reduce_type);
-int hard_voxelize_forward(const at::Tensor &points, at::Tensor &voxels,
-                          at::Tensor &coors, at::Tensor &num_points_per_voxel,
-                          const std::vector<float> voxel_size,
-                          const std::vector<float> coors_range,
-                          const int max_points, const int max_voxels,
-                          const int NDim);
+void hard_voxelize_forward(const at::Tensor &points,
+                           const at::Tensor &voxel_size,
+                           const at::Tensor &coors_range, at::Tensor &voxels,
+                           at::Tensor &coors, at::Tensor &num_points_per_voxel,
+                           at::Tensor &voxel_num, const int max_points,
+                           const int max_voxels, const int NDim);
-void dynamic_voxelize_forward(const at::Tensor &points, at::Tensor &coors,
-                              const std::vector<float> voxel_size,
-                              const std::vector<float> coors_range,
+void dynamic_voxelize_forward(const at::Tensor &points,
+                              const at::Tensor &voxel_size,
+                              const at::Tensor &coors_range, at::Tensor &coors,
                               const int NDim);
 void border_align_forward(const Tensor &input, const Tensor &boxes,
@@ -459,13 +460,13 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
         py::arg("bboxes2"), py::arg("ious"), py::arg("mode"),
         py::arg("aligned"), py::arg("offset"));
   m.def("group_points_forward", &group_points_forward, "group_points_forward",
-        py::arg("b"), py::arg("c"), py::arg("n"), py::arg("npoints"),
-        py::arg("nsample"), py::arg("points_tensor"), py::arg("idx_tensor"),
-        py::arg("out_tensor"));
+        py::arg("points_tensor"), py::arg("idx_tensor"), py::arg("out_tensor"),
+        py::arg("b"), py::arg("c"), py::arg("n"), py::arg("npoints"),
+        py::arg("nsample"));
   m.def("group_points_backward", &group_points_backward,
-        "group_points_backward", py::arg("b"), py::arg("c"), py::arg("n"),
-        py::arg("npoints"), py::arg("nsample"), py::arg("grad_out_tensor"),
-        py::arg("idx_tensor"), py::arg("grad_points_tensor"));
+        "group_points_backward", py::arg("grad_out_tensor"),
+        py::arg("idx_tensor"), py::arg("grad_points_tensor"), py::arg("b"),
+        py::arg("c"), py::arg("n"), py::arg("npoints"), py::arg("nsample"));
   m.def("knn_forward", &knn_forward, "knn_forward", py::arg("b"), py::arg("n"),
         py::arg("m"), py::arg("nsample"), py::arg("xyz_tensor"),
         py::arg("new_xyz_tensor"), py::arg("idx_tensor"),
@@ -477,10 +478,11 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
         "iou3d_boxes_iou_bev_forward", py::arg("boxes_a"), py::arg("boxes_b"),
         py::arg("ans_iou"));
   m.def("iou3d_nms_forward", &iou3d_nms_forward, "iou3d_nms_forward",
-        py::arg("boxes"), py::arg("keep"), py::arg("nms_overlap_thresh"));
+        py::arg("boxes"), py::arg("keep"), py::arg("num_out"),
+        py::arg("nms_overlap_thresh"));
   m.def("iou3d_nms_normal_forward", &iou3d_nms_normal_forward,
         "iou3d_nms_normal_forward", py::arg("boxes"), py::arg("keep"),
-        py::arg("nms_overlap_thresh"));
+        py::arg("num_out"), py::arg("nms_overlap_thresh"));
   m.def("furthest_point_sampling_forward", &furthest_point_sampling_forward,
         "furthest_point_sampling_forward", py::arg("points_tensor"),
         py::arg("temp_tensor"), py::arg("idx_tensor"), py::arg("b"),
@@ -615,8 +617,8 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
         py::arg("spatial_scale"), py::arg("sample_num"), py::arg("aligned"),
         py::arg("clockwise"));
   m.def("roi_align_rotated_backward", &roi_align_rotated_backward,
-        "roi_align_rotated backward", py::arg("grad_output"), py::arg("rois"),
-        py::arg("grad_input"), py::arg("pooled_height"),
+        "roi_align_rotated backward", py::arg("rois"), py::arg("grad_input"),
+        py::arg("grad_output"), py::arg("pooled_height"),
         py::arg("pooled_width"), py::arg("spatial_scale"),
         py::arg("sample_num"), py::arg("aligned"), py::arg("clockwise"));
   m.def("dynamic_point_to_voxel_forward", &dynamic_point_to_voxel_forward,
@@ -628,13 +630,13 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
         py::arg("reduced_feats"), py::arg("coors_idx"), py::arg("reduce_count"),
         py::arg("reduce_type"));
   m.def("hard_voxelize_forward", &hard_voxelize_forward,
-        "hard_voxelize_forward", py::arg("points"), py::arg("voxels"),
-        py::arg("coors"), py::arg("num_points_per_voxel"),
-        py::arg("voxel_size"), py::arg("coors_range"), py::arg("max_points"),
-        py::arg("max_voxels"), py::arg("NDim"));
+        "hard_voxelize_forward", py::arg("points"), py::arg("voxel_size"),
+        py::arg("coors_range"), py::arg("voxels"), py::arg("coors"),
+        py::arg("num_points_per_voxel"), py::arg("voxel_num"),
+        py::arg("max_points"), py::arg("max_voxels"), py::arg("NDim"));
   m.def("dynamic_voxelize_forward", &dynamic_voxelize_forward,
-        "dynamic_voxelize_forward", py::arg("points"), py::arg("coors"),
-        py::arg("voxel_size"), py::arg("coors_range"), py::arg("NDim"));
+        "dynamic_voxelize_forward", py::arg("points"), py::arg("voxel_size"),
+        py::arg("coors_range"), py::arg("coors"), py::arg("NDim"));
   m.def("ms_deform_attn_forward", &ms_deform_attn_forward,
         "forward function of multi-scale deformable attention",
         py::arg("value"), py::arg("value_spatial_shapes"),