Unverified Commit 2d73eafe authored by pc's avatar pc Committed by GitHub
Browse files

add mmdet3d op (#1425)


Co-authored-by: default avatarzhouzaida <zhouzaida@163.com>
parent 75cae78c
// Copyright (c) OpenMMLab. All rights reserved
#ifndef KNN_PYTORCH_H
#define KNN_PYTORCH_H
#include <torch/extension.h>
using namespace at;
// Dispatch entry point for the KNN op; implemented in the matching .cpp.
// Writes neighbour indices into idx_tensor and (presumably squared)
// distances into dist2_tensor — semantics live in the CUDA kernel, not here.
// b/n/m/nsample are the integer sizes forwarded to the kernel launcher.
void knn_forward(Tensor xyz_tensor, Tensor new_xyz_tensor, Tensor idx_tensor,
Tensor dist2_tensor, int b, int n, int m, int nsample);
#endif  // KNN_PYTORCH_H
/*
Modified from
https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/roipoint_pool3d/src/roipoint_pool3d.cpp
Point cloud feature pooling
Written by Shaoshuai Shi
All Rights Reserved 2018.
*/
#include "pytorch_cpp_helper.hpp"
#ifdef MMCV_WITH_CUDA
// Defined in the CUDA compilation unit; performs the actual RoI point
// pooling on the GPU.
void RoIPointPool3dForwardCUDAKernelLauncher(
    int batch_size, int pts_num, int boxes_num, int feature_in_len,
    int sampled_pts_num, const Tensor xyz, const Tensor boxes3d,
    const Tensor pts_feature, Tensor pooled_features, Tensor pooled_empty_flag);

// Thin dispatch wrapper: forwards every argument unchanged to the CUDA
// kernel launcher. All size arguments are derived by the caller from the
// tensors themselves.
void roipoint_pool3d_forward_cuda(int batch_size, int pts_num, int boxes_num,
                                  int feature_in_len, int sampled_pts_num,
                                  const Tensor xyz, const Tensor boxes3d,
                                  const Tensor pts_feature,
                                  Tensor pooled_features,
                                  Tensor pooled_empty_flag) {
  RoIPointPool3dForwardCUDAKernelLauncher(
      batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num, xyz,
      boxes3d, pts_feature, pooled_features, pooled_empty_flag);
}  // fixed: removed stray ';' that followed the function definition
#endif
// Device-dispatching entry point for RoI point pooling. CUDA-only: any
// non-CUDA input raises via AT_ERROR, as does a CUDA input when the
// extension was built without MMCV_WITH_CUDA.
void roipoint_pool3d_forward(Tensor xyz, Tensor boxes3d, Tensor pts_feature,
Tensor pooled_features, Tensor pooled_empty_flag) {
// params xyz: (B, N, 3)
// params boxes3d: (B, M, 7)
// params pts_feature: (B, N, C)
// params pooled_features: (B, M, 512, 3+C)
// params pooled_empty_flag: (B, M)
if (xyz.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
// CHECK_CUDA_INPUT comes from pytorch_cpp_helper.hpp — presumably
// validates device/contiguity; confirm against that header.
CHECK_CUDA_INPUT(xyz);
CHECK_CUDA_INPUT(boxes3d);
CHECK_CUDA_INPUT(pts_feature);
CHECK_CUDA_INPUT(pooled_features);
CHECK_CUDA_INPUT(pooled_empty_flag);
// Sizes are taken from the tensors rather than passed in by the caller.
int batch_size = xyz.size(0);
int pts_num = xyz.size(1);
int boxes_num = boxes3d.size(1);
int feature_in_len = pts_feature.size(2);
int sampled_pts_num = pooled_features.size(2);
roipoint_pool3d_forward_cuda(batch_size, pts_num, boxes_num, feature_in_len,
sampled_pts_num, xyz, boxes3d, pts_feature,
pooled_features, pooled_empty_flag);
#else
AT_ERROR("roipoint_pool3d is not compiled with GPU support");
#endif
} else {
// No CPU implementation exists for this op.
AT_ERROR("roipoint_pool3d is not implemented on CPU");
}
}
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
#include "roipoint_pool3d_pytorch.h"
using namespace parrots;
#ifdef MMCV_WITH_CUDA
// Parrots adapter: converts the framework's input/output buffers into ATen
// tensors and hands them to the shared dispatch entry point. The scalar
// attribute block is unused by this op.
void roipoint_pool3d_forward_cuda_parrots(CudaContext& ctx,
                                          const SSElement& attr,
                                          const OperatorBase::in_list_t& ins,
                                          OperatorBase::out_list_t& outs) {
  // Outputs first: pooled features and the per-box empty flag.
  auto pooled_feats = buildATensor(ctx, outs[0]);
  auto empty_flag = buildATensor(ctx, outs[1]);
  // Inputs: point coordinates, 3D boxes, per-point features.
  auto points_xyz = buildATensor(ctx, ins[0]);
  auto rois = buildATensor(ctx, ins[1]);
  auto point_feats = buildATensor(ctx, ins[2]);
  roipoint_pool3d_forward(points_xyz, rois, point_feats, pooled_feats,
                          empty_flag);
}
// Register the op with parrots: 3 inputs (xyz, boxes3d, pts_feature),
// 2 outputs (pooled_features, pooled_empty_flag); CUDA path only, since
// this whole file section is guarded by MMCV_WITH_CUDA.
PARROTS_EXTENSION_REGISTER(roipoint_pool3d_forward)
.input(3)
.output(2)
.apply(roipoint_pool3d_forward_cuda_parrots)
.done();
#endif
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ROIPOINT_POOL3D_PYTORCH_H
#define ROIPOINT_POOL3D_PYTORCH_H
#include <torch/extension.h>
using namespace at;
// Device-dispatching entry point declared for the parrots binding; the
// definition (and the tensor-shape contract) lives in the matching .cpp.
void roipoint_pool3d_forward(Tensor xyz, Tensor boxes3d, Tensor pts_feature,
Tensor pooled_features, Tensor pooled_empty_flag);
#endif  // ROIPOINT_POOL3D_PYTORCH_H
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate.cpp
#include "pytorch_cpp_helper.hpp"
#ifdef MMCV_WITH_CUDA
// Defined in the CUDA compilation unit; computes the forward pass of
// three-point feature interpolation on the GPU.
void ThreeInterpolateForwardCUDAKernelLauncher(int b, int c, int m, int n,
                                               const Tensor points,
                                               const Tensor idx,
                                               const Tensor weight, Tensor out);

// Thin dispatch wrapper: forwards all arguments unchanged to the CUDA
// kernel launcher.
void three_interpolate_forward_cuda(int b, int c, int m, int n,
                                    const Tensor points, const Tensor idx,
                                    const Tensor weight, Tensor out) {
  ThreeInterpolateForwardCUDAKernelLauncher(b, c, m, n, points, idx, weight,
                                            out);
}  // fixed: removed stray ';' that followed the function definition
// Defined in the CUDA compilation unit; computes the backward pass of
// three-point feature interpolation on the GPU.
void ThreeInterpolateBackwardCUDAKernelLauncher(int b, int c, int n, int m,
                                                const Tensor grad_out,
                                                const Tensor idx,
                                                const Tensor weight,
                                                Tensor grad_points);

// Thin dispatch wrapper: forwards all arguments unchanged to the CUDA
// kernel launcher.
void three_interpolate_backward_cuda(int b, int c, int n, int m,
                                     const Tensor grad_out, const Tensor idx,
                                     const Tensor weight, Tensor grad_points) {
  ThreeInterpolateBackwardCUDAKernelLauncher(b, c, n, m, grad_out, idx, weight,
                                             grad_points);
}  // fixed: removed stray ';' that followed the function definition
#endif
// Device-dispatching entry point for the three_interpolate forward pass.
// CUDA-only: errors out on CPU tensors or in non-CUDA builds. Note the
// argument order differs from the _cuda wrapper (tensors first here,
// sizes first there).
void three_interpolate_forward(Tensor points_tensor, Tensor idx_tensor,
Tensor weight_tensor, Tensor out_tensor, int b,
int c, int m, int n) {
if (points_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
three_interpolate_forward_cuda(b, c, m, n, points_tensor, idx_tensor,
weight_tensor, out_tensor);
#else
AT_ERROR("three_interpolate is not compiled with GPU support");
#endif
} else {
// No CPU implementation exists for this op.
AT_ERROR("three_interpolate is not implemented on CPU");
}
}
// Device-dispatching entry point for the three_interpolate backward pass.
// CUDA-only: errors out on CPU tensors or in non-CUDA builds. Gradients
// are accumulated into grad_points_tensor by the CUDA kernel.
void three_interpolate_backward(Tensor grad_out_tensor, Tensor idx_tensor,
Tensor weight_tensor, Tensor grad_points_tensor,
int b, int c, int n, int m) {
if (grad_out_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
three_interpolate_backward_cuda(b, c, n, m, grad_out_tensor, idx_tensor,
weight_tensor, grad_points_tensor);
#else
AT_ERROR("three_interpolate is not compiled with GPU support");
#endif
} else {
// No CPU implementation exists for this op.
AT_ERROR("three_interpolate is not implemented on CPU");
}
}
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
#include "three_interpolate_pytorch.h"
using namespace parrots;
#ifdef MMCV_WITH_CUDA
// Parrots adapter for the three_interpolate forward pass: decode the scalar
// attributes, wrap the framework buffers as ATen tensors, and delegate to
// the shared dispatch entry point.
void three_interpolate_forward_cuda_parrots(CudaContext& ctx,
                                            const SSElement& attr,
                                            const OperatorBase::in_list_t& ins,
                                            OperatorBase::out_list_t& outs) {
  // Scalar sizes carried as op attributes.
  int b = 0;
  int c = 0;
  int m = 0;
  int n = 0;
  SSAttrs(attr)
      .get<int>("b", b)
      .get<int>("c", c)
      .get<int>("m", m)
      .get<int>("n", n)
      .done();
  // Bridge parrots buffers to ATen tensors (3 inputs, 1 output).
  auto points = buildATensor(ctx, ins[0]);
  auto indices = buildATensor(ctx, ins[1]);
  auto weights = buildATensor(ctx, ins[2]);
  auto output = buildATensor(ctx, outs[0]);
  three_interpolate_forward(points, indices, weights, output, b, c, m, n);
}
// Parrots adapter for the three_interpolate backward pass: decode the
// scalar attributes, wrap the framework buffers as ATen tensors, and
// delegate to the shared dispatch entry point.
void three_interpolate_backward_cuda_parrots(CudaContext& ctx,
                                             const SSElement& attr,
                                             const OperatorBase::in_list_t& ins,
                                             OperatorBase::out_list_t& outs) {
  // Scalar sizes carried as op attributes.
  int b = 0;
  int c = 0;
  int n = 0;
  int m = 0;
  SSAttrs(attr)
      .get<int>("b", b)
      .get<int>("c", c)
      .get<int>("n", n)
      .get<int>("m", m)
      .done();
  // Bridge parrots buffers to ATen tensors (3 inputs, 1 output).
  auto upstream_grad = buildATensor(ctx, ins[0]);
  auto indices = buildATensor(ctx, ins[1]);
  auto weights = buildATensor(ctx, ins[2]);
  auto points_grad = buildATensor(ctx, outs[0]);
  three_interpolate_backward(upstream_grad, indices, weights, points_grad, b,
                             c, n, m);
}
// Register the forward op: attrs b/c/m/n; 3 inputs (points, idx, weight),
// 1 output (out).
PARROTS_EXTENSION_REGISTER(three_interpolate_forward)
.attr("b")
.attr("c")
.attr("m")
.attr("n")
.input(3)
.output(1)
.apply(three_interpolate_forward_cuda_parrots)
.done();
// Register the backward op: attrs b/c/n/m; 3 inputs (grad_out, idx,
// weight), 1 output (grad_points).
PARROTS_EXTENSION_REGISTER(three_interpolate_backward)
.attr("b")
.attr("c")
.attr("n")
.attr("m")
.input(3)
.output(1)
.apply(three_interpolate_backward_cuda_parrots)
.done();
#endif
// Copyright (c) OpenMMLab. All rights reserved
#ifndef THREE_INTERPOLATE_PYTORCH_H
#define THREE_INTERPOLATE_PYTORCH_H
#include <torch/extension.h>
using namespace at;
// Forward pass of three-point feature interpolation; defined in the
// matching .cpp. Note m precedes n here.
void three_interpolate_forward(Tensor points_tensor, Tensor idx_tensor,
Tensor weight_tensor, Tensor out_tensor, int b,
int c, int m, int n);
// Backward pass; defined in the matching .cpp. Note n precedes m here,
// mirroring the forward/backward CUDA launcher signatures.
void three_interpolate_backward(Tensor grad_out_tensor, Tensor idx_tensor,
Tensor weight_tensor, Tensor grad_points_tensor,
int b, int c, int n, int m);
#endif  // THREE_INTERPOLATE_PYTORCH_H
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate.cpp
#include "pytorch_cpp_helper.hpp"
#ifdef MMCV_WITH_CUDA
// Defined in the CUDA compilation unit; finds, per query point, its three
// nearest reference points (writing dist2 and idx).
void ThreeNNForwardCUDAKernelLauncher(int b, int n, int m, const Tensor unknown,
                                      const Tensor known, Tensor dist2,
                                      Tensor idx);

// Thin dispatch wrapper: forwards all arguments unchanged to the CUDA
// kernel launcher.
void three_nn_forward_cuda(int b, int n, int m, const Tensor unknown,
                           const Tensor known, Tensor dist2, Tensor idx) {
  ThreeNNForwardCUDAKernelLauncher(b, n, m, unknown, known, dist2, idx);
}  // fixed: removed stray ';' that followed the function definition
#endif
// Device-dispatching entry point for the three-nearest-neighbour op.
// CUDA-only: errors out on CPU tensors or in non-CUDA builds. Results go
// into dist2_tensor / idx_tensor.
void three_nn_forward(Tensor unknown_tensor, Tensor known_tensor,
Tensor dist2_tensor, Tensor idx_tensor, int b, int n,
int m) {
if (unknown_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
three_nn_forward_cuda(b, n, m, unknown_tensor, known_tensor, dist2_tensor,
idx_tensor);
#else
AT_ERROR("three_nn is not compiled with GPU support");
#endif
} else {
// No CPU implementation exists for this op.
AT_ERROR("three_nn is not implemented on CPU");
}
}
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
#include "three_nn_pytorch.h"
using namespace parrots;
#ifdef MMCV_WITH_CUDA
// Parrots adapter for the three_nn forward pass: decode the scalar
// attributes, wrap the framework buffers as ATen tensors, and delegate to
// the shared dispatch entry point.
void three_nn_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                   const OperatorBase::in_list_t& ins,
                                   OperatorBase::out_list_t& outs) {
  // Scalar sizes carried as op attributes.
  int b = 0;
  int n = 0;
  int m = 0;
  SSAttrs(attr).get<int>("b", b).get<int>("n", n).get<int>("m", m).done();
  // Bridge parrots buffers to ATen tensors (2 inputs, 2 outputs).
  auto queries = buildATensor(ctx, ins[0]);
  auto references = buildATensor(ctx, ins[1]);
  auto sq_dists = buildATensor(ctx, outs[0]);
  auto neighbor_idx = buildATensor(ctx, outs[1]);
  three_nn_forward(queries, references, sq_dists, neighbor_idx, b, n, m);
}
// Register the op with parrots: attrs b/n/m; 2 inputs (unknown, known),
// 2 outputs (dist2, idx); CUDA-only (guarded by MMCV_WITH_CUDA).
PARROTS_EXTENSION_REGISTER(three_nn_forward)
.attr("b")
.attr("n")
.attr("m")
.input(2)
.output(2)
.apply(three_nn_forward_cuda_parrots)
.done();
#endif
// Copyright (c) OpenMMLab. All rights reserved
#ifndef THREE_NN_PYTORCH_H
#define THREE_NN_PYTORCH_H
#include <torch/extension.h>
using namespace at;
// Device-dispatching entry point for the three-nearest-neighbour op,
// declared for the parrots binding; defined in the matching .cpp.
void three_nn_forward(Tensor unknown_tensor, Tensor known_tensor,
Tensor dist2_tensor, Tensor idx_tensor, int b, int n,
int m);
#endif  // THREE_NN_PYTORCH_H
...@@ -34,10 +34,10 @@ void assign_score_withk_backward_cuda( ...@@ -34,10 +34,10 @@ void assign_score_withk_backward_cuda(
}; };
#endif #endif
void assign_score_withk_forward(int B, int N0, int N1, int M, int K, int O, void assign_score_withk_forward(const Tensor& points, const Tensor& centers,
int aggregate, const Tensor& points, const Tensor& scores, const Tensor& knn_idx,
const Tensor& centers, const Tensor& scores, Tensor& output, int B, int N0, int N1, int M,
const Tensor& knn_idx, Tensor& output) { int K, int O, int aggregate) {
if (points.device().is_cuda()) { if (points.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CONTIGUOUS(points); CHECK_CONTIGUOUS(points);
...@@ -56,12 +56,12 @@ void assign_score_withk_forward(int B, int N0, int N1, int M, int K, int O, ...@@ -56,12 +56,12 @@ void assign_score_withk_forward(int B, int N0, int N1, int M, int K, int O,
} }
} }
void assign_score_withk_backward(int B, int N0, int N1, int M, int K, int O, void assign_score_withk_backward(const Tensor& grad_out, const Tensor& points,
int aggregate, const Tensor& grad_out, const Tensor& centers, const Tensor& scores,
const Tensor& points, const Tensor& centers, const Tensor& knn_idx, Tensor& grad_points,
const Tensor& scores, const Tensor& knn_idx, Tensor& grad_centers, Tensor& grad_scores,
Tensor& grad_points, Tensor& grad_centers, int B, int N0, int N1, int M, int K, int O,
Tensor& grad_scores) { int aggregate) {
if (grad_points.device().is_cuda()) { if (grad_points.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CONTIGUOUS(grad_out); CHECK_CONTIGUOUS(grad_out);
......
...@@ -18,9 +18,9 @@ void ball_query_forward_cuda(int b, int n, int m, float min_radius, ...@@ -18,9 +18,9 @@ void ball_query_forward_cuda(int b, int n, int m, float min_radius,
}; };
#endif #endif
void ball_query_forward(int b, int n, int m, float min_radius, float max_radius, void ball_query_forward(Tensor new_xyz_tensor, Tensor xyz_tensor,
int nsample, Tensor new_xyz_tensor, Tensor xyz_tensor, Tensor idx_tensor, int b, int n, int m,
Tensor idx_tensor) { float min_radius, float max_radius, int nsample) {
if (new_xyz_tensor.device().is_cuda()) { if (new_xyz_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(new_xyz_tensor); CHECK_CUDA_INPUT(new_xyz_tensor);
......
...@@ -25,8 +25,8 @@ void furthest_point_sampling_with_dist_forward_cuda(int b, int n, int m, ...@@ -25,8 +25,8 @@ void furthest_point_sampling_with_dist_forward_cuda(int b, int n, int m,
} }
#endif #endif
void furthest_point_sampling_forward(int b, int n, int m, Tensor points_tensor, void furthest_point_sampling_forward(Tensor points_tensor, Tensor temp_tensor,
Tensor temp_tensor, Tensor idx_tensor) { Tensor idx_tensor, int b, int n, int m) {
if (points_tensor.device().is_cuda()) { if (points_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA #ifdef MMCV_WITH_CUDA
const float *points = points_tensor.data_ptr<float>(); const float *points = points_tensor.data_ptr<float>();
...@@ -41,10 +41,10 @@ void furthest_point_sampling_forward(int b, int n, int m, Tensor points_tensor, ...@@ -41,10 +41,10 @@ void furthest_point_sampling_forward(int b, int n, int m, Tensor points_tensor,
} }
} }
void furthest_point_sampling_with_dist_forward(int b, int n, int m, void furthest_point_sampling_with_dist_forward(Tensor points_tensor,
Tensor points_tensor,
Tensor temp_tensor, Tensor temp_tensor,
Tensor idx_tensor) { Tensor idx_tensor, int b, int n,
int m) {
if (points_tensor.device().is_cuda()) { if (points_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA #ifdef MMCV_WITH_CUDA
const float *points = points_tensor.data<float>(); const float *points = points_tensor.data<float>();
......
...@@ -24,9 +24,9 @@ void gather_points_backward_cuda(int b, int c, int n, int npoints, ...@@ -24,9 +24,9 @@ void gather_points_backward_cuda(int b, int c, int n, int npoints,
}; };
#endif #endif
void gather_points_forward(int b, int c, int n, int npoints, void gather_points_forward(Tensor points_tensor, Tensor idx_tensor,
Tensor points_tensor, Tensor idx_tensor, Tensor out_tensor, int b, int c, int n,
Tensor out_tensor) { int npoints) {
if (points_tensor.device().is_cuda()) { if (points_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA #ifdef MMCV_WITH_CUDA
gather_points_forward_cuda(b, c, n, npoints, points_tensor, idx_tensor, gather_points_forward_cuda(b, c, n, npoints, points_tensor, idx_tensor,
...@@ -39,9 +39,9 @@ void gather_points_forward(int b, int c, int n, int npoints, ...@@ -39,9 +39,9 @@ void gather_points_forward(int b, int c, int n, int npoints,
} }
} }
void gather_points_backward(int b, int c, int n, int npoints, void gather_points_backward(Tensor grad_out_tensor, Tensor idx_tensor,
Tensor grad_out_tensor, Tensor idx_tensor, Tensor grad_points_tensor, int b, int c, int n,
Tensor grad_points_tensor) { int npoints) {
if (grad_out_tensor.device().is_cuda()) { if (grad_out_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA #ifdef MMCV_WITH_CUDA
gather_points_backward_cuda(b, c, n, npoints, grad_out_tensor, idx_tensor, gather_points_backward_cuda(b, c, n, npoints, grad_out_tensor, idx_tensor,
......
...@@ -14,9 +14,8 @@ void knn_forward_cuda(int b, int n, int m, int nsample, const Tensor xyz, ...@@ -14,9 +14,8 @@ void knn_forward_cuda(int b, int n, int m, int nsample, const Tensor xyz,
} }
#endif #endif
void knn_forward(int b, int n, int m, int nsample, Tensor xyz_tensor, void knn_forward(Tensor xyz_tensor, Tensor new_xyz_tensor, Tensor idx_tensor,
Tensor new_xyz_tensor, Tensor idx_tensor, Tensor dist2_tensor, int b, int n, int m, int nsample) {
Tensor dist2_tensor) {
if (new_xyz_tensor.device().is_cuda()) { if (new_xyz_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(new_xyz_tensor); CHECK_CUDA_INPUT(new_xyz_tensor);
......
...@@ -4,17 +4,17 @@ ...@@ -4,17 +4,17 @@
std::string get_compiler_version(); std::string get_compiler_version();
std::string get_compiling_cuda_version(); std::string get_compiling_cuda_version();
void assign_score_withk_forward(int B, int N0, int N1, int M, int K, int O, void assign_score_withk_forward(const Tensor &points, const Tensor &centers,
int aggregate, const Tensor &points, const Tensor &scores, const Tensor &knn_idx,
const Tensor &centers, const Tensor &scores, Tensor &output, int B, int N0, int N1, int M,
const Tensor &knn_idx, Tensor &output); int K, int O, int aggregate);
void assign_score_withk_backward(int B, int N0, int N1, int M, int K, int O, void assign_score_withk_backward(const Tensor &grad_out, const Tensor &points,
int aggregate, const Tensor &grad_out, const Tensor &centers, const Tensor &scores,
const Tensor &points, const Tensor &centers, const Tensor &knn_idx, Tensor &grad_points,
const Tensor &scores, const Tensor &knn_idx, Tensor &grad_centers, Tensor &grad_scores,
Tensor &grad_points, Tensor &grad_centers, int B, int N0, int N1, int M, int K, int O,
Tensor &grad_scores); int aggregate);
void carafe_naive_forward(Tensor features, Tensor masks, Tensor output, void carafe_naive_forward(Tensor features, Tensor masks, Tensor output,
int kernel_size, int group_size, int scale_factor); int kernel_size, int group_size, int scale_factor);
...@@ -76,13 +76,12 @@ void group_points_backward(int b, int c, int n, int npoints, int nsample, ...@@ -76,13 +76,12 @@ void group_points_backward(int b, int c, int n, int npoints, int nsample,
void roipoint_pool3d_forward(Tensor xyz, Tensor boxes3d, Tensor pts_feature, void roipoint_pool3d_forward(Tensor xyz, Tensor boxes3d, Tensor pts_feature,
Tensor pooled_features, Tensor pooled_empty_flag); Tensor pooled_features, Tensor pooled_empty_flag);
void gather_points_forward(int b, int c, int n, int npoints, void gather_points_forward(Tensor points_tensor, Tensor idx_tensor,
Tensor points_tensor, Tensor idx_tensor, Tensor out_tensor, int b, int c, int n, int npoints);
Tensor out_tensor);
void gather_points_backward(int b, int c, int n, int npoints, void gather_points_backward(Tensor grad_out_tensor, Tensor idx_tensor,
Tensor grad_out_tensor, Tensor idx_tensor, Tensor grad_points_tensor, int b, int c, int n,
Tensor grad_points_tensor); int npoints);
void sigmoid_focal_loss_forward(Tensor input, Tensor target, Tensor weight, void sigmoid_focal_loss_forward(Tensor input, Tensor target, Tensor weight,
Tensor output, float gamma, float alpha); Tensor output, float gamma, float alpha);
...@@ -97,22 +96,23 @@ void softmax_focal_loss_backward(Tensor input, Tensor target, Tensor weight, ...@@ -97,22 +96,23 @@ void softmax_focal_loss_backward(Tensor input, Tensor target, Tensor weight,
Tensor buff, Tensor grad_input, float gamma, Tensor buff, Tensor grad_input, float gamma,
float alpha); float alpha);
void three_interpolate_forward(int b, int c, int m, int n, Tensor points_tensor, void three_interpolate_forward(Tensor points_tensor, Tensor idx_tensor,
Tensor idx_tensor, Tensor weight_tensor, Tensor weight_tensor, Tensor out_tensor, int b,
Tensor out_tensor); int c, int m, int n);
void three_interpolate_backward(int b, int c, int n, int m, void three_interpolate_backward(Tensor grad_out_tensor, Tensor idx_tensor,
Tensor grad_out_tensor, Tensor idx_tensor, Tensor weight_tensor, Tensor grad_points_tensor,
Tensor weight_tensor, int b, int c, int n, int m);
Tensor grad_points_tensor);
void three_nn_forward(int b, int n, int m, Tensor unknown_tensor, void three_nn_forward(Tensor unknown_tensor, Tensor known_tensor,
Tensor known_tensor, Tensor dist2_tensor, Tensor dist2_tensor, Tensor idx_tensor, int b, int n,
Tensor idx_tensor); int m);
void bbox_overlaps(const Tensor bboxes1, const Tensor bboxes2, Tensor ious, void bbox_overlaps(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
const int mode, const bool aligned, const int offset); const int mode, const bool aligned, const int offset);
void knn_forward(Tensor xyz_tensor, Tensor new_xyz_tensor, Tensor idx_tensor,
Tensor dist2_tensor, int b, int n, int m, int nsample);
void iou3d_boxes_overlap_bev_forward(Tensor boxes_a, Tensor boxes_b, void iou3d_boxes_overlap_bev_forward(Tensor boxes_a, Tensor boxes_b,
Tensor ans_overlap); Tensor ans_overlap);
...@@ -124,16 +124,13 @@ int iou3d_nms_forward(Tensor boxes, Tensor keep, float nms_overlap_thresh); ...@@ -124,16 +124,13 @@ int iou3d_nms_forward(Tensor boxes, Tensor keep, float nms_overlap_thresh);
int iou3d_nms_normal_forward(Tensor boxes, Tensor keep, int iou3d_nms_normal_forward(Tensor boxes, Tensor keep,
float nms_overlap_thresh); float nms_overlap_thresh);
void knn_forward(int b, int n, int m, int nsample, Tensor xyz_tensor, void furthest_point_sampling_forward(Tensor points_tensor, Tensor temp_tensor,
Tensor new_xyz_tensor, Tensor idx_tensor, Tensor dist2_tensor); Tensor idx_tensor, int b, int n, int m);
void furthest_point_sampling_forward(int b, int n, int m, Tensor points_tensor, void furthest_point_sampling_with_dist_forward(Tensor points_tensor,
Tensor temp_tensor, Tensor idx_tensor);
void furthest_point_sampling_with_dist_forward(int b, int n, int m,
Tensor points_tensor,
Tensor temp_tensor, Tensor temp_tensor,
Tensor idx_tensor); Tensor idx_tensor, int b, int n,
int m);
void masked_im2col_forward(const Tensor im, const Tensor mask_h_idx, void masked_im2col_forward(const Tensor im, const Tensor mask_h_idx,
const Tensor mask_w_idx, Tensor col, const Tensor mask_w_idx, Tensor col,
...@@ -238,9 +235,9 @@ void tin_shift_forward(Tensor input, Tensor shift, Tensor output); ...@@ -238,9 +235,9 @@ void tin_shift_forward(Tensor input, Tensor shift, Tensor output);
void tin_shift_backward(Tensor grad_output, Tensor shift, Tensor grad_input); void tin_shift_backward(Tensor grad_output, Tensor shift, Tensor grad_input);
void ball_query_forward(int b, int n, int m, float min_radius, float max_radius, void ball_query_forward(Tensor new_xyz_tensor, Tensor xyz_tensor,
int nsample, Tensor new_xyz_tensor, Tensor xyz_tensor, Tensor idx_tensor, int b, int n, int m,
Tensor idx_tensor); float min_radius, float max_radius, int nsample);
Tensor bottom_pool_forward(Tensor input); Tensor bottom_pool_forward(Tensor input);
...@@ -352,32 +349,31 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { ...@@ -352,32 +349,31 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
py::arg("empty"), py::arg("act"), py::arg("grad"), py::arg("alpha"), py::arg("empty"), py::arg("act"), py::arg("grad"), py::arg("alpha"),
py::arg("scale")); py::arg("scale"));
m.def("gather_points_forward", &gather_points_forward, m.def("gather_points_forward", &gather_points_forward,
"gather_points_forward", py::arg("b"), py::arg("c"), py::arg("n"), "gather_points_forward", py::arg("points_tensor"),
py::arg("npoints"), py::arg("points_tensor"), py::arg("idx_tensor"), py::arg("idx_tensor"), py::arg("out_tensor"), py::arg("b"),
py::arg("out_tensor")); py::arg("c"), py::arg("n"), py::arg("npoints"));
m.def("gather_points_backward", &gather_points_backward, m.def("gather_points_backward", &gather_points_backward,
"gather_points_backward", py::arg("b"), py::arg("c"), py::arg("n"), "gather_points_backward", py::arg("grad_out_tensor"),
py::arg("npoints"), py::arg("grad_out_tensor"), py::arg("idx_tensor"), py::arg("idx_tensor"), py::arg("grad_points_tensor"), py::arg("b"),
py::arg("grad_points_tensor")); py::arg("c"), py::arg("n"), py::arg("npoints"));
m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); m.def("get_compiler_version", &get_compiler_version, "get_compiler_version");
m.def("get_compiling_cuda_version", &get_compiling_cuda_version, m.def("get_compiling_cuda_version", &get_compiling_cuda_version,
"get_compiling_cuda_version"); "get_compiling_cuda_version");
m.def("assign_score_withk_forward", &assign_score_withk_forward, m.def("assign_score_withk_forward", &assign_score_withk_forward,
"assign_score_withk_forward", py::arg("B"), py::arg("N0"), "assign_score_withk_forward", py::arg("points"), py::arg("centers"),
py::arg("N1"), py::arg("M"), py::arg("K"), py::arg("O"), py::arg("scores"), py::arg("knn_idx"), py::arg("output"), py::arg("B"),
py::arg("aggregate"), py::arg("points"), py::arg("centers"), py::arg("N0"), py::arg("N1"), py::arg("M"), py::arg("K"), py::arg("O"),
py::arg("scores"), py::arg("knn_idx"), py::arg("output")); py::arg("aggregate"));
m.def("assign_score_withk_backward", &assign_score_withk_backward, m.def("assign_score_withk_backward", &assign_score_withk_backward,
"assign_score_withk_backward", py::arg("B"), py::arg("N0"), "assign_score_withk_backward", py::arg("grad_out"), py::arg("points"),
py::arg("N1"), py::arg("M"), py::arg("K"), py::arg("O"),
py::arg("aggregate"), py::arg("grad_out"), py::arg("points"),
py::arg("centers"), py::arg("scores"), py::arg("knn_idx"), py::arg("centers"), py::arg("scores"), py::arg("knn_idx"),
py::arg("grad_points"), py::arg("grad_centers"), py::arg("grad_points"), py::arg("grad_centers"), py::arg("grad_scores"),
py::arg("grad_scores")); py::arg("B"), py::arg("N0"), py::arg("N1"), py::arg("M"), py::arg("K"),
m.def("knn_forward", &knn_forward, "knn_forward", py::arg("b"), py::arg("n"), py::arg("O"), py::arg("aggregate"));
py::arg("m"), py::arg("nsample"), py::arg("xyz_tensor"), m.def("knn_forward", &knn_forward, "knn_forward", py::arg("xyz_tensor"),
py::arg("new_xyz_tensor"), py::arg("idx_tensor"), py::arg("new_xyz_tensor"), py::arg("idx_tensor"),
py::arg("dist2_tensor")); py::arg("dist2_tensor"), py::arg("b"), py::arg("n"), py::arg("m"),
py::arg("nsample"));
m.def("carafe_naive_forward", &carafe_naive_forward, "carafe_naive_forward", m.def("carafe_naive_forward", &carafe_naive_forward, "carafe_naive_forward",
py::arg("features"), py::arg("masks"), py::arg("output"), py::arg("features"), py::arg("masks"), py::arg("output"),
py::arg("kernel_size"), py::arg("group_size"), py::arg("scale_factor")); py::arg("kernel_size"), py::arg("group_size"), py::arg("scale_factor"));
...@@ -447,17 +443,18 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { ...@@ -447,17 +443,18 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
py::arg("weight"), py::arg("buff"), py::arg("grad_input"), py::arg("weight"), py::arg("buff"), py::arg("grad_input"),
py::arg("gamma"), py::arg("alpha")); py::arg("gamma"), py::arg("alpha"));
m.def("three_interpolate_forward", &three_interpolate_forward, m.def("three_interpolate_forward", &three_interpolate_forward,
"three_interpolate_forward", py::arg("b"), py::arg("c"), py::arg("m"), "three_interpolate_forward", py::arg("points_tensor"),
py::arg("n"), py::arg("points_tensor"), py::arg("idx_tensor"), py::arg("idx_tensor"), py::arg("weight_tensor"), py::arg("out_tensor"),
py::arg("weight_tensor"), py::arg("out_tensor")); py::arg("b"), py::arg("c"), py::arg("m"), py::arg("n"));
m.def("three_interpolate_backward", &three_interpolate_backward, m.def("three_interpolate_backward", &three_interpolate_backward,
"three_interpolate_backward", py::arg("b"), py::arg("c"), py::arg("n"), "three_interpolate_backward", py::arg("grad_out_tensor"),
py::arg("m"), py::arg("grad_out_tensor"), py::arg("idx_tensor"), py::arg("idx_tensor"), py::arg("weight_tensor"),
py::arg("weight_tensor"), py::arg("grad_points_tensor")); py::arg("grad_points_tensor"), py::arg("b"), py::arg("c"), py::arg("n"),
m.def("three_nn_forward", &three_nn_forward, "three_nn_forward", py::arg("b"), py::arg("m"));
py::arg("n"), py::arg("m"), py::arg("unknown_tensor"), m.def("three_nn_forward", &three_nn_forward, "three_nn_forward",
py::arg("known_tensor"), py::arg("dist2_tensor"), py::arg("unknown_tensor"), py::arg("known_tensor"),
py::arg("idx_tensor")); py::arg("dist2_tensor"), py::arg("idx_tensor"), py::arg("b"),
py::arg("n"), py::arg("m"));
m.def("bbox_overlaps", &bbox_overlaps, "bbox_overlaps", py::arg("bboxes1"), m.def("bbox_overlaps", &bbox_overlaps, "bbox_overlaps", py::arg("bboxes1"),
py::arg("bboxes2"), py::arg("ious"), py::arg("mode"), py::arg("bboxes2"), py::arg("ious"), py::arg("mode"),
py::arg("aligned"), py::arg("offset")); py::arg("aligned"), py::arg("offset"));
...@@ -485,14 +482,14 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { ...@@ -485,14 +482,14 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
"iou3d_nms_normal_forward", py::arg("boxes"), py::arg("keep"), "iou3d_nms_normal_forward", py::arg("boxes"), py::arg("keep"),
py::arg("nms_overlap_thresh")); py::arg("nms_overlap_thresh"));
m.def("furthest_point_sampling_forward", &furthest_point_sampling_forward, m.def("furthest_point_sampling_forward", &furthest_point_sampling_forward,
"furthest_point_sampling_forward", py::arg("b"), py::arg("n"), "furthest_point_sampling_forward", py::arg("points_tensor"),
py::arg("m"), py::arg("points_tensor"), py::arg("temp_tensor"), py::arg("temp_tensor"), py::arg("idx_tensor"), py::arg("b"),
py::arg("idx_tensor")); py::arg("n"), py::arg("m"));
m.def("furthest_point_sampling_with_dist_forward", m.def("furthest_point_sampling_with_dist_forward",
&furthest_point_sampling_with_dist_forward, &furthest_point_sampling_with_dist_forward,
"furthest_point_sampling_with_dist_forward", py::arg("b"), py::arg("n"), "furthest_point_sampling_with_dist_forward", py::arg("points_tensor"),
py::arg("m"), py::arg("points_tensor"), py::arg("temp_tensor"), py::arg("temp_tensor"), py::arg("idx_tensor"), py::arg("b"),
py::arg("idx_tensor")); py::arg("n"), py::arg("m"));
m.def("masked_im2col_forward", &masked_im2col_forward, m.def("masked_im2col_forward", &masked_im2col_forward,
"masked_im2col_forward", py::arg("im"), py::arg("mask_h_idx"), "masked_im2col_forward", py::arg("im"), py::arg("mask_h_idx"),
py::arg("mask_w_idx"), py::arg("col"), py::arg("kernel_h"), py::arg("mask_w_idx"), py::arg("col"), py::arg("kernel_h"),
...@@ -609,9 +606,9 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { ...@@ -609,9 +606,9 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
py::arg("scores"), py::arg("order"), py::arg("dets_sorted"), py::arg("scores"), py::arg("order"), py::arg("dets_sorted"),
py::arg("iou_threshold"), py::arg("multi_label")); py::arg("iou_threshold"), py::arg("multi_label"));
m.def("ball_query_forward", &ball_query_forward, "ball_query_forward", m.def("ball_query_forward", &ball_query_forward, "ball_query_forward",
py::arg("new_xyz_tensor"), py::arg("xyz_tensor"), py::arg("idx_tensor"),
py::arg("b"), py::arg("n"), py::arg("m"), py::arg("min_radius"), py::arg("b"), py::arg("n"), py::arg("m"), py::arg("min_radius"),
py::arg("max_radius"), py::arg("nsample"), py::arg("new_xyz_tensor"), py::arg("max_radius"), py::arg("nsample"));
py::arg("xyz_tensor"), py::arg("idx_tensor"));
m.def("roi_align_rotated_forward", &roi_align_rotated_forward, m.def("roi_align_rotated_forward", &roi_align_rotated_forward,
"roi_align_rotated forward", py::arg("input"), py::arg("rois"), "roi_align_rotated forward", py::arg("input"), py::arg("rois"),
py::arg("output"), py::arg("pooled_height"), py::arg("pooled_width"), py::arg("output"), py::arg("pooled_height"), py::arg("pooled_width"),
...@@ -657,6 +654,19 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { ...@@ -657,6 +654,19 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
"backward function of border_align", py::arg("grad_output"), "backward function of border_align", py::arg("grad_output"),
py::arg("boxes"), py::arg("argmax_idx"), py::arg("grad_input"), py::arg("boxes"), py::arg("argmax_idx"), py::arg("grad_input"),
py::arg("pool_size")); py::arg("pool_size"));
m.def("correlation_forward", &correlation_forward, "Correlation forward",
py::arg("input1"), py::arg("input2"), py::arg("output"), py::arg("kH"),
py::arg("kW"), py::arg("patchH"), py::arg("patchW"), py::arg("padH"),
py::arg("padW"), py::arg("dilationH"), py::arg("dilationW"),
py::arg("dilation_patchH"), py::arg("dilation_patchW"), py::arg("dH"),
py::arg("dW"));
m.def("correlation_backward", &correlation_backward, "Correlation backward",
py::arg("grad_output"), py::arg("input1"), py::arg("input2"),
py::arg("grad_input1"), py::arg("grad_input2"), py::arg("kH"),
py::arg("kW"), py::arg("patchH"), py::arg("patchW"), py::arg("padH"),
py::arg("padW"), py::arg("dilationH"), py::arg("dilationW"),
py::arg("dilation_patchH"), py::arg("dilation_patchW"), py::arg("dH"),
py::arg("dW"));
m.def("points_in_boxes_cpu_forward", &points_in_boxes_cpu_forward, m.def("points_in_boxes_cpu_forward", &points_in_boxes_cpu_forward,
"points_in_boxes_cpu_forward", py::arg("boxes_tensor"), "points_in_boxes_cpu_forward", py::arg("boxes_tensor"),
py::arg("pts_tensor"), py::arg("pts_indices_tensor")); py::arg("pts_tensor"), py::arg("pts_indices_tensor"));
...@@ -674,6 +684,4 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { ...@@ -674,6 +684,4 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
"roiaware_pool3d_backward", py::arg("pts_idx_of_voxels"), "roiaware_pool3d_backward", py::arg("pts_idx_of_voxels"),
py::arg("argmax"), py::arg("grad_out"), py::arg("grad_in"), py::arg("argmax"), py::arg("grad_out"), py::arg("grad_in"),
py::arg("pool_method")); py::arg("pool_method"));
m.def("correlation_forward", &correlation_forward, "Correlation forward");
m.def("correlation_backward", &correlation_backward, "Correlation backward");
} }
...@@ -30,9 +30,9 @@ void three_interpolate_backward_cuda(int b, int c, int n, int m, ...@@ -30,9 +30,9 @@ void three_interpolate_backward_cuda(int b, int c, int n, int m,
}; };
#endif #endif
void three_interpolate_forward(int b, int c, int m, int n, Tensor points_tensor, void three_interpolate_forward(Tensor points_tensor, Tensor idx_tensor,
Tensor idx_tensor, Tensor weight_tensor, Tensor weight_tensor, Tensor out_tensor, int b,
Tensor out_tensor) { int c, int m, int n) {
if (points_tensor.device().is_cuda()) { if (points_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA #ifdef MMCV_WITH_CUDA
three_interpolate_forward_cuda(b, c, m, n, points_tensor, idx_tensor, three_interpolate_forward_cuda(b, c, m, n, points_tensor, idx_tensor,
...@@ -45,10 +45,9 @@ void three_interpolate_forward(int b, int c, int m, int n, Tensor points_tensor, ...@@ -45,10 +45,9 @@ void three_interpolate_forward(int b, int c, int m, int n, Tensor points_tensor,
} }
} }
void three_interpolate_backward(int b, int c, int n, int m, void three_interpolate_backward(Tensor grad_out_tensor, Tensor idx_tensor,
Tensor grad_out_tensor, Tensor idx_tensor, Tensor weight_tensor, Tensor grad_points_tensor,
Tensor weight_tensor, int b, int c, int n, int m) {
Tensor grad_points_tensor) {
if (grad_out_tensor.device().is_cuda()) { if (grad_out_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA #ifdef MMCV_WITH_CUDA
three_interpolate_backward_cuda(b, c, n, m, grad_out_tensor, idx_tensor, three_interpolate_backward_cuda(b, c, n, m, grad_out_tensor, idx_tensor,
......
...@@ -14,9 +14,9 @@ void three_nn_forward_cuda(int b, int n, int m, const Tensor unknown, ...@@ -14,9 +14,9 @@ void three_nn_forward_cuda(int b, int n, int m, const Tensor unknown,
}; };
#endif #endif
void three_nn_forward(int b, int n, int m, Tensor unknown_tensor, void three_nn_forward(Tensor unknown_tensor, Tensor known_tensor,
Tensor known_tensor, Tensor dist2_tensor, Tensor dist2_tensor, Tensor idx_tensor, int b, int n,
Tensor idx_tensor) { int m) {
if (unknown_tensor.device().is_cuda()) { if (unknown_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA #ifdef MMCV_WITH_CUDA
three_nn_forward_cuda(b, n, m, unknown_tensor, known_tensor, dist2_tensor, three_nn_forward_cuda(b, n, m, unknown_tensor, known_tensor, dist2_tensor,
......
...@@ -30,9 +30,16 @@ class FurthestPointSampling(Function): ...@@ -30,9 +30,16 @@ class FurthestPointSampling(Function):
output = torch.cuda.IntTensor(B, num_points) output = torch.cuda.IntTensor(B, num_points)
temp = torch.cuda.FloatTensor(B, N).fill_(1e10) temp = torch.cuda.FloatTensor(B, N).fill_(1e10)
ext_module.furthest_point_sampling_forward(B, N, num_points, ext_module.furthest_point_sampling_forward(
points_xyz, temp, output) points_xyz,
ctx.mark_non_differentiable(output) temp,
output,
b=B,
n=N,
m=num_points,
)
if torch.__version__ != 'parrots':
ctx.mark_non_differentiable(output)
return output return output
@staticmethod @staticmethod
...@@ -62,8 +69,9 @@ class FurthestPointSamplingWithDist(Function): ...@@ -62,8 +69,9 @@ class FurthestPointSamplingWithDist(Function):
temp = points_dist.new_zeros([B, N]).fill_(1e10) temp = points_dist.new_zeros([B, N]).fill_(1e10)
ext_module.furthest_point_sampling_with_dist_forward( ext_module.furthest_point_sampling_with_dist_forward(
B, N, num_points, points_dist, temp, output) points_dist, temp, output, b=B, n=N, m=num_points)
ctx.mark_non_differentiable(output) if torch.__version__ != 'parrots':
ctx.mark_non_differentiable(output)
return output return output
@staticmethod @staticmethod
......
...@@ -28,11 +28,12 @@ class GatherPoints(Function): ...@@ -28,11 +28,12 @@ class GatherPoints(Function):
_, C, N = features.size() _, C, N = features.size()
output = torch.cuda.FloatTensor(B, C, npoint) output = torch.cuda.FloatTensor(B, C, npoint)
ext_module.gather_points_forward(B, C, N, npoint, features, indices, ext_module.gather_points_forward(
output) features, indices, output, b=B, c=C, n=N, npoints=npoint)
ctx.for_backwards = (indices, C, N) ctx.for_backwards = (indices, C, N)
ctx.mark_non_differentiable(indices) if torch.__version__ != 'parrots':
ctx.mark_non_differentiable(indices)
return output return output
@staticmethod @staticmethod
...@@ -42,8 +43,14 @@ class GatherPoints(Function): ...@@ -42,8 +43,14 @@ class GatherPoints(Function):
grad_features = torch.cuda.FloatTensor(B, C, N).zero_() grad_features = torch.cuda.FloatTensor(B, C, N).zero_()
grad_out_data = grad_out.data.contiguous() grad_out_data = grad_out.data.contiguous()
ext_module.gather_points_backward(B, C, N, npoint, grad_out_data, idx, ext_module.gather_points_backward(
grad_features.data) grad_out_data,
idx,
grad_features.data,
b=B,
c=C,
n=N,
npoints=npoint)
return grad_features, None return grad_features, None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment