Unverified Commit 2d73eafe authored by pc's avatar pc Committed by GitHub
Browse files

add mmdet3d op (#1425)


Co-authored-by: default avatarzhouzaida <zhouzaida@163.com>
parent 75cae78c
......@@ -57,12 +57,19 @@ class AssignScoreWithK(Function):
_, npoint, K, _ = scores.size()
output = point_features.new_zeros((B, out_dim, npoint, K))
ext_module.assign_score_withk_forward(B, N, npoint, M, K, out_dim,
agg[aggregate],
ext_module.assign_score_withk_forward(
point_features.contiguous(),
center_features.contiguous(),
scores.contiguous(),
knn_idx.contiguous(), output)
knn_idx.contiguous(),
output,
B=B,
N0=N,
N1=npoint,
M=M,
K=K,
O=out_dim,
aggregate=agg[aggregate])
ctx.save_for_backward(output, point_features, center_features, scores,
knn_idx)
......@@ -92,15 +99,22 @@ class AssignScoreWithK(Function):
grad_center_features = center_features.new_zeros(center_features.shape)
grad_scores = scores.new_zeros(scores.shape)
ext_module.assign_score_withk_backward(B, N, npoint, M, K, out_dim,
agg, grad_out.contiguous(),
ext_module.assign_score_withk_backward(
grad_out.contiguous(),
point_features.contiguous(),
center_features.contiguous(),
scores.contiguous(),
knn_idx.contiguous(),
grad_point_features,
grad_center_features,
grad_scores)
grad_scores,
B=B,
N0=N,
N1=npoint,
M=M,
K=K,
O=out_dim,
aggregate=agg)
return grad_scores, grad_point_features, \
grad_center_features, None, None
......
......@@ -33,8 +33,17 @@ class BallQuery(Function):
npoint = center_xyz.size(1)
idx = xyz.new_zeros(B, npoint, sample_num, dtype=torch.int)
ext_module.ball_query_forward(B, N, npoint, min_radius, max_radius,
sample_num, center_xyz, xyz, idx)
ext_module.ball_query_forward(
center_xyz,
xyz,
idx,
b=B,
n=N,
m=npoint,
min_radius=min_radius,
max_radius=max_radius,
nsample=sample_num)
if torch.__version__ != 'parrots':
ctx.mark_non_differentiable(idx)
return idx
......
......@@ -39,10 +39,22 @@ class CorrelationFunction(Function):
output = input1.new_zeros(output_size)
ext_module.correlation_forward(input1, input2, output, kH, kW,
patch_size, patch_size, padH, padW,
dilationH, dilationW, dilation_patchH,
dilation_patchW, dH, dW)
ext_module.correlation_forward(
input1,
input2,
output,
kH=kH,
kW=kW,
patchH=patch_size,
patchW=patch_size,
padH=padH,
padW=padW,
dilationH=dilationH,
dilationW=dilationW,
dilation_patchH=dilation_patchH,
dilation_patchW=dilation_patchW,
dH=dH,
dW=dW)
return output
......@@ -60,11 +72,24 @@ class CorrelationFunction(Function):
grad_input1 = torch.zeros_like(input1)
grad_input2 = torch.zeros_like(input2)
ext_module.correlation_backward(grad_output, input1, input2,
grad_input1, grad_input2, kH, kW,
patch_size, patch_size, padH, padW,
dilationH, dilationW, dilation_patchH,
dilation_patchW, dH, dW)
ext_module.correlation_backward(
grad_output,
input1,
input2,
grad_input1,
grad_input2,
kH=kH,
kW=kW,
patchH=patch_size,
patchW=patch_size,
padH=padH,
padW=padW,
dilationH=dilationH,
dilationW=dilationW,
dilation_patchH=dilation_patchH,
dilation_patchW=dilation_patchW,
dH=dH,
dW=dW)
return grad_input1, grad_input2, None, None, None, None, None, None
@staticmethod
......
// Modified from
// https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/paconv_lib/src/gpu
#include "pytorch_cpp_helper.hpp"
#ifdef MMCV_WITH_CUDA
// CUDA kernel launcher, implemented in the corresponding .cu file.
void AssignScoreWithKForwardCUDAKernelLauncher(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& points, const Tensor& centers, const Tensor& scores,
    const Tensor& knn_idx, Tensor& output);

// Thin CUDA dispatch wrapper for assign_score_withk forward: forwards every
// argument unchanged to the kernel launcher. No validation is done here; the
// device-agnostic entry point below performs the contiguity checks.
void assign_score_withk_forward_cuda(int B, int N0, int N1, int M, int K, int O,
                                     int aggregate, const Tensor& points,
                                     const Tensor& centers,
                                     const Tensor& scores,
                                     const Tensor& knn_idx, Tensor& output) {
  AssignScoreWithKForwardCUDAKernelLauncher(
      B, N0, N1, M, K, O, aggregate, points, centers, scores, knn_idx, output);
}  // NOTE: removed the stray ';' that followed this function definition.
// CUDA kernel launcher, implemented in the corresponding .cu file.
void AssignScoreWithKBackwardCUDAKernelLauncher(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
    const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
    Tensor& grad_centers, Tensor& grad_scores);

// Thin CUDA dispatch wrapper for assign_score_withk backward: forwards every
// argument unchanged to the kernel launcher. Validation happens in the
// device-agnostic entry point below.
void assign_score_withk_backward_cuda(
    int B, int N0, int N1, int M, int K, int O, int aggregate,
    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
    const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
    Tensor& grad_centers, Tensor& grad_scores) {
  AssignScoreWithKBackwardCUDAKernelLauncher(
      B, N0, N1, M, K, O, aggregate, grad_out, points, centers, scores, knn_idx,
      grad_points, grad_centers, grad_scores);
}  // NOTE: removed the stray ';' that followed this function definition.
#endif
// Device-dispatch entry point for the PAConv assign_score_withk forward op.
// Scalar arguments (B, N0, N1, M, K, O, aggregate) are passed through to the
// CUDA kernel; their exact semantics are defined by the kernel launcher.
// Only a CUDA implementation exists — CPU input raises an error.
void assign_score_withk_forward(const Tensor& points, const Tensor& centers,
                                const Tensor& scores, const Tensor& knn_idx,
                                Tensor& output, int B, int N0, int N1, int M,
                                int K, int O, int aggregate) {
  if (points.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    // The kernel indexes raw memory, so all tensors must be contiguous.
    CHECK_CONTIGUOUS(points);
    CHECK_CONTIGUOUS(centers);
    CHECK_CONTIGUOUS(scores);
    CHECK_CONTIGUOUS(knn_idx);
    CHECK_CONTIGUOUS(output);

    assign_score_withk_forward_cuda(B, N0, N1, M, K, O, aggregate, points,
                                    centers, scores, knn_idx, output);
#else
    AT_ERROR("assign_score_withk is not compiled with GPU support");
#endif
  } else {
    AT_ERROR("assign_score_withk is not implemented on CPU");
  }
}
// Device-dispatch entry point for the PAConv assign_score_withk backward op.
// Writes gradients into grad_points / grad_centers / grad_scores.
// Only a CUDA implementation exists — CPU input raises an error.
void assign_score_withk_backward(const Tensor& grad_out, const Tensor& points,
                                 const Tensor& centers, const Tensor& scores,
                                 const Tensor& knn_idx, Tensor& grad_points,
                                 Tensor& grad_centers, Tensor& grad_scores,
                                 int B, int N0, int N1, int M, int K, int O,
                                 int aggregate) {
  // Dispatch on the gradient buffer's device (inputs live on the same device).
  if (grad_points.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    // The kernel indexes raw memory, so all tensors must be contiguous.
    CHECK_CONTIGUOUS(grad_out);
    CHECK_CONTIGUOUS(scores);
    CHECK_CONTIGUOUS(points);
    CHECK_CONTIGUOUS(centers);
    CHECK_CONTIGUOUS(knn_idx);
    CHECK_CONTIGUOUS(grad_scores);
    CHECK_CONTIGUOUS(grad_points);
    CHECK_CONTIGUOUS(grad_centers);

    assign_score_withk_backward_cuda(B, N0, N1, M, K, O, aggregate, grad_out,
                                     points, centers, scores, knn_idx,
                                     grad_points, grad_centers, grad_scores);
#else
    AT_ERROR("assign_score_withk is not compiled with GPU support");
#endif
  } else {
    AT_ERROR("assign_score_withk is not implemented on CPU");
  }
}
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
#include "assign_score_withk_pytorch.h"
using namespace parrots;
#ifdef MMCV_WITH_CUDA
// Parrots adapter: unpacks scalar attributes and tensor lists, then calls the
// shared entry point assign_score_withk_forward.
void assign_score_withk_forward_cuda_parrots(CudaContext& ctx,
                                             const SSElement& attr,
                                             const OperatorBase::in_list_t& ins,
                                             OperatorBase::out_list_t& outs) {
  int B, N0, N1, M, K, O, aggregate;
  // Scalar kernel parameters arrive as named attributes; names must match the
  // .attr() declarations in the PARROTS_EXTENSION_REGISTER block.
  SSAttrs(attr)
      .get<int>("B", B)
      .get<int>("N0", N0)
      .get<int>("N1", N1)
      .get<int>("M", M)
      .get<int>("K", K)
      .get<int>("O", O)
      .get<int>("aggregate", aggregate)
      .done();

  // Input order must match the Python-side positional arguments.
  const auto& points = buildATensor(ctx, ins[0]);
  const auto& centers = buildATensor(ctx, ins[1]);
  const auto& scores = buildATensor(ctx, ins[2]);
  const auto& knn_idx = buildATensor(ctx, ins[3]);

  auto output = buildATensor(ctx, outs[0]);
  assign_score_withk_forward(points, centers, scores, knn_idx, output, B, N0,
                             N1, M, K, O, aggregate);
}
// Parrots adapter for the backward pass: unpacks attributes and tensors, then
// calls the shared entry point assign_score_withk_backward.
void assign_score_withk_backward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  int B, N0, N1, M, K, O, aggregate;
  SSAttrs(attr)
      .get<int>("B", B)
      .get<int>("N0", N0)
      .get<int>("N1", N1)
      .get<int>("M", M)
      .get<int>("K", K)
      .get<int>("O", O)
      .get<int>("aggregate", aggregate)
      .done();

  // Inputs: gradient of the output followed by the saved forward tensors.
  const auto& grad_out = buildATensor(ctx, ins[0]);
  const auto& points = buildATensor(ctx, ins[1]);
  const auto& centers = buildATensor(ctx, ins[2]);
  const auto& scores = buildATensor(ctx, ins[3]);
  const auto& knn_idx = buildATensor(ctx, ins[4]);

  // Outputs: one gradient buffer per differentiable forward input.
  auto grad_points = buildATensor(ctx, outs[0]);
  auto grad_centers = buildATensor(ctx, outs[1]);
  auto grad_scores = buildATensor(ctx, outs[2]);
  assign_score_withk_backward(grad_out, points, centers, scores, knn_idx,
                              grad_points, grad_centers, grad_scores, B, N0, N1,
                              M, K, O, aggregate);
}
// Register both ops with parrots. The .attr() names, input count, and output
// count must stay in sync with the adapter functions above and with the
// keyword arguments passed from the Python side.
PARROTS_EXTENSION_REGISTER(assign_score_withk_forward)
    .attr("B")
    .attr("N0")
    .attr("N1")
    .attr("M")
    .attr("K")
    .attr("O")
    .attr("aggregate")
    .input(4)   // points, centers, scores, knn_idx
    .output(1)  // output
    .apply(assign_score_withk_forward_cuda_parrots)
    .done();

PARROTS_EXTENSION_REGISTER(assign_score_withk_backward)
    .attr("B")
    .attr("N0")
    .attr("N1")
    .attr("M")
    .attr("K")
    .attr("O")
    .attr("aggregate")
    .input(5)   // grad_out, points, centers, scores, knn_idx
    .output(3)  // grad_points, grad_centers, grad_scores
    .apply(assign_score_withk_backward_cuda_parrots)
    .done();
#endif
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ASSIGN_SCORE_WITHK_PYTORCH_H
#define ASSIGN_SCORE_WITHK_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Device-dispatching entry points for the assign_score_withk op; definitions
// live in the corresponding pytorch .cpp file.
void assign_score_withk_forward(const Tensor& points, const Tensor& centers,
                                const Tensor& scores, const Tensor& knn_idx,
                                Tensor& output, int B, int N0, int N1, int M,
                                int K, int O, int aggregate);

void assign_score_withk_backward(const Tensor& grad_out, const Tensor& points,
                                 const Tensor& centers, const Tensor& scores,
                                 const Tensor& knn_idx, Tensor& grad_points,
                                 Tensor& grad_centers, Tensor& grad_scores,
                                 int B, int N0, int N1, int M, int K, int O,
                                 int aggregate);
#endif  // ASSIGN_SCORE_WITHK_PYTORCH_H
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
#include "ball_query_pytorch.h"
using namespace parrots;
#ifdef MMCV_WITH_CUDA
// Parrots adapter: unpacks scalar attributes and tensors, then calls the
// shared entry point ball_query_forward.
void ball_query_parrots(CudaContext& ctx, const SSElement& attr,
                        const OperatorBase::in_list_t& ins,
                        OperatorBase::out_list_t& outs) {
  int b, n, m, nsample;
  float min_radius, max_radius;
  SSAttrs(attr)
      .get<int>("b", b)
      .get<int>("n", n)
      .get<int>("m", m)
      .get<int>("nsample", nsample)
      .get<float>("min_radius", min_radius)
      .get<float>("max_radius", max_radius)
      .done();

  const auto& center_xyz = buildATensor(ctx, ins[0]);
  const auto& xyz = buildATensor(ctx, ins[1]);
  auto idx = buildATensor(ctx, outs[0]);
  ball_query_forward(center_xyz, xyz, idx, b, n, m, min_radius, max_radius,
                     nsample);
}

// Attr names / input / output counts must match ball_query_parrots above and
// the keyword arguments used on the Python side.
PARROTS_EXTENSION_REGISTER(ball_query_forward)
    .attr("b")
    .attr("n")
    .attr("m")
    .attr("nsample")
    .attr("min_radius")
    .attr("max_radius")
    .input(2)   // center_xyz, xyz
    .output(1)  // idx
    .apply(ball_query_parrots)
    .done();
#endif
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/ball_query.cpp
#include "pytorch_cpp_helper.hpp"
#ifdef MMCV_WITH_CUDA
// CUDA kernel launcher, implemented in the corresponding .cu file.
void BallQueryForwardCUDAKernelLauncher(int b, int n, int m, float min_radius,
                                        float max_radius, int nsample,
                                        const Tensor new_xyz, const Tensor xyz,
                                        Tensor idx);

// Thin CUDA dispatch wrapper: forwards all arguments to the kernel launcher.
void ball_query_forward_cuda(int b, int n, int m, float min_radius,
                             float max_radius, int nsample,
                             const Tensor new_xyz, const Tensor xyz,
                             Tensor idx) {
  BallQueryForwardCUDAKernelLauncher(b, n, m, min_radius, max_radius, nsample,
                                     new_xyz, xyz, idx);
}  // NOTE: removed the stray ';' that followed this function definition.
#endif
// Device-dispatch entry point for ball query: for each center point, collect
// up to `nsample` neighbor indices within [min_radius, max_radius) into
// idx_tensor (exact selection semantics are defined by the CUDA kernel).
// Only a CUDA implementation exists — CPU input raises an error.
void ball_query_forward(Tensor new_xyz_tensor, Tensor xyz_tensor,
                        Tensor idx_tensor, int b, int n, int m,
                        float min_radius, float max_radius, int nsample) {
  if (new_xyz_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    // CHECK_CUDA_INPUT verifies device placement and contiguity.
    CHECK_CUDA_INPUT(new_xyz_tensor);
    CHECK_CUDA_INPUT(xyz_tensor);

    ball_query_forward_cuda(b, n, m, min_radius, max_radius, nsample,
                            new_xyz_tensor, xyz_tensor, idx_tensor);
#else
    AT_ERROR("ball_query is not compiled with GPU support");
#endif
  } else {
    AT_ERROR("ball_query is not implemented on CPU");
  }
}
// Copyright (c) OpenMMLab. All rights reserved
#ifndef BALL_QUERY_PYTORCH_H
#define BALL_QUERY_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Device-dispatching entry point for ball query; defined in the corresponding
// pytorch .cpp file.
void ball_query_forward(const Tensor new_xyz, const Tensor xyz, Tensor idx,
                        int b, int n, int m, float min_radius, float max_radius,
                        int nsample);
#endif  // BALL_QUERY_PYTORCH_H
// Copyright (c) OpenMMLab. All rights reserved.
#include <iostream>
#include "pytorch_cpp_helper.hpp"
#ifdef MMCV_WITH_CUDA
// CUDA kernel launchers, implemented in the corresponding .cu file.
void CorrelationForwardCUDAKernelLauncher(Tensor input1, Tensor input2,
                                          Tensor output, int kH, int kW,
                                          int patchH, int patchW, int padH,
                                          int padW, int dilationH,
                                          int dilationW, int dilation_patchH,
                                          int dilation_patchW, int dH, int dW);

void CorrelationBackwardCUDAKernelLauncher(Tensor grad_output, Tensor input1,
                                           Tensor input2, Tensor grad_input1,
                                           Tensor grad_input2, int kH, int kW,
                                           int patchH, int patchW, int padH,
                                           int padW, int dilationH,
                                           int dilationW, int dilation_patchH,
                                           int dilation_patchW, int dH, int dW);

// Thin CUDA dispatch wrapper for the correlation forward pass.
void correlation_cuda_forward(Tensor input1, Tensor input2, Tensor output,
                              int kH, int kW, int patchH, int patchW, int padH,
                              int padW, int dilationH, int dilationW,
                              int dilation_patchH, int dilation_patchW, int dH,
                              int dW) {
  CorrelationForwardCUDAKernelLauncher(
      input1, input2, output, kH, kW, patchH, patchW, padH, padW, dilationH,
      dilationW, dilation_patchH, dilation_patchW, dH, dW);
}

// Thin CUDA dispatch wrapper for the correlation backward pass.
void correlation_cuda_backward(Tensor grad_output, Tensor input1, Tensor input2,
                               Tensor grad_input1, Tensor grad_input2, int kH,
                               int kW, int patchH, int patchW, int padH,
                               int padW, int dilationH, int dilationW,
                               int dilation_patchH, int dilation_patchW, int dH,
                               int dW) {
  CorrelationBackwardCUDAKernelLauncher(
      grad_output, input1, input2, grad_input1, grad_input2, kH, kW, patchH,
      patchW, padH, padW, dilationH, dilationW, dilation_patchH,
      dilation_patchW, dH, dW);
}
#endif
// Device-dispatch entry point for the correlation forward op (kernel size,
// patch size, padding, dilation and stride are passed through to the kernel).
// Only a CUDA implementation exists — CPU input raises an error.
void correlation_forward(Tensor input1, Tensor input2, Tensor output, int kH,
                         int kW, int patchH, int patchW, int padH, int padW,
                         int dilationH, int dilationW, int dilation_patchH,
                         int dilation_patchW, int dH, int dW) {
  // Both operands must be on the GPU; mixed placement falls through to the
  // CPU error branch.
  if (input1.device().is_cuda() && input2.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(input1);
    CHECK_CUDA_INPUT(input2);

    correlation_cuda_forward(input1, input2, output, kH, kW, patchH, patchW,
                             padH, padW, dilationH, dilationW, dilation_patchH,
                             dilation_patchW, dH, dW);
#else
    AT_ERROR("Correlation is not compiled with GPU support");
#endif
  } else {
    AT_ERROR("Correlation is not implemented on CPU");
  }
}
// Device-dispatch entry point for the correlation backward op; writes
// gradients into grad_input1 / grad_input2.
// Only a CUDA implementation exists — CPU input raises an error.
void correlation_backward(Tensor grad_output, Tensor input1, Tensor input2,
                          Tensor grad_input1, Tensor grad_input2, int kH,
                          int kW, int patchH, int patchW, int padH, int padW,
                          int dilationH, int dilationW, int dilation_patchH,
                          int dilation_patchW, int dH, int dW) {
  if (input1.device().is_cuda() && input2.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    CHECK_CUDA_INPUT(grad_output);
    CHECK_CUDA_INPUT(input1);
    CHECK_CUDA_INPUT(input2);

    correlation_cuda_backward(grad_output, input1, input2, grad_input1,
                              grad_input2, kH, kW, patchH, patchW, padH, padW,
                              dilationH, dilationW, dilation_patchH,
                              dilation_patchW, dH, dW);
#else
    AT_ERROR("Correlation is not compiled with GPU support");
#endif
  } else {
    AT_ERROR("Correlation is not implemented on CPU");
  }
}
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
#include "correlation_pytorch.h"
using namespace parrots;
#ifdef MMCV_WITH_CUDA
// Parrots CUDA adapter: unpacks the correlation hyper-parameters and tensors,
// then calls the shared entry point correlation_forward.
void correlation_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                      const OperatorBase::in_list_t& ins,
                                      OperatorBase::out_list_t& outs) {
  int kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH,
      dilation_patchW, dH, dW;
  SSAttrs(attr)
      .get<int>("kH", kH)
      .get<int>("kW", kW)
      .get<int>("patchH", patchH)
      .get<int>("patchW", patchW)
      .get<int>("padH", padH)
      .get<int>("padW", padW)
      .get<int>("dilationH", dilationH)
      .get<int>("dilationW", dilationW)
      .get<int>("dilation_patchH", dilation_patchH)
      .get<int>("dilation_patchW", dilation_patchW)
      .get<int>("dH", dH)
      .get<int>("dW", dW)
      .done();

  auto input1 = buildATensor(ctx, ins[0]);
  auto input2 = buildATensor(ctx, ins[1]);
  auto output = buildATensor(ctx, outs[0]);
  correlation_forward(input1, input2, output, kH, kW, patchH, patchW, padH,
                      padW, dilationH, dilationW, dilation_patchH,
                      dilation_patchW, dH, dW);
}
// Parrots CUDA adapter for the backward pass: unpacks attributes and tensors,
// then calls the shared entry point correlation_backward.
void correlation_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                       const OperatorBase::in_list_t& ins,
                                       OperatorBase::out_list_t& outs) {
  int kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH,
      dilation_patchW, dH, dW;
  SSAttrs(attr)
      .get<int>("kH", kH)
      .get<int>("kW", kW)
      .get<int>("patchH", patchH)
      .get<int>("patchW", patchW)
      .get<int>("padH", padH)
      .get<int>("padW", padW)
      .get<int>("dilationH", dilationH)
      .get<int>("dilationW", dilationW)
      .get<int>("dilation_patchH", dilation_patchH)
      .get<int>("dilation_patchW", dilation_patchW)
      .get<int>("dH", dH)
      .get<int>("dW", dW)
      .done();

  auto grad_output = buildATensor(ctx, ins[0]);
  auto input1 = buildATensor(ctx, ins[1]);
  auto input2 = buildATensor(ctx, ins[2]);
  auto grad_input1 = buildATensor(ctx, outs[0]);
  auto grad_input2 = buildATensor(ctx, outs[1]);
  correlation_backward(grad_output, input1, input2, grad_input1, grad_input2,
                       kH, kW, patchH, patchW, padH, padW, dilationH, dilationW,
                       dilation_patchH, dilation_patchW, dH, dW);
}
#endif
// Parrots host (CPU) adapter — mirrors correlation_forward_cuda_parrots but
// builds host tensors. Note the shared entry point currently errors on CPU
// tensors, so this adapter exists for registration symmetry.
void correlation_forward_cpu_parrots(HostContext& ctx, const SSElement& attr,
                                     const OperatorBase::in_list_t& ins,
                                     OperatorBase::out_list_t& outs) {
  int kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH,
      dilation_patchW, dH, dW;
  SSAttrs(attr)
      .get<int>("kH", kH)
      .get<int>("kW", kW)
      .get<int>("patchH", patchH)
      .get<int>("patchW", patchW)
      .get<int>("padH", padH)
      .get<int>("padW", padW)
      .get<int>("dilationH", dilationH)
      .get<int>("dilationW", dilationW)
      .get<int>("dilation_patchH", dilation_patchH)
      .get<int>("dilation_patchW", dilation_patchW)
      .get<int>("dH", dH)
      .get<int>("dW", dW)
      .done();

  auto input1 = buildATensor(ctx, ins[0]);
  auto input2 = buildATensor(ctx, ins[1]);
  auto output = buildATensor(ctx, outs[0]);
  correlation_forward(input1, input2, output, kH, kW, patchH, patchW, padH,
                      padW, dilationH, dilationW, dilation_patchH,
                      dilation_patchW, dH, dW);
}
// Parrots host (CPU) adapter for the backward pass — mirrors
// correlation_backward_cuda_parrots with host tensors.
void correlation_backward_cpu_parrots(HostContext& ctx, const SSElement& attr,
                                      const OperatorBase::in_list_t& ins,
                                      OperatorBase::out_list_t& outs) {
  int kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH,
      dilation_patchW, dH, dW;
  SSAttrs(attr)
      .get<int>("kH", kH)
      .get<int>("kW", kW)
      .get<int>("patchH", patchH)
      .get<int>("patchW", patchW)
      .get<int>("padH", padH)
      .get<int>("padW", padW)
      .get<int>("dilationH", dilationH)
      .get<int>("dilationW", dilationW)
      .get<int>("dilation_patchH", dilation_patchH)
      .get<int>("dilation_patchW", dilation_patchW)
      .get<int>("dH", dH)
      .get<int>("dW", dW)
      .done();

  auto grad_output = buildATensor(ctx, ins[0]);
  auto input1 = buildATensor(ctx, ins[1]);
  auto input2 = buildATensor(ctx, ins[2]);
  auto grad_input1 = buildATensor(ctx, outs[0]);
  auto grad_input2 = buildATensor(ctx, outs[1]);
  correlation_backward(grad_output, input1, input2, grad_input1, grad_input2,
                       kH, kW, patchH, patchW, padH, padW, dilationH, dilationW,
                       dilation_patchH, dilation_patchW, dH, dW);
}
// Register the correlation ops. The CPU adapter is always registered; the
// CUDA adapter is added only when compiled with CUDA support.
PARROTS_EXTENSION_REGISTER(correlation_forward)
    .attr("kH")
    .attr("kW")
    .attr("patchH")
    .attr("patchW")
    .attr("padH")
    .attr("padW")
    .attr("dilationH")
    .attr("dilationW")
    .attr("dilation_patchH")
    .attr("dilation_patchW")
    .attr("dH")
    .attr("dW")
    .input(2)   // input1, input2
    .output(1)  // output
    .apply(correlation_forward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
    .apply(correlation_forward_cuda_parrots)
#endif
    .done();

PARROTS_EXTENSION_REGISTER(correlation_backward)
    .attr("kH")
    .attr("kW")
    .attr("patchH")
    .attr("patchW")
    .attr("padH")
    .attr("padW")
    .attr("dilationH")
    .attr("dilationW")
    .attr("dilation_patchH")
    .attr("dilation_patchW")
    .attr("dH")
    .attr("dW")
    .input(3)   // grad_output, input1, input2
    .output(2)  // grad_input1, grad_input2
    .apply(correlation_backward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
    .apply(correlation_backward_cuda_parrots)
#endif
    .done();
// Copyright (c) OpenMMLab. All rights reserved
#ifndef CORRELATION_PYTORCH_H
#define CORRELATION_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Device-dispatching entry points for the correlation op; definitions live in
// the corresponding pytorch .cpp file.
void correlation_forward(Tensor input1, Tensor input2, Tensor output, int kH,
                         int kW, int patchH, int patchW, int padH, int padW,
                         int dilationH, int dilationW, int dilation_patchH,
                         int dilation_patchW, int dH, int dW);

void correlation_backward(Tensor grad_output, Tensor input1, Tensor input2,
                          Tensor grad_input1, Tensor grad_input2, int kH,
                          int kW, int patchH, int patchW, int padH, int padW,
                          int dilationH, int dilationW, int dilation_patchH,
                          int dilation_patchW, int dH, int dW);
#endif  // CORRELATION_PYTORCH_H
// Modified from
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/sampling.cpp
#include "pytorch_cpp_helper.hpp"
#ifdef MMCV_WITH_CUDA
// CUDA kernel launchers, implemented in the corresponding .cu file. These
// operate on raw device pointers rather than Tensors.
void FurthestPointSamplingForwardCUDAKernelLauncher(int b, int n, int m,
                                                    const float *dataset,
                                                    float *temp, int *idxs);

// Thin CUDA dispatch wrapper for furthest point sampling.
void furthest_point_sampling_forward_cuda(int b, int n, int m,
                                          const float *dataset, float *temp,
                                          int *idxs) {
  FurthestPointSamplingForwardCUDAKernelLauncher(b, n, m, dataset, temp, idxs);
}

void FurthestPointSamplingWithDistForwardCUDAKernelLauncher(
    int b, int n, int m, const float *dataset, float *temp, int *idxs);

// Thin CUDA dispatch wrapper for the distance-matrix variant.
void furthest_point_sampling_with_dist_forward_cuda(int b, int n, int m,
                                                    const float *dataset,
                                                    float *temp, int *idxs) {
  FurthestPointSamplingWithDistForwardCUDAKernelLauncher(b, n, m, dataset, temp,
                                                         idxs);
}
#endif
// Device-dispatch entry point for furthest point sampling: selects m point
// indices from points_tensor into idx_tensor, using temp_tensor as kernel
// scratch space. Only a CUDA implementation exists — CPU input errors.
// NOTE(review): tensors are assumed float32/int32 (data_ptr<float>/<int>
// throws on mismatch); no contiguity check is performed here.
void furthest_point_sampling_forward(Tensor points_tensor, Tensor temp_tensor,
                                     Tensor idx_tensor, int b, int n, int m) {
  if (points_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    // Unwrap to raw device pointers for the pointer-based CUDA wrapper.
    const float *points = points_tensor.data_ptr<float>();
    float *temp = temp_tensor.data_ptr<float>();
    int *idx = idx_tensor.data_ptr<int>();
    furthest_point_sampling_forward_cuda(b, n, m, points, temp, idx);
#else
    AT_ERROR("furthest_point_sampling is not compiled with GPU support");
#endif
  } else {
    AT_ERROR("furthest_point_sampling is not implemented on CPU");
  }
}
// Device-dispatch entry point for the distance-matrix variant of furthest
// point sampling. Only a CUDA implementation exists — CPU input errors.
void furthest_point_sampling_with_dist_forward(Tensor points_tensor,
                                               Tensor temp_tensor,
                                               Tensor idx_tensor, int b, int n,
                                               int m) {
  if (points_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    // Use data_ptr<T>() — data<T>() is deprecated in PyTorch and this now
    // matches furthest_point_sampling_forward in this file.
    const float *points = points_tensor.data_ptr<float>();
    float *temp = temp_tensor.data_ptr<float>();
    int *idx = idx_tensor.data_ptr<int>();
    furthest_point_sampling_with_dist_forward_cuda(b, n, m, points, temp, idx);
#else
    AT_ERROR(
        "furthest_point_sampling_with_dist is not compiled with GPU support");
#endif
  } else {
    AT_ERROR("furthest_point_sampling_with_dist is not implemented on CPU");
  }
}
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
#include "furthest_point_sample_pytorch.h"
using namespace parrots;
#ifdef MMCV_WITH_CUDA
// Parrots adapter: unpacks b/n/m and tensors, then calls the shared entry
// point furthest_point_sampling_forward.
void furthest_point_sample_forward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  int b, n, m;
  SSAttrs(attr).get<int>("b", b).get<int>("n", n).get<int>("m", m).done();

  auto points_tensor = buildATensor(ctx, ins[0]);
  auto temp_tensor = buildATensor(ctx, ins[1]);
  auto idx_tensor = buildATensor(ctx, outs[0]);
  furthest_point_sampling_forward(points_tensor, temp_tensor, idx_tensor, b, n,
                                  m);
}
// Parrots adapter for the distance-matrix variant; same layout as the plain
// furthest point sampling adapter above.
void furthest_point_sampling_with_dist_forward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
    OperatorBase::out_list_t& outs) {
  int b, n, m;
  SSAttrs(attr).get<int>("b", b).get<int>("n", n).get<int>("m", m).done();

  auto points_tensor = buildATensor(ctx, ins[0]);
  auto temp_tensor = buildATensor(ctx, ins[1]);
  auto idx_tensor = buildATensor(ctx, outs[0]);
  furthest_point_sampling_with_dist_forward(points_tensor, temp_tensor,
                                            idx_tensor, b, n, m);
}
// Register both sampling ops; attrs / input / output counts must match the
// adapters above and the Python-side keyword arguments.
PARROTS_EXTENSION_REGISTER(furthest_point_sampling_forward)
    .attr("b")
    .attr("n")
    .attr("m")
    .input(2)   // points, temp
    .output(1)  // idx
    .apply(furthest_point_sample_forward_cuda_parrots)
    .done();

PARROTS_EXTENSION_REGISTER(furthest_point_sampling_with_dist_forward)
    .attr("b")
    .attr("n")
    .attr("m")
    .input(2)   // points (distance matrix), temp
    .output(1)  // idx
    .apply(furthest_point_sampling_with_dist_forward_cuda_parrots)
    .done();
#endif
// Copyright (c) OpenMMLab. All rights reserved
#ifndef FURTHEST_POINT_SAMPLE_PYTORCH_H
#define FURTHEST_POINT_SAMPLE_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Device-dispatching entry points for furthest point sampling; definitions
// live in the corresponding pytorch .cpp file.
void furthest_point_sampling_forward(Tensor points_tensor, Tensor temp_tensor,
                                     Tensor idx_tensor, int b, int n, int m);

void furthest_point_sampling_with_dist_forward(Tensor points_tensor,
                                               Tensor temp_tensor,
                                               Tensor idx_tensor, int b, int n,
                                               int m);
#endif  // FURTHEST_POINT_SAMPLE_PYTORCH_H
#include "pytorch_cpp_helper.hpp"
#ifdef MMCV_WITH_CUDA
// CUDA kernel launchers, implemented in the corresponding .cu file.
void GatherPointsForwardCUDAKernelLauncher(int b, int c, int n, int npoints,
                                           const Tensor points,
                                           const Tensor idx, Tensor out);

// Thin CUDA dispatch wrapper for gather_points forward.
void gather_points_forward_cuda(int b, int c, int n, int npoints,
                                const Tensor points, const Tensor idx,
                                Tensor out) {
  GatherPointsForwardCUDAKernelLauncher(b, c, n, npoints, points, idx, out);
}  // NOTE: removed the stray ';' that followed this function definition.

void GatherPointsBackwardCUDAKernelLauncher(int b, int c, int n, int npoints,
                                            const Tensor grad_out,
                                            const Tensor idx,
                                            Tensor grad_points);

// Thin CUDA dispatch wrapper for gather_points backward.
void gather_points_backward_cuda(int b, int c, int n, int npoints,
                                 const Tensor grad_out, const Tensor idx,
                                 Tensor grad_points) {
  GatherPointsBackwardCUDAKernelLauncher(b, c, n, npoints, grad_out, idx,
                                         grad_points);
}  // NOTE: removed the stray ';' that followed this function definition.
#endif
// Device-dispatch entry point for gather_points forward.
// Only a CUDA implementation exists — CPU input raises an error.
// NOTE(review): unlike ball_query/knn, no CHECK_CUDA_INPUT / contiguity
// checks are performed here — confirm the kernel tolerates that.
void gather_points_forward(Tensor points_tensor, Tensor idx_tensor,
                           Tensor out_tensor, int b, int c, int n,
                           int npoints) {
  if (points_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    gather_points_forward_cuda(b, c, n, npoints, points_tensor, idx_tensor,
                               out_tensor);
#else
    AT_ERROR("gather_points is not compiled with GPU support");
#endif
  } else {
    AT_ERROR("gather_points is not implemented on CPU");
  }
}
// Device-dispatch entry point for gather_points backward; writes gradients
// into grad_points_tensor. Only a CUDA implementation exists — CPU errors.
void gather_points_backward(Tensor grad_out_tensor, Tensor idx_tensor,
                            Tensor grad_points_tensor, int b, int c, int n,
                            int npoints) {
  if (grad_out_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    gather_points_backward_cuda(b, c, n, npoints, grad_out_tensor, idx_tensor,
                                grad_points_tensor);
#else
    AT_ERROR("gather_points is not compiled with GPU support");
#endif
  } else {
    AT_ERROR("gather_points is not implemented on CPU");
  }
}
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
#include "gather_points_pytorch.h"
using namespace parrots;
#ifdef MMCV_WITH_CUDA
// Parrots adapter: unpacks b/c/n/npoints and tensors, then calls the shared
// entry point gather_points_forward.
void gather_points_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                        const OperatorBase::in_list_t& ins,
                                        OperatorBase::out_list_t& outs) {
  int b, c, n, npoints;
  SSAttrs(attr)
      .get<int>("b", b)
      .get<int>("c", c)
      .get<int>("n", n)
      .get<int>("npoints", npoints)
      .done();

  auto points_tensor = buildATensor(ctx, ins[0]);
  auto idx_tensor = buildATensor(ctx, ins[1]);
  auto out_tensor = buildATensor(ctx, outs[0]);
  gather_points_forward(points_tensor, idx_tensor, out_tensor, b, c, n,
                        npoints);
}
// Parrots adapter for the backward pass: unpacks attributes and tensors, then
// calls the shared entry point gather_points_backward.
void gather_points_backward_cuda_parrots(CudaContext& ctx,
                                         const SSElement& attr,
                                         const OperatorBase::in_list_t& ins,
                                         OperatorBase::out_list_t& outs) {
  int b, c, n, npoints;
  SSAttrs(attr)
      .get<int>("b", b)
      .get<int>("c", c)
      .get<int>("n", n)
      .get<int>("npoints", npoints)
      .done();

  auto grad_out_tensor = buildATensor(ctx, ins[0]);
  auto idx_tensor = buildATensor(ctx, ins[1]);
  auto grad_points_tensor = buildATensor(ctx, outs[0]);
  gather_points_backward(grad_out_tensor, idx_tensor, grad_points_tensor, b, c,
                         n, npoints);
}
// Register both gather_points ops; attrs / input / output counts must match
// the adapters above and the Python-side keyword arguments.
PARROTS_EXTENSION_REGISTER(gather_points_forward)
    .attr("b")
    .attr("c")
    .attr("n")
    .attr("npoints")
    .input(2)   // points, idx
    .output(1)  // out
    .apply(gather_points_forward_cuda_parrots)
    .done();

PARROTS_EXTENSION_REGISTER(gather_points_backward)
    .attr("b")
    .attr("c")
    .attr("n")
    .attr("npoints")
    .input(2)   // grad_out, idx
    .output(1)  // grad_points
    .apply(gather_points_backward_cuda_parrots)
    .done();
#endif
// Copyright (c) OpenMMLab. All rights reserved
#ifndef GATHER_POINTS_PYTORCH_H
#define GATHER_POINTS_PYTORCH_H
#include <torch/extension.h>
using namespace at;

// Device-dispatching entry points for gather_points; definitions live in the
// corresponding pytorch .cpp file.
void gather_points_forward(Tensor points_tensor, Tensor idx_tensor,
                           Tensor out_tensor, int b, int c, int n, int npoints);

void gather_points_backward(Tensor grad_out_tensor, Tensor idx_tensor,
                            Tensor grad_points_tensor, int b, int c, int n,
                            int npoints);
#endif  // GATHER_POINTS_PYTORCH_H
// Modified from
// https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/pointops/src/knnquery_heap
#include "pytorch_cpp_helper.hpp"
#ifdef MMCV_WITH_CUDA
// CUDA kernel launcher, implemented in the corresponding .cu file.
void KNNForwardCUDAKernelLauncher(int b, int n, int m, int nsample,
                                  const Tensor xyz, const Tensor new_xyz,
                                  Tensor idx, Tensor dist2);

// Thin CUDA dispatch wrapper: forwards all arguments to the kernel launcher.
void knn_forward_cuda(int b, int n, int m, int nsample, const Tensor xyz,
                      const Tensor new_xyz, Tensor idx, Tensor dist2) {
  KNNForwardCUDAKernelLauncher(b, n, m, nsample, xyz, new_xyz, idx, dist2);
}
#endif
// Device-dispatch entry point for k-nearest-neighbor search: fills idx_tensor
// with neighbor indices and dist2_tensor with squared distances (exact
// semantics defined by the CUDA kernel). Only a CUDA implementation exists.
void knn_forward(Tensor xyz_tensor, Tensor new_xyz_tensor, Tensor idx_tensor,
                 Tensor dist2_tensor, int b, int n, int m, int nsample) {
  if (new_xyz_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    // CHECK_CUDA_INPUT verifies device placement and contiguity.
    CHECK_CUDA_INPUT(new_xyz_tensor);
    CHECK_CUDA_INPUT(xyz_tensor);

    knn_forward_cuda(b, n, m, nsample, xyz_tensor, new_xyz_tensor, idx_tensor,
                     dist2_tensor);
#else
    AT_ERROR("knn is not compiled with GPU support");
#endif
  } else {
    AT_ERROR("knn is not implemented on CPU");
  }
}
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
#include "knn_pytorch.h"
using namespace parrots;
#ifdef MMCV_WITH_CUDA
// Parrots adapter: unpacks b/n/m/nsample and tensors, then calls the shared
// entry point knn_forward.
void knn_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                              const OperatorBase::in_list_t& ins,
                              OperatorBase::out_list_t& outs) {
  int b, n, m, nsample;
  SSAttrs(attr)
      .get<int>("b", b)
      .get<int>("n", n)
      .get<int>("m", m)
      .get<int>("nsample", nsample)
      .done();

  auto xyz_tensor = buildATensor(ctx, ins[0]);
  auto new_xyz_tensor = buildATensor(ctx, ins[1]);
  auto idx_tensor = buildATensor(ctx, outs[0]);
  auto dist2_tensor = buildATensor(ctx, outs[1]);
  knn_forward(xyz_tensor, new_xyz_tensor, idx_tensor, dist2_tensor, b, n, m,
              nsample);
}

// Attrs / input / output counts must match the adapter above and the
// Python-side keyword arguments.
PARROTS_EXTENSION_REGISTER(knn_forward)
    .attr("b")
    .attr("n")
    .attr("m")
    .attr("nsample")
    .input(2)   // xyz, new_xyz
    .output(2)  // idx, dist2
    .apply(knn_forward_cuda_parrots)
    .done();
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment