Unverified commit b820b0be authored by Wenwei Zhang, committed by GitHub

[Fix]: fix compilation error in pytorch 1.7 (#393)

* fix compilation error in pytorch 1.7

* add pt1.7 build

* Update build.yml
parent dabf0a26
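
The compilation error traces to an ATen API change: the Tensor::max_values overload was removed in PyTorch 1.7. Tensor::max(dim) is the portable replacement; it returns a (values, indices) tuple, so the values must be unpacked with std::get<0>. A minimal sketch of the migration applied in the forward function patched below (coor_space_dim mirrors the variable this commit changes):

#include <ATen/ATen.h>
#include <tuple>

// Sketch of the fix: compute the per-dimension maximum of an integer
// coordinate tensor, as dynamic_point_to_voxel_forward_gpu does below.
at::Tensor coor_space_dim(const at::Tensor &coors) {
  // Before (removed in PyTorch 1.7, no longer compiles):
  //   auto space_dim = coors.max_values(0) + 1;
  // After: Tensor::max(dim) returns a (values, indices) tuple.
  auto space_dim = std::get<0>(coors.max(0)) + 1;
  return space_dim;
}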
@@ -38,7 +38,7 @@ jobs:
     strategy:
       matrix:
         python-version: [3.6, 3.7]
-        torch: [1.5.0+cu101, 1.6.0+cu101]
+        torch: [1.5.0+cu101, 1.6.0+cu101, 1.7.0+cu101]
         include:
           - torch: 1.5.0+cu101
             torchvision: 0.6.0+cu101
@@ -48,6 +48,10 @@ jobs:
             mmcv: 1.6.0+cu101
             torchvision: 0.7.0+cu101
             cuda_arch: "7.0"
+          - torch: 1.7.0+cu101
+            mmcv: 1.7.0+cu101
+            torchvision: 0.8.1+cu101
+            cuda_arch: "7.0"

     steps:
     - uses: actions/checkout@v2
@@ -1,5 +1,6 @@
 #include <ATen/ATen.h>
 #include <ATen/cuda/CUDAContext.h>
+#include <torch/types.h>
 #include <ATen/cuda/CUDAApplyUtils.cuh>

 typedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t;
@@ -114,17 +115,16 @@ __global__ void coors_map_init_kernel(const int64_t *coors_id,
 }

 template <typename T, typename T_int>
-__global__ void
-feats_reduce_kernel(const T *feats, const T_int *coors, int32_t *coors_map,
-                    int32_t *reduce_count,  // shall be 0 at initialization
-                    T *reduced_feats,       // shall be 0 at initialization
-                    T_int *out_coors, const int num_input, const int num_feats,
-                    const int NDim, const reduce_t reduce_type) {
+__global__ void feats_reduce_kernel(
+    const T *feats, const T_int *coors, int32_t *coors_map,
+    int32_t *reduce_count,  // shall be 0 at initialization
+    T *reduced_feats,       // shall be 0 at initialization
+    T_int *out_coors, const int num_input, const int num_feats, const int NDim,
+    const reduce_t reduce_type) {
   for (int x = blockIdx.x * blockDim.x + threadIdx.x; x < num_input;
        x += gridDim.x * blockDim.x) {
     int32_t reduce_to = coors_map[x];
-    if (reduce_to == -1)
-      continue;
+    if (reduce_to == -1) continue;

     const T_int *coors_offset = coors + x * NDim;
     T_int *out_coors_offset = out_coors + reduce_to * NDim;
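
A side note on the signature reformatted above: feats_reduce_kernel, like the other kernels in this file, walks its input with a grid-stride loop, so correctness does not depend on the launch shape. A self-contained sketch of the pattern (scale_kernel is an illustrative kernel, not one from this file):

// Grid-stride loop: each thread starts at its global index and advances
// by the total number of launched threads, so all n elements are covered
// for any grid size.
__global__ void scale_kernel(float *data, int n, float alpha) {
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
       i += gridDim.x * blockDim.x) {
    data[i] *= alpha;
  }
}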
@@ -207,13 +207,13 @@ __global__ void max_reduce_traceback_scatter_idx_kernel(
 }

 template <typename T>
-__global__ void
-max_reduce_scatter_grad_kernel(T *grad_feats, const T *grad_reduced_feats,
-                               const int32_t *reduce_from,
-                               const int num_reduced, const int num_feats) {
+__global__ void max_reduce_scatter_grad_kernel(T *grad_feats,
+                                               const T *grad_reduced_feats,
+                                               const int32_t *reduce_from,
+                                               const int num_reduced,
+                                               const int num_feats) {
   for (int x = blockIdx.x * blockDim.x + threadIdx.x; x < num_reduced;
        x += gridDim.x * blockDim.x) {
     const int reduced_offset = x * num_feats;
     const int32_t *scatter_to_offset = reduce_from + reduced_offset;
     const T *grad_reduced_feats_offset = grad_reduced_feats + reduced_offset;
@@ -227,9 +227,8 @@ max_reduce_scatter_grad_kernel(T *grad_feats, const T *grad_reduced_feats,
 namespace voxelization {

-std::vector<at::Tensor>
-dynamic_point_to_voxel_forward_gpu(const at::Tensor &feats,
-                                   const at::Tensor &coors,
-                                   const reduce_t reduce_type) {
+std::vector<at::Tensor> dynamic_point_to_voxel_forward_gpu(
+    const at::Tensor &feats, const at::Tensor &coors,
+    const reduce_t reduce_type) {
   CHECK_INPUT(feats);
   CHECK_INPUT(coors);
@@ -239,11 +238,10 @@ dynamic_point_to_voxel_forward_gpu(const at::Tensor &feats,
   const int num_feats = feats.size(1);

   auto coors_id = at::empty({num_input}, coors.options().dtype(torch::kInt64));
-  auto coor_space_dim = coors.max_values(0) + 1;
+  auto coor_space_dim = std::get<0>(coors.max(0)) + 1;
   auto coors_map_sorted =
       at::empty({num_input}, coors.options().dtype(torch::kInt32));
-  auto coors_map =
-      at::empty({num_input}, coors.options().dtype(torch::kInt32));
+  auto coors_map = at::empty({num_input}, coors.options().dtype(torch::kInt32));
   auto num_coors = at::zeros({1}, coors.options().dtype(torch::kInt32));

   AT_DISPATCH_INTEGRAL_TYPES(
@@ -276,8 +274,7 @@ dynamic_point_to_voxel_forward_gpu(const at::Tensor &feats,
   const int num_coors_cpu =
       coors_map_sorted[-1].cpu().data_ptr<int32_t>()[0] + 1;
   auto out_coors = at::empty({num_coors_cpu, NDim}, coors.options());
-  auto reduced_feats =
-      at::empty({num_coors_cpu, num_feats}, feats.options());
+  auto reduced_feats = at::empty({num_coors_cpu, num_feats}, feats.options());
   auto reduce_count =
       at::zeros({num_coors_cpu}, coors.options().dtype(torch::kInt32));
@@ -313,10 +310,12 @@ dynamic_point_to_voxel_forward_gpu(const at::Tensor &feats,
   return {reduced_feats, out_coors, coors_map, reduce_count};
 }

-void dynamic_point_to_voxel_backward_gpu(
-    at::Tensor &grad_feats, const at::Tensor &grad_reduced_feats,
-    const at::Tensor &feats, const at::Tensor &reduced_feats,
-    const at::Tensor &coors_map, const at::Tensor &reduce_count,
-    const reduce_t reduce_type) {
+void dynamic_point_to_voxel_backward_gpu(at::Tensor &grad_feats,
+                                         const at::Tensor &grad_reduced_feats,
+                                         const at::Tensor &feats,
+                                         const at::Tensor &reduced_feats,
+                                         const at::Tensor &coors_map,
+                                         const at::Tensor &reduce_count,
+                                         const reduce_t reduce_type) {
   CHECK_INPUT(grad_feats);
   CHECK_INPUT(grad_reduced_feats);
@@ -336,9 +335,8 @@ void dynamic_point_to_voxel_backward_gpu(
   AT_DISPATCH_FLOATING_TYPES(
       grad_reduced_feats.scalar_type(), "add_reduce_traceback_grad_kernel",
       ([&] {
-        dim3 blocks
-        (std::min(at::cuda::ATenCeilDiv(num_input, threadsPerBlock),
-                  maxGridDim));
+        dim3 blocks(std::min(
+            at::cuda::ATenCeilDiv(num_input, threadsPerBlock), maxGridDim));
         dim3 threads(threadsPerBlock);
         add_reduce_traceback_grad_kernel<<<blocks, threads>>>(
             grad_feats.data_ptr<scalar_t>(),
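
The reformatted launch computation here (and in the two hunks below) follows one recipe: at::cuda::ATenCeilDiv rounds num_input / threadsPerBlock up, and std::min caps the grid at maxGridDim, leaving the kernels' grid-stride loops to absorb any remainder. A minimal sketch, assuming example constants (threadsPerBlock and maxGridDim here are illustrative values, not the ones this file defines):

#include <ATen/cuda/CUDAApplyUtils.cuh>  // at::cuda::ATenCeilDiv
#include <algorithm>

void launch_shape(int num_input) {
  const int threadsPerBlock = 512;  // assumed example value
  const int maxGridDim = 50000;     // assumed example value
  // Ceiling division, clamped so huge inputs cannot exceed the grid cap.
  dim3 blocks(
      std::min(at::cuda::ATenCeilDiv(num_input, threadsPerBlock), maxGridDim));
  dim3 threads(threadsPerBlock);
  (void)blocks;   // the actual <<<blocks, threads>>> launch is elided here
  (void)threads;
}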
@@ -353,9 +351,8 @@ void dynamic_point_to_voxel_backward_gpu(
   AT_DISPATCH_FLOATING_TYPES(
       grad_reduced_feats.scalar_type(),
       "max_reduce_traceback_scatter_idx_kernel", ([&] {
-        dim3 blocks
-        (std::min(at::cuda::ATenCeilDiv(num_input, threadsPerBlock),
-                  maxGridDim));
+        dim3 blocks(std::min(
+            at::cuda::ATenCeilDiv(num_input, threadsPerBlock), maxGridDim));
         dim3 threads(threadsPerBlock);
         max_reduce_traceback_scatter_idx_kernel<<<blocks, threads>>>(
             feats.data_ptr<scalar_t>(), reduced_feats.data_ptr<scalar_t>(),
@@ -367,9 +364,8 @@ void dynamic_point_to_voxel_backward_gpu(
   AT_DISPATCH_FLOATING_TYPES(
       grad_reduced_feats.scalar_type(),
       "max_reduce_traceback_scatter_idx_kernel", ([&] {
-        dim3 blocks(
-            std::min(at::cuda::ATenCeilDiv(num_reduced, threadsPerBlock),
-                     maxGridDim));
+        dim3 blocks(std::min(
+            at::cuda::ATenCeilDiv(num_reduced, threadsPerBlock), maxGridDim));
         dim3 threads(threadsPerBlock);
         max_reduce_scatter_grad_kernel<<<blocks, threads>>>(
             grad_feats.data_ptr<scalar_t>(),
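
All three launch sites sit inside AT_DISPATCH_FLOATING_TYPES, which switches on the tensor's runtime dtype and instantiates the lambda once per supported C++ type, with scalar_t bound accordingly. A minimal sketch of that dispatch pattern (fill_ones is a hypothetical helper, not a function from this commit; it assumes a contiguous CPU tensor):

#include <ATen/ATen.h>
#include <ATen/Dispatch.h>

void fill_ones(at::Tensor &t) {
  AT_DISPATCH_FLOATING_TYPES(t.scalar_type(), "fill_ones", ([&] {
    // scalar_t is float or double here, selected by t's runtime dtype.
    auto *ptr = t.data_ptr<scalar_t>();
    for (int64_t i = 0; i < t.numel(); ++i) ptr[i] = scalar_t(1);
  }));
}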