Unverified Commit 5066d715 authored by Gao, Xiang's avatar Gao, Xiang Committed by GitHub
Browse files

Don't include CUDAApplyUtils.cuh (#2127)

* Don't include CUDAApplyUtils.cuh

* fix format

* fix atomic
parent f8f131a3
...@@ -70,7 +70,7 @@ ...@@ -70,7 +70,7 @@
#include <ATen/TensorUtils.h> #include <ATen/TensorUtils.h>
#include <ATen/cuda/CUDAContext.h> #include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h> #include <c10/cuda/CUDAGuard.h>
#include <ATen/cuda/CUDAApplyUtils.cuh> #include <THC/THCAtomics.cuh>
#include "cuda_helpers.h" #include "cuda_helpers.h"
......
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
#include <ATen/TensorUtils.h> #include <ATen/TensorUtils.h>
#include <ATen/cuda/CUDAContext.h> #include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h> #include <c10/cuda/CUDAGuard.h>
#include <THC/THCAtomics.cuh>
#include <stdio.h> #include <stdio.h>
#include <ATen/cuda/CUDAApplyUtils.cuh>
#include "cuda_helpers.h" #include "cuda_helpers.h"
...@@ -337,8 +337,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIAlign_forward_cuda( ...@@ -337,8 +337,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIAlign_forward_cuda(
cudaStream_t stream = at::cuda::getCurrentCUDAStream(); cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 grid(std::min( dim3 grid(std::min(
at::cuda::ATenCeilDiv( ceil_div(static_cast<int64_t>(output_size), static_cast<int64_t>(512)),
static_cast<int64_t>(output_size), static_cast<int64_t>(512)),
static_cast<int64_t>(4096))); static_cast<int64_t>(4096)));
dim3 block(512); dim3 block(512);
...@@ -401,8 +400,7 @@ at::Tensor PSROIAlign_backward_cuda( ...@@ -401,8 +400,7 @@ at::Tensor PSROIAlign_backward_cuda(
cudaStream_t stream = at::cuda::getCurrentCUDAStream(); cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 grid(std::min( dim3 grid(std::min(
at::cuda::ATenCeilDiv( ceil_div(static_cast<int64_t>(grad.numel()), static_cast<int64_t>(512)),
static_cast<int64_t>(grad.numel()), static_cast<int64_t>(512)),
static_cast<int64_t>(4096))); static_cast<int64_t>(4096)));
dim3 block(512); dim3 block(512);
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#include <ATen/TensorUtils.h> #include <ATen/TensorUtils.h>
#include <ATen/cuda/CUDAContext.h> #include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h> #include <c10/cuda/CUDAGuard.h>
#include <ATen/cuda/CUDAApplyUtils.cuh> #include <THC/THCAtomics.cuh>
#include "cuda_helpers.h" #include "cuda_helpers.h"
...@@ -174,8 +174,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIPool_forward_cuda( ...@@ -174,8 +174,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIPool_forward_cuda(
cudaStream_t stream = at::cuda::getCurrentCUDAStream(); cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 grid(std::min( dim3 grid(std::min(
at::cuda::ATenCeilDiv( ceil_div(static_cast<int64_t>(output_size), static_cast<int64_t>(512)),
static_cast<int64_t>(output_size), static_cast<int64_t>(512)),
static_cast<int64_t>(4096))); static_cast<int64_t>(4096)));
dim3 block(512); dim3 block(512);
...@@ -235,8 +234,7 @@ at::Tensor PSROIPool_backward_cuda( ...@@ -235,8 +234,7 @@ at::Tensor PSROIPool_backward_cuda(
cudaStream_t stream = at::cuda::getCurrentCUDAStream(); cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 grid(std::min( dim3 grid(std::min(
at::cuda::ATenCeilDiv( ceil_div(static_cast<int64_t>(grad.numel()), static_cast<int64_t>(512)),
static_cast<int64_t>(grad.numel()), static_cast<int64_t>(512)),
static_cast<int64_t>(4096))); static_cast<int64_t>(4096)));
dim3 block(512); dim3 block(512);
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#include <ATen/TensorUtils.h> #include <ATen/TensorUtils.h>
#include <ATen/cuda/CUDAContext.h> #include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h> #include <c10/cuda/CUDAGuard.h>
#include <ATen/cuda/CUDAApplyUtils.cuh> #include <THC/THCAtomics.cuh>
#include "cuda_helpers.h" #include "cuda_helpers.h"
...@@ -335,8 +335,7 @@ at::Tensor ROIAlign_forward_cuda( ...@@ -335,8 +335,7 @@ at::Tensor ROIAlign_forward_cuda(
cudaStream_t stream = at::cuda::getCurrentCUDAStream(); cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 grid(std::min( dim3 grid(std::min(
at::cuda::ATenCeilDiv( ceil_div(static_cast<int64_t>(output_size), static_cast<int64_t>(512)),
static_cast<int64_t>(output_size), static_cast<int64_t>(512)),
static_cast<int64_t>(4096))); static_cast<int64_t>(4096)));
dim3 block(512); dim3 block(512);
...@@ -395,8 +394,7 @@ at::Tensor ROIAlign_backward_cuda( ...@@ -395,8 +394,7 @@ at::Tensor ROIAlign_backward_cuda(
cudaStream_t stream = at::cuda::getCurrentCUDAStream(); cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 grid(std::min( dim3 grid(std::min(
at::cuda::ATenCeilDiv( ceil_div(static_cast<int64_t>(grad.numel()), static_cast<int64_t>(512)),
static_cast<int64_t>(grad.numel()), static_cast<int64_t>(512)),
static_cast<int64_t>(4096))); static_cast<int64_t>(4096)));
dim3 block(512); dim3 block(512);
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#include <ATen/TensorUtils.h> #include <ATen/TensorUtils.h>
#include <ATen/cuda/CUDAContext.h> #include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h> #include <c10/cuda/CUDAGuard.h>
#include <ATen/cuda/CUDAApplyUtils.cuh> #include <THC/THCAtomics.cuh>
#include "cuda_helpers.h" #include "cuda_helpers.h"
...@@ -147,8 +147,7 @@ std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cuda( ...@@ -147,8 +147,7 @@ std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cuda(
cudaStream_t stream = at::cuda::getCurrentCUDAStream(); cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 grid(std::min( dim3 grid(std::min(
at::cuda::ATenCeilDiv( ceil_div(static_cast<int64_t>(output_size), static_cast<int64_t>(512)),
static_cast<int64_t>(output_size), static_cast<int64_t>(512)),
static_cast<int64_t>(4096))); static_cast<int64_t>(4096)));
dim3 block(512); dim3 block(512);
...@@ -210,8 +209,7 @@ at::Tensor ROIPool_backward_cuda( ...@@ -210,8 +209,7 @@ at::Tensor ROIPool_backward_cuda(
cudaStream_t stream = at::cuda::getCurrentCUDAStream(); cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 grid(std::min( dim3 grid(std::min(
at::cuda::ATenCeilDiv( ceil_div(static_cast<int64_t>(grad.numel()), static_cast<int64_t>(512)),
static_cast<int64_t>(grad.numel()), static_cast<int64_t>(512)),
static_cast<int64_t>(4096))); static_cast<int64_t>(4096)));
dim3 block(512); dim3 block(512);
......
...@@ -3,3 +3,8 @@ ...@@ -3,3 +3,8 @@
#define CUDA_1D_KERNEL_LOOP(i, n) \ #define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = (blockIdx.x * blockDim.x) + threadIdx.x; i < (n); \ for (int i = (blockIdx.x * blockDim.x) + threadIdx.x; i < (n); \
i += (blockDim.x * gridDim.x)) i += (blockDim.x * gridDim.x))
// Integer ceiling division: the number of size-`m` chunks needed to cover `n`
// items, computed as (n + m - 1) / m.
//
// Both arguments must have the same integer type, because `integer` is deduced
// from each of them. The formula is a true ceiling only for n >= 0 and m > 0,
// and n + m - 1 must not overflow `integer`.
//
// This helper is used for host-side launch configuration and is also called
// from kernel code, so under a CUDA/HIP compiler it is marked
// __host__ __device__ explicitly instead of relying on nvcc's experimental
// --expt-relaxed-constexpr flag to make a host constexpr function callable
// from device code. Other compilers see a plain constexpr function.
#if defined(__CUDACC__) || defined(__HIPCC__)
#define CEIL_DIV_HOST_DEVICE __host__ __device__
#else
#define CEIL_DIV_HOST_DEVICE
#endif
template <typename integer>
constexpr CEIL_DIV_HOST_DEVICE inline integer ceil_div(integer n, integer m) {
  return (n + m - 1) / m;
}
#undef CEIL_DIV_HOST_DEVICE
#include <ATen/ATen.h> #include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h> #include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h> #include <c10/cuda/CUDAGuard.h>
#include <ATen/cuda/CUDAApplyUtils.cuh>
#include "cuda_helpers.h" #include "cuda_helpers.h"
...@@ -64,7 +63,7 @@ __global__ void nms_kernel( ...@@ -64,7 +63,7 @@ __global__ void nms_kernel(
t |= 1ULL << i; t |= 1ULL << i;
} }
} }
const int col_blocks = at::cuda::ATenCeilDiv(n_boxes, threadsPerBlock); const int col_blocks = ceil_div(n_boxes, threadsPerBlock);
dev_mask[cur_box_idx * col_blocks + col_start] = t; dev_mask[cur_box_idx * col_blocks + col_start] = t;
} }
} }
...@@ -81,7 +80,7 @@ at::Tensor nms_cuda(const at::Tensor& dets, ...@@ -81,7 +80,7 @@ at::Tensor nms_cuda(const at::Tensor& dets,
int dets_num = dets.size(0); int dets_num = dets.size(0);
const int col_blocks = at::cuda::ATenCeilDiv(dets_num, threadsPerBlock); const int col_blocks = ceil_div(dets_num, threadsPerBlock);
at::Tensor mask = at::Tensor mask =
at::empty({dets_num * col_blocks}, dets.options().dtype(at::kLong)); at::empty({dets_num * col_blocks}, dets.options().dtype(at::kLong));
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment