"git@developer.sourcefind.cn:OpenDAS/megatron-lm.git" did not exist on "5fd4fd2870b45a41c2d0cd4d2b6b4ca8434c4bd2"
Unverified Commit 5066d715 authored by Gao, Xiang's avatar Gao, Xiang Committed by GitHub
Browse files

Don't include CUDAApplyUtils.cuh (#2127)

* Don't include CUDAApplyUtils.cuh

* fix format

* fix atomic
parent f8f131a3
......@@ -70,7 +70,7 @@
#include <ATen/TensorUtils.h>
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h>
#include <ATen/cuda/CUDAApplyUtils.cuh>
#include <THC/THCAtomics.cuh>
#include "cuda_helpers.h"
......
......@@ -2,8 +2,8 @@
#include <ATen/TensorUtils.h>
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h>
#include <THC/THCAtomics.cuh>
#include <stdio.h>
#include <ATen/cuda/CUDAApplyUtils.cuh>
#include "cuda_helpers.h"
......@@ -337,8 +337,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIAlign_forward_cuda(
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 grid(std::min(
at::cuda::ATenCeilDiv(
static_cast<int64_t>(output_size), static_cast<int64_t>(512)),
ceil_div(static_cast<int64_t>(output_size), static_cast<int64_t>(512)),
static_cast<int64_t>(4096)));
dim3 block(512);
......@@ -401,8 +400,7 @@ at::Tensor PSROIAlign_backward_cuda(
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 grid(std::min(
at::cuda::ATenCeilDiv(
static_cast<int64_t>(grad.numel()), static_cast<int64_t>(512)),
ceil_div(static_cast<int64_t>(grad.numel()), static_cast<int64_t>(512)),
static_cast<int64_t>(4096)));
dim3 block(512);
......
......@@ -2,7 +2,7 @@
#include <ATen/TensorUtils.h>
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h>
#include <ATen/cuda/CUDAApplyUtils.cuh>
#include <THC/THCAtomics.cuh>
#include "cuda_helpers.h"
......@@ -174,8 +174,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIPool_forward_cuda(
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 grid(std::min(
at::cuda::ATenCeilDiv(
static_cast<int64_t>(output_size), static_cast<int64_t>(512)),
ceil_div(static_cast<int64_t>(output_size), static_cast<int64_t>(512)),
static_cast<int64_t>(4096)));
dim3 block(512);
......@@ -235,8 +234,7 @@ at::Tensor PSROIPool_backward_cuda(
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 grid(std::min(
at::cuda::ATenCeilDiv(
static_cast<int64_t>(grad.numel()), static_cast<int64_t>(512)),
ceil_div(static_cast<int64_t>(grad.numel()), static_cast<int64_t>(512)),
static_cast<int64_t>(4096)));
dim3 block(512);
......
......@@ -2,7 +2,7 @@
#include <ATen/TensorUtils.h>
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h>
#include <ATen/cuda/CUDAApplyUtils.cuh>
#include <THC/THCAtomics.cuh>
#include "cuda_helpers.h"
......@@ -335,8 +335,7 @@ at::Tensor ROIAlign_forward_cuda(
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 grid(std::min(
at::cuda::ATenCeilDiv(
static_cast<int64_t>(output_size), static_cast<int64_t>(512)),
ceil_div(static_cast<int64_t>(output_size), static_cast<int64_t>(512)),
static_cast<int64_t>(4096)));
dim3 block(512);
......@@ -395,8 +394,7 @@ at::Tensor ROIAlign_backward_cuda(
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 grid(std::min(
at::cuda::ATenCeilDiv(
static_cast<int64_t>(grad.numel()), static_cast<int64_t>(512)),
ceil_div(static_cast<int64_t>(grad.numel()), static_cast<int64_t>(512)),
static_cast<int64_t>(4096)));
dim3 block(512);
......
......@@ -2,7 +2,7 @@
#include <ATen/TensorUtils.h>
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h>
#include <ATen/cuda/CUDAApplyUtils.cuh>
#include <THC/THCAtomics.cuh>
#include "cuda_helpers.h"
......@@ -147,8 +147,7 @@ std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cuda(
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 grid(std::min(
at::cuda::ATenCeilDiv(
static_cast<int64_t>(output_size), static_cast<int64_t>(512)),
ceil_div(static_cast<int64_t>(output_size), static_cast<int64_t>(512)),
static_cast<int64_t>(4096)));
dim3 block(512);
......@@ -210,8 +209,7 @@ at::Tensor ROIPool_backward_cuda(
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 grid(std::min(
at::cuda::ATenCeilDiv(
static_cast<int64_t>(grad.numel()), static_cast<int64_t>(512)),
ceil_div(static_cast<int64_t>(grad.numel()), static_cast<int64_t>(512)),
static_cast<int64_t>(4096)));
dim3 block(512);
......
......@@ -3,3 +3,8 @@
// Expands to the header of a 1-D grid-stride `for` loop; the statement or
// block that follows the macro invocation becomes the loop body.
//   i - name of the loop index; the macro declares it as `int`.
//   n - exclusive upper bound; iteration continues while i < (n).
// Each thread starts at its flattened global index
// (blockIdx.x * blockDim.x + threadIdx.x) and advances by the total number of
// threads in the x dimension of the grid (blockDim.x * gridDim.x).
// NOTE(review): because the index is an `int`, this presumably assumes (n)
// fits in a 32-bit signed integer - confirm for very large inputs.
#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = (blockIdx.x * blockDim.x) + threadIdx.x; i < (n); \
i += (blockDim.x * gridDim.x))
// Execution-space qualifiers for ceil_div. Under a CUDA/HIP compiler the
// function is made callable from both host and device code, so kernels can use
// it without relying on nvcc's --expt-relaxed-constexpr flag. Under a plain
// host compiler the qualifiers expand to nothing.
#if defined(__CUDACC__) || defined(__HIPCC__)
#define CEIL_DIV_HOST_DEVICE __host__ __device__
#else
#define CEIL_DIV_HOST_DEVICE
#endif

// Integer ceiling division: returns n / m rounded up to the next integer.
//   n - dividend; expected to be non-negative.
//   m - divisor; expected to be strictly positive.
// Both arguments must have the same integer type, since the template deduces
// a single type for them. The intermediate sum n + m - 1 must be
// representable in that type; callers working near the type's maximum should
// widen the operands first.
template <typename integer>
constexpr CEIL_DIV_HOST_DEVICE inline integer ceil_div(integer n, integer m) {
  return (n + m - 1) / m;
}
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h>
#include <ATen/cuda/CUDAApplyUtils.cuh>
#include "cuda_helpers.h"
......@@ -64,7 +63,7 @@ __global__ void nms_kernel(
t |= 1ULL << i;
}
}
const int col_blocks = at::cuda::ATenCeilDiv(n_boxes, threadsPerBlock);
const int col_blocks = ceil_div(n_boxes, threadsPerBlock);
dev_mask[cur_box_idx * col_blocks + col_start] = t;
}
}
......@@ -81,7 +80,7 @@ at::Tensor nms_cuda(const at::Tensor& dets,
int dets_num = dets.size(0);
const int col_blocks = at::cuda::ATenCeilDiv(dets_num, threadsPerBlock);
const int col_blocks = ceil_div(dets_num, threadsPerBlock);
at::Tensor mask =
at::empty({dets_num * col_blocks}, dets.options().dtype(at::kLong));
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment