Commit ecc9800a authored by tpoisonooo, committed by Zaida Zhou
Browse files

Fix warning of CUDA ops (#2324)

parent e0b3223b
...@@ -44,6 +44,7 @@ __device__ bool compare_vertices(float x1, float y1, float x2, float y2) { ...@@ -44,6 +44,7 @@ __device__ bool compare_vertices(float x1, float y1, float x2, float y2) {
else else
return false; return false;
} }
return false;
} }
__global__ void diff_iou_rotated_sort_vertices_forward_cuda_kernel( __global__ void diff_iou_rotated_sort_vertices_forward_cuda_kernel(
......
...@@ -36,7 +36,7 @@ TV_HOST_DEVICE Index getValidOutPos(const Index *input_pos, ...@@ -36,7 +36,7 @@ TV_HOST_DEVICE Index getValidOutPos(const Index *input_pos,
Index m, offset; Index m, offset;
bool valid = false; bool valid = false;
#pragma unroll #pragma unroll
for (int i = 0; i < NDim; ++i) { for (unsigned i = 0; i < NDim; ++i) {
lowers[i] = (input_pos[i] - (kernelSize[i] - 1) * dilation[i] - 1 + lowers[i] = (input_pos[i] - (kernelSize[i] - 1) * dilation[i] - 1 +
stride[i] + padding[i]) / stride[i] + padding[i]) /
stride[i]; stride[i];
...@@ -50,7 +50,7 @@ TV_HOST_DEVICE Index getValidOutPos(const Index *input_pos, ...@@ -50,7 +50,7 @@ TV_HOST_DEVICE Index getValidOutPos(const Index *input_pos,
} }
#pragma unroll #pragma unroll
for (int i = 0; i < NDim; ++i) { for (unsigned i = 0; i < NDim; ++i) {
counter[i] = 0; counter[i] = 0;
} }
for (int i = 0; i < numPoints; ++i) { for (int i = 0; i < numPoints; ++i) {
...@@ -98,7 +98,7 @@ TV_HOST_DEVICE Index getValidOutPosTranspose( ...@@ -98,7 +98,7 @@ TV_HOST_DEVICE Index getValidOutPosTranspose(
Index m, offset; Index m, offset;
bool valid = false; bool valid = false;
#pragma unroll #pragma unroll
for (int i = 0; i < NDim; ++i) { for (unsigned i = 0; i < NDim; ++i) {
lowers[i] = input_pos[i] * stride[i] - padding[i]; lowers[i] = input_pos[i] * stride[i] - padding[i];
uppers[i] = lowers[i] + (kernelSize[i] - 1) * dilation[i]; uppers[i] = lowers[i] + (kernelSize[i] - 1) * dilation[i];
} }
...@@ -108,7 +108,7 @@ TV_HOST_DEVICE Index getValidOutPosTranspose( ...@@ -108,7 +108,7 @@ TV_HOST_DEVICE Index getValidOutPosTranspose(
numPoints *= counterSize[i]; numPoints *= counterSize[i];
} }
#pragma unroll #pragma unroll
for (int i = 0; i < NDim; ++i) { for (unsigned i = 0; i < NDim; ++i) {
counter[i] = 0; counter[i] = 0;
} }
for (int i = 0; i < numPoints; ++i) { for (int i = 0; i < numPoints; ++i) {
...@@ -251,9 +251,7 @@ Index getIndicePairsSubM(tv::TensorView<const Index> indicesIn, ...@@ -251,9 +251,7 @@ Index getIndicePairsSubM(tv::TensorView<const Index> indicesIn,
const Index *const stride, const Index *const padding, const Index *const stride, const Index *const padding,
const Index *dilation, const Index *dilation,
const Index *const outSpatialShape) { const Index *const outSpatialShape) {
Index numAct = 0;
auto numActIn = indicesIn.dim(0); auto numActIn = indicesIn.dim(0);
Index batchIdx = 0;
Index spatialVolume = 1; Index spatialVolume = 1;
#pragma unroll #pragma unroll
for (int i = 0; i < NDim; ++i) { for (int i = 0; i < NDim; ++i) {
......
...@@ -40,9 +40,6 @@ torch::Tensor FusedIndiceConvBatchnormCUDAKernelLauncher( ...@@ -40,9 +40,6 @@ torch::Tensor FusedIndiceConvBatchnormCUDAKernelLauncher(
// add. // add.
torch::mm_out(output, features, filters[indicePairMaxOffset]); torch::mm_out(output, features, filters[indicePairMaxOffset]);
} }
double totalGatherTime = 0;
double totalGEMMTime = 0;
double totalSAddTime = 0;
for (int i = 0; i < kernelVolume; ++i) { for (int i = 0; i < kernelVolume; ++i) {
auto nHot = indicePairNumCpu.data_ptr<int>()[i]; auto nHot = indicePairNumCpu.data_ptr<int>()[i];
if (nHot <= 0 || (subM && i == indicePairMaxOffset)) { if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {
......
...@@ -17,7 +17,6 @@ torch::Tensor IndiceMaxpoolForwardCUDAKernelLauncher(torch::Tensor features, ...@@ -17,7 +17,6 @@ torch::Tensor IndiceMaxpoolForwardCUDAKernelLauncher(torch::Tensor features,
auto options = auto options =
torch::TensorOptions().dtype(features.dtype()).device(features.device()); torch::TensorOptions().dtype(features.dtype()).device(features.device());
torch::Tensor output = torch::zeros({numAct, numInPlanes}, options); torch::Tensor output = torch::zeros({numAct, numInPlanes}, options);
double totalTime = 0;
for (int i = 0; i < kernelVolume; ++i) { for (int i = 0; i < kernelVolume; ++i) {
auto nHot = indicePairNumCpu.data_ptr<int>()[i]; auto nHot = indicePairNumCpu.data_ptr<int>()[i];
if (nHot <= 0) { if (nHot <= 0) {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment