Commit ecc9800a, authored by tpoisonooo and committed by Zaida Zhou
Browse files

Fix warning of CUDA ops (#2324)

parent e0b3223b
......@@ -44,6 +44,7 @@ __device__ bool compare_vertices(float x1, float y1, float x2, float y2) {
else
return false;
}
return false;
}
__global__ void diff_iou_rotated_sort_vertices_forward_cuda_kernel(
......
......@@ -36,7 +36,7 @@ TV_HOST_DEVICE Index getValidOutPos(const Index *input_pos,
Index m, offset;
bool valid = false;
#pragma unroll
for (int i = 0; i < NDim; ++i) {
for (unsigned i = 0; i < NDim; ++i) {
lowers[i] = (input_pos[i] - (kernelSize[i] - 1) * dilation[i] - 1 +
stride[i] + padding[i]) /
stride[i];
......@@ -50,7 +50,7 @@ TV_HOST_DEVICE Index getValidOutPos(const Index *input_pos,
}
#pragma unroll
for (int i = 0; i < NDim; ++i) {
for (unsigned i = 0; i < NDim; ++i) {
counter[i] = 0;
}
for (int i = 0; i < numPoints; ++i) {
......@@ -98,7 +98,7 @@ TV_HOST_DEVICE Index getValidOutPosTranspose(
Index m, offset;
bool valid = false;
#pragma unroll
for (int i = 0; i < NDim; ++i) {
for (unsigned i = 0; i < NDim; ++i) {
lowers[i] = input_pos[i] * stride[i] - padding[i];
uppers[i] = lowers[i] + (kernelSize[i] - 1) * dilation[i];
}
......@@ -108,7 +108,7 @@ TV_HOST_DEVICE Index getValidOutPosTranspose(
numPoints *= counterSize[i];
}
#pragma unroll
for (int i = 0; i < NDim; ++i) {
for (unsigned i = 0; i < NDim; ++i) {
counter[i] = 0;
}
for (int i = 0; i < numPoints; ++i) {
......@@ -251,9 +251,7 @@ Index getIndicePairsSubM(tv::TensorView<const Index> indicesIn,
const Index *const stride, const Index *const padding,
const Index *dilation,
const Index *const outSpatialShape) {
Index numAct = 0;
auto numActIn = indicesIn.dim(0);
Index batchIdx = 0;
Index spatialVolume = 1;
#pragma unroll
for (int i = 0; i < NDim; ++i) {
......
......@@ -40,9 +40,6 @@ torch::Tensor FusedIndiceConvBatchnormCUDAKernelLauncher(
// add.
torch::mm_out(output, features, filters[indicePairMaxOffset]);
}
double totalGatherTime = 0;
double totalGEMMTime = 0;
double totalSAddTime = 0;
for (int i = 0; i < kernelVolume; ++i) {
auto nHot = indicePairNumCpu.data_ptr<int>()[i];
if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {
......
......@@ -17,7 +17,6 @@ torch::Tensor IndiceMaxpoolForwardCUDAKernelLauncher(torch::Tensor features,
auto options =
torch::TensorOptions().dtype(features.dtype()).device(features.device());
torch::Tensor output = torch::zeros({numAct, numInPlanes}, options);
double totalTime = 0;
for (int i = 0; i < kernelVolume; ++i) {
auto nHot = indicePairNumCpu.data_ptr<int>()[i];
if (nHot <= 0) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment