Commit 14cb538f authored by yhcao6's avatar yhcao6
Browse files

clean unnecessary comment

parent 9acb38be
// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c
#include <torch/torch.h> #include <torch/torch.h>
#include <cmath> #include <cmath>
...@@ -37,10 +39,6 @@ void shape_check(at::Tensor input, at::Tensor offset, ...@@ -37,10 +39,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
int dilationW, int deformable_group) int dilationW, int deformable_group)
{ {
// AT_CHECK(weight->nDimension == 4, 5,
// "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, "
// "but got: %s",
// weight->nDimension);
AT_CHECK(weight.ndimension() == 4, AT_CHECK(weight.ndimension() == 4,
"4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, " "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, "
"but got: %s", "but got: %s",
...@@ -53,10 +51,6 @@ void shape_check(at::Tensor input, at::Tensor offset, ...@@ -53,10 +51,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
"kernel size should be greater than zero, but got kH: %d kW: %d", "kernel size should be greater than zero, but got kH: %d kW: %d",
kH, kW); kH, kW);
// AT_CHECK((weight->size[2] == kH && weight->size[3] == kW), 9,
// "kernel size should be consistent with weight, ",
// "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", kH,
// kW, weight->size[2], weight->size[3]);
AT_CHECK((weight.size(2) == kH && AT_CHECK((weight.size(2) == kH &&
weight.size(3) == kW), weight.size(3) == kW),
"kernel size should be consistent with weight, ", "kernel size should be consistent with weight, ",
...@@ -70,7 +64,6 @@ void shape_check(at::Tensor input, at::Tensor offset, ...@@ -70,7 +64,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
"dilation should be greater than 0, but got dilationH: %d dilationW: %d", "dilation should be greater than 0, but got dilationH: %d dilationW: %d",
dilationH, dilationW); dilationH, dilationW);
// int ndim = input->nDimension;
int ndim = input.ndimension(); int ndim = input.ndimension();
int dimf = 0; int dimf = 0;
int dimh = 1; int dimh = 1;
...@@ -86,10 +79,6 @@ void shape_check(at::Tensor input, at::Tensor offset, ...@@ -86,10 +79,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
AT_CHECK(ndim == 3 || ndim == 4, AT_CHECK(ndim == 3 || ndim == 4,
"3D or 4D input tensor expected but got: %s", ndim); "3D or 4D input tensor expected but got: %s", ndim);
// long nInputPlane = weight->size[1];
// long inputHeight = input->size[dimh];
// long inputWidth = input->size[dimw];
// long nOutputPlane = weight->size[0];
long nInputPlane = weight.size(1); long nInputPlane = weight.size(1);
long inputHeight = input.size(dimh); long inputHeight = input.size(dimh);
long inputWidth = input.size(dimw); long inputWidth = input.size(dimw);
...@@ -114,10 +103,6 @@ void shape_check(at::Tensor input, at::Tensor offset, ...@@ -114,10 +103,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
AT_CHECK((inputHeight >= kH && inputWidth >= kW), AT_CHECK((inputHeight >= kH && inputWidth >= kW),
"input image is smaller than kernel"); "input image is smaller than kernel");
// AT_CHECK(
// (offset->size[2] == outputHeight && offset->size[3] == outputWidth), 3,
// "invalid spatial size of offset, expected height: %d width: %d, but got height: %d width: %d", outputHeight, outputWidth,
// offset->size[2], offset->size[3]);
AT_CHECK( AT_CHECK(
(offset.size(2) == outputHeight && offset.size(3) == outputWidth), (offset.size(2) == outputHeight && offset.size(3) == outputWidth),
"invalid spatial size of offset, expected height: %d width: %d, but got height: %d width: %d", "invalid spatial size of offset, expected height: %d width: %d, but got height: %d width: %d",
...@@ -152,9 +137,6 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight, ...@@ -152,9 +137,6 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
// todo: add new output buffer and transpose it to output (or directly transpose output) // todo: add new output buffer and transpose it to output (or directly transpose output)
// todo: possibly change data indexing because of parallel_imgs // todo: possibly change data indexing because of parallel_imgs
// THCAssertSameGPU(THCudaTensor_checkGPU(state, 6, input, weight, offset,
// output, columns, ones));
shape_check(input, offset, NULL, weight, kH, kW, dH, dW, padH, padW, shape_check(input, offset, NULL, weight, kH, kW, dH, dW, padH, padW,
dilationH, dilationW, deformable_group); dilationH, dilationW, deformable_group);
...@@ -185,8 +167,6 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight, ...@@ -185,8 +167,6 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
AT_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); AT_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
// bias = bias ? THCudaTensor_newContiguous(state, bias) : bias;
output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane, outputHeight, outputWidth}); output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane, outputHeight, outputWidth});
columns = at::zeros({nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, input.type()); columns = at::zeros({nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, input.type());
...@@ -212,7 +192,6 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight, ...@@ -212,7 +192,6 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
output_buffer[elt].flatten(1).addmm_(weight.flatten(1), columns).view_as(output_buffer[elt]); output_buffer[elt].flatten(1).addmm_(weight.flatten(1), columns).view_as(output_buffer[elt]);
} }
// the reason I use seemingly redundant output_buffer is that THCudaTensor API handles successive transpose and resize poorly
output_buffer = output_buffer.view( output_buffer = output_buffer.view(
{batchSize / im2col_step, nOutputPlane, im2col_step, outputHeight, outputWidth}); {batchSize / im2col_step, nOutputPlane, im2col_step, outputHeight, outputWidth});
output_buffer.transpose_(1, 2); output_buffer.transpose_(1, 2);
...@@ -239,9 +218,6 @@ int deform_conv_backward_input_cuda( ...@@ -239,9 +218,6 @@ int deform_conv_backward_input_cuda(
int dilationW, int dilationH, int deformable_group, int im2col_step) int dilationW, int dilationH, int deformable_group, int im2col_step)
{ {
// THCAssertSameGPU(THCudaTensor_checkGPU(state, 6, input, gradOutput, weight,
// offset, columns, gradInput));
shape_check(input, offset, &gradOutput, weight, kH, kW, dH, dW, padH, shape_check(input, offset, &gradOutput, weight, kH, kW, dH, dW, padH,
padW, dilationH, dilationW, deformable_group); padW, dilationH, dilationW, deformable_group);
...@@ -341,8 +317,6 @@ int deform_conv_backward_parameters_cuda( ...@@ -341,8 +317,6 @@ int deform_conv_backward_parameters_cuda(
// todo: transpose and reshape outGrad // todo: transpose and reshape outGrad
// todo: reshape columns // todo: reshape columns
// todo: add im2col_step as input // todo: add im2col_step as input
// THCAssertSameGPU(THCudaTensor_checkGPU(state, 5, input, offset, gradOutput,
// gradWeight, columns));
shape_check(input, offset, &gradOutput, gradWeight, kH, kW, dH, dW, shape_check(input, offset, &gradOutput, gradWeight, kH, kW, dH, dW,
padH, padW, dilationH, dilationW, deformable_group); padH, padW, dilationH, dilationW, deformable_group);
...@@ -402,8 +376,9 @@ int deform_conv_backward_parameters_cuda( ...@@ -402,8 +376,9 @@ int deform_conv_backward_parameters_cuda(
inputWidth, kH, kW, padH, padW, dH, dW, dilationH, dilationW, inputWidth, kH, kW, padH, padW, dH, dW, dilationH, dilationW,
im2col_step, deformable_group, columns); im2col_step, deformable_group, columns);
gradWeight.copy_(gradWeight.flatten(1).addmm_( gradWeight = gradWeight.flatten(1).addmm_(
gradOutputBuffer[elt].flatten(1), columns.transpose(1, 0), 1.0, scale).view_as(gradWeight)); gradOutputBuffer[elt].flatten(1), columns.transpose(1, 0), 1.0, scale)
.view_as(gradWeight);
} }
input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); input = input.view({batchSize, nInputPlane, inputHeight, inputWidth});
......
...@@ -58,6 +58,8 @@ ...@@ -58,6 +58,8 @@
* \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng
*/ */
// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu
#include <ATen/ATen.h> #include <ATen/ATen.h>
#include <THC/THCAtomics.cuh> #include <THC/THCAtomics.cuh>
#include <stdio.h> #include <stdio.h>
......
...@@ -6,6 +6,8 @@ ...@@ -6,6 +6,8 @@
* \author Yi Li, Guodong Zhang, Jifeng Dai * \author Yi Li, Guodong Zhang, Jifeng Dai
*/ */
/***************** Adapted by Charles Shang *********************/ /***************** Adapted by Charles Shang *********************/
// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/cuda/deform_psroi_pooling_cuda.cu
#include <ATen/ATen.h> #include <ATen/ATen.h>
#include <THC/THCAtomics.cuh> #include <THC/THCAtomics.cuh>
#include <stdio.h> #include <stdio.h>
......
// author: Charles Shang
// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob /mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c
#include <torch/torch.h> #include <torch/torch.h>
#include <cmath> #include <cmath>
#include <vector> #include <vector>
// author: Charles Shang
// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
void modulated_deformable_im2col_cuda(const at::Tensor data_im, const at::Tensor data_offset, void modulated_deformable_im2col_cuda(const at::Tensor data_im, const at::Tensor data_offset,
const at::Tensor data_mask, const int batch_size, const int channels, const at::Tensor data_mask, const int batch_size, const int channels,
const int height_im, const int width_im, const int height_col, const int height_im, const int width_im, const int height_col,
...@@ -33,7 +34,6 @@ void modulated_deformable_col2im_coord_cuda(const at::Tensor data_col, const at: ...@@ -33,7 +34,6 @@ void modulated_deformable_col2im_coord_cuda(const at::Tensor data_col, const at:
const int deformable_group, at::Tensor grad_offset, const int deformable_group, at::Tensor grad_offset,
at::Tensor grad_mask); at::Tensor grad_mask);
void DeformablePSROIPoolForward(const at::Tensor data, void DeformablePSROIPoolForward(const at::Tensor data,
const at::Tensor bbox, const at::Tensor bbox,
const at::Tensor trans, const at::Tensor trans,
...@@ -76,7 +76,6 @@ void DeformablePSROIPoolBackwardAcc(const at::Tensor out_grad, ...@@ -76,7 +76,6 @@ void DeformablePSROIPoolBackwardAcc(const at::Tensor out_grad,
const int sample_per_part, const int sample_per_part,
const float trans_std); const float trans_std);
void modulated_deform_conv_cuda_forward(at::Tensor input, at::Tensor weight, void modulated_deform_conv_cuda_forward(at::Tensor input, at::Tensor weight,
at::Tensor bias, at::Tensor ones, at::Tensor bias, at::Tensor ones,
at::Tensor offset, at::Tensor mask, at::Tensor offset, at::Tensor mask,
...@@ -87,7 +86,6 @@ void modulated_deform_conv_cuda_forward(at::Tensor input, at::Tensor weight, ...@@ -87,7 +86,6 @@ void modulated_deform_conv_cuda_forward(at::Tensor input, at::Tensor weight,
const int dilation_h, const int dilation_w, const int dilation_h, const int dilation_w,
const int deformable_group) const int deformable_group)
{ {
// THCAssertSameGPU(THCudaTensor_checkGPU(state, 8, input, weight, bias, ones, offset, mask, output, columns));
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
...@@ -156,8 +154,6 @@ void modulated_deform_conv_cuda_backward(at::Tensor input, at::Tensor weight, ...@@ -156,8 +154,6 @@ void modulated_deform_conv_cuda_backward(at::Tensor input, at::Tensor weight,
int dilation_h, int dilation_w, int dilation_h, int dilation_w,
int deformable_group) int deformable_group)
{ {
// THCAssertSameGPU(THCudaTensor_checkGPU(state, 13, input, weight, bias, ones, offset, mask, columns,
// grad_input, grad_weight, grad_bias, grad_offset, grad_mask, grad_output));
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
...@@ -220,7 +216,6 @@ void modulated_deform_conv_cuda_backward(at::Tensor input, at::Tensor weight, ...@@ -220,7 +216,6 @@ void modulated_deform_conv_cuda_backward(at::Tensor input, at::Tensor weight,
grad_bias = grad_bias.view({-1, 1}).addmm_(grad_output[b].flatten(1), ones.view({-1, 1})).view(-1); grad_bias = grad_bias.view({-1, 1}).addmm_(grad_output[b].flatten(1), ones.view({-1, 1})).view(-1);
} }
} }
void deform_psroi_pooling_cuda_forward(at::Tensor input, at::Tensor bbox, void deform_psroi_pooling_cuda_forward(at::Tensor input, at::Tensor bbox,
...@@ -236,13 +231,12 @@ void deform_psroi_pooling_cuda_forward(at::Tensor input, at::Tensor bbox, ...@@ -236,13 +231,12 @@ void deform_psroi_pooling_cuda_forward(at::Tensor input, at::Tensor bbox,
const float trans_std) const float trans_std)
{ {
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
// THCAssertSameGPU(THCudaTensor_checkGPU(state, 5, input, bbox, trans, out, top_count));
const int batch = input.size(0); const int batch = input.size(0);
const int channels = input.size(1); const int channels = input.size(1);
const int height = input.size(2); const int height = input.size(2);
const int width = input.size(3); const int width = input.size(3);
const int channels_trans = no_trans? 2 : trans.size(1); const int channels_trans = no_trans ? 2 : trans.size(1);
const int num_bbox = bbox.size(0); const int num_bbox = bbox.size(0);
if (num_bbox != out.size(0)) if (num_bbox != out.size(0))
...@@ -278,14 +272,12 @@ void deform_psroi_pooling_cuda_backward(at::Tensor out_grad, ...@@ -278,14 +272,12 @@ void deform_psroi_pooling_cuda_backward(at::Tensor out_grad,
{ {
AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
// THCAssertSameGPU(THCudaTensor_checkGPU(state, 7, input, bbox, trans, out_grad, top_count,
// input_grad, trans_grad));
const int batch = input.size(0); const int batch = input.size(0);
const int channels = input.size(1); const int channels = input.size(1);
const int height = input.size(2); const int height = input.size(2);
const int width = input.size(3); const int width = input.size(3);
const int channels_trans = no_trans? 2 : trans.size(1); const int channels_trans = no_trans ? 2 : trans.size(1);
const int num_bbox = bbox.size(0); const int num_bbox = bbox.size(0);
if (num_bbox != out_grad.size(0)) if (num_bbox != out_grad.size(0))
......
// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/cuda/modulated_deform_im2col_cuda.cu
#include <ATen/ATen.h> #include <ATen/ATen.h>
#include <THC/THCAtomics.cuh> #include <THC/THCAtomics.cuh>
#include <stdio.h> #include <stdio.h>
...@@ -17,7 +19,6 @@ inline int GET_BLOCKS(const int N) ...@@ -17,7 +19,6 @@ inline int GET_BLOCKS(const int N)
return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS; return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;
} }
template <typename scalar_t> template <typename scalar_t>
__device__ scalar_t dmcn_im2col_bilinear(const scalar_t *bottom_data, const int data_width, __device__ scalar_t dmcn_im2col_bilinear(const scalar_t *bottom_data, const int data_width,
const int height, const int width, scalar_t h, scalar_t w) const int height, const int width, scalar_t h, scalar_t w)
...@@ -326,7 +327,8 @@ void modulated_deformable_im2col_cuda( ...@@ -326,7 +327,8 @@ void modulated_deformable_im2col_cuda(
const int height_col, const int width_col, const int kernel_h, const int kenerl_w, const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w, const int dilation_h, const int dilation_w,
const int deformable_group, at::Tensor data_col) { const int deformable_group, at::Tensor data_col)
{
// num_axes should be smaller than block size // num_axes should be smaller than block size
const int channel_per_deformable_group = channels / deformable_group; const int channel_per_deformable_group = channels / deformable_group;
const int num_kernels = channels * batch_size * height_col * width_col; const int num_kernels = channels * batch_size * height_col * width_col;
...@@ -338,7 +340,7 @@ void modulated_deformable_im2col_cuda( ...@@ -338,7 +340,7 @@ void modulated_deformable_im2col_cuda(
const scalar_t *data_mask_ = data_mask.data<scalar_t>(); const scalar_t *data_mask_ = data_mask.data<scalar_t>();
scalar_t *data_col_ = data_col.data<scalar_t>(); scalar_t *data_col_ = data_col.data<scalar_t>();
modulated_deformable_im2col_gpu_kernel <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>( modulated_deformable_im2col_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im, kernel_h, kenerl_w, num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im, kernel_h, kenerl_w,
pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group,
batch_size, channels, deformable_group, height_col, width_col, data_col_); batch_size, channels, deformable_group, height_col, width_col, data_col_);
...@@ -349,7 +351,6 @@ void modulated_deformable_im2col_cuda( ...@@ -349,7 +351,6 @@ void modulated_deformable_im2col_cuda(
{ {
printf("error in modulated_deformable_im2col_cuda: %s\n", cudaGetErrorString(err)); printf("error in modulated_deformable_im2col_cuda: %s\n", cudaGetErrorString(err));
} }
} }
void modulated_deformable_col2im_cuda( void modulated_deformable_col2im_cuda(
...@@ -358,7 +359,8 @@ void modulated_deformable_col2im_cuda( ...@@ -358,7 +359,8 @@ void modulated_deformable_col2im_cuda(
const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int height_col, const int width_col, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w, const int dilation_h, const int dilation_w,
const int deformable_group, at::Tensor grad_im){ const int deformable_group, at::Tensor grad_im)
{
const int channel_per_deformable_group = channels / deformable_group; const int channel_per_deformable_group = channels / deformable_group;
const int num_kernels = channels * kernel_h * kernel_w * batch_size * height_col * width_col; const int num_kernels = channels * kernel_h * kernel_w * batch_size * height_col * width_col;
...@@ -370,7 +372,7 @@ void modulated_deformable_col2im_cuda( ...@@ -370,7 +372,7 @@ void modulated_deformable_col2im_cuda(
const scalar_t *data_mask_ = data_mask.data<scalar_t>(); const scalar_t *data_mask_ = data_mask.data<scalar_t>();
scalar_t *grad_im_ = grad_im.data<scalar_t>(); scalar_t *grad_im_ = grad_im.data<scalar_t>();
modulated_deformable_col2im_gpu_kernel <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>( modulated_deformable_col2im_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_col_, data_offset_, data_mask_, channels, height_im, width_im, num_kernels, data_col_, data_offset_, data_mask_, channels, height_im, width_im,
kernel_h, kernel_w, pad_h, pad_h, stride_h, stride_w, kernel_h, kernel_w, pad_h, pad_h, stride_h, stride_w,
dilation_h, dilation_w, channel_per_deformable_group, dilation_h, dilation_w, channel_per_deformable_group,
...@@ -382,7 +384,6 @@ void modulated_deformable_col2im_cuda( ...@@ -382,7 +384,6 @@ void modulated_deformable_col2im_cuda(
{ {
printf("error in modulated_deformable_col2im_cuda: %s\n", cudaGetErrorString(err)); printf("error in modulated_deformable_col2im_cuda: %s\n", cudaGetErrorString(err));
} }
} }
void modulated_deformable_col2im_coord_cuda( void modulated_deformable_col2im_coord_cuda(
...@@ -392,7 +393,8 @@ void modulated_deformable_col2im_coord_cuda( ...@@ -392,7 +393,8 @@ void modulated_deformable_col2im_coord_cuda(
const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w, const int dilation_h, const int dilation_w,
const int deformable_group, const int deformable_group,
at::Tensor grad_offset, at::Tensor grad_mask) { at::Tensor grad_offset, at::Tensor grad_mask)
{
const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * kernel_w * deformable_group; const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * kernel_w * deformable_group;
const int channel_per_deformable_group = channels * kernel_h * kernel_w / deformable_group; const int channel_per_deformable_group = channels * kernel_h * kernel_w / deformable_group;
...@@ -405,7 +407,7 @@ void modulated_deformable_col2im_coord_cuda( ...@@ -405,7 +407,7 @@ void modulated_deformable_col2im_coord_cuda(
scalar_t *grad_offset_ = grad_offset.data<scalar_t>(); scalar_t *grad_offset_ = grad_offset.data<scalar_t>();
scalar_t *grad_mask_ = grad_mask.data<scalar_t>(); scalar_t *grad_mask_ = grad_mask.data<scalar_t>();
modulated_deformable_col2im_coord_gpu_kernel <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>( modulated_deformable_col2im_coord_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_col_, data_im_, data_offset_, data_mask_, channels, height_im, width_im, num_kernels, data_col_, data_im_, data_offset_, data_mask_, channels, height_im, width_im,
kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
dilation_h, dilation_w, channel_per_deformable_group, dilation_h, dilation_w, channel_per_deformable_group,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment