Commit 14cb538f authored by yhcao6

clean unnecessary comments

parent 9acb38be
+// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c
#include <torch/torch.h>
#include <cmath>
@@ -37,10 +39,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
int dilationW, int deformable_group)
{
-// AT_CHECK(weight->nDimension == 4, 5,
-//          "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, "
-//          "but got: %s",
-//          weight->nDimension);
AT_CHECK(weight.ndimension() == 4,
"4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, "
"but got: %s",
@@ -53,10 +51,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
"kernel size should be greater than zero, but got kH: %d kW: %d",
kH, kW);
-// AT_CHECK((weight->size[2] == kH && weight->size[3] == kW), 9,
-// "kernel size should be consistent with weight, ",
-// "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", kH,
-// kW, weight->size[2], weight->size[3]);
AT_CHECK((weight.size(2) == kH &&
weight.size(3) == kW),
"kernel size should be consistent with weight, ",
@@ -70,7 +64,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
"dilation should be greater than 0, but got dilationH: %d dilationW: %d",
dilationH, dilationW);
-// int ndim = input->nDimension;
int ndim = input.ndimension();
int dimf = 0;
int dimh = 1;
@@ -86,10 +79,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
AT_CHECK(ndim == 3 || ndim == 4,
"3D or 4D input tensor expected but got: %s", ndim);
-// long nInputPlane = weight->size[1];
-// long inputHeight = input->size[dimh];
-// long inputWidth = input->size[dimw];
-// long nOutputPlane = weight->size[0];
long nInputPlane = weight.size(1);
long inputHeight = input.size(dimh);
long inputWidth = input.size(dimw);
@@ -114,10 +103,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
AT_CHECK((inputHeight >= kH && inputWidth >= kW),
"input image is smaller than kernel");
-// AT_CHECK(
-// (offset->size[2] == outputHeight && offset->size[3] == outputWidth), 3,
-// "invalid spatial size of offset, expected height: %d width: %d, but got height: %d width: %d", outputHeight, outputWidth,
-// offset->size[2], offset->size[3]);
AT_CHECK(
(offset.size(2) == outputHeight && offset.size(3) == outputWidth),
"invalid spatial size of offset, expected height: %d width: %d, but got height: %d width: %d",
@@ -152,9 +137,6 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
// todo: add new output buffer and transpose it to output (or directly transpose output)
// todo: possibly change data indexing because of parallel_imgs
-// THCAssertSameGPU(THCudaTensor_checkGPU(state, 6, input, weight, offset,
-// output, columns, ones));
shape_check(input, offset, NULL, weight, kH, kW, dH, dW, padH, padW,
dilationH, dilationW, deformable_group);
@@ -185,8 +167,6 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
AT_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
-// bias = bias ? THCudaTensor_newContiguous(state, bias) : bias;
output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane, outputHeight, outputWidth});
columns = at::zeros({nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, input.type());
@@ -212,7 +192,6 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
output_buffer[elt].flatten(1).addmm_(weight.flatten(1), columns).view_as(output_buffer[elt]);
}
-// the reason I use seemingly redundant output_buffer is that THCudaTensor API handles successive transpose and resize poorly
output_buffer = output_buffer.view(
{batchSize / im2col_step, nOutputPlane, im2col_step, outputHeight, outputWidth});
output_buffer.transpose_(1, 2);
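Aside: the comment deleted above is the rationale for output_buffer: the old THCudaTensor API handled back-to-back transpose and resize poorly, whereas in ATen the same reshuffle is cheap metadata work. A standalone sketch with made-up sizes, assuming a recent ATen where at::zeros defaults to float:

#include <ATen/ATen.h>

int main() {
  long batchSize = 4, im2col_step = 2, nOutputPlane = 8;
  long outputHeight = 5, outputWidth = 5;
  // Mirror of the buffer layout used above: im2col processes im2col_step
  // images at once, so the batch axis is split in two.
  at::Tensor output_buffer = at::zeros(
      {batchSize / im2col_step, nOutputPlane,
       im2col_step, outputHeight, outputWidth});
  // Swap the nOutputPlane and im2col_step axes, then flatten back to the
  // (batchSize, nOutputPlane, H, W) layout the caller expects. view()
  // needs contiguous memory after the in-place transpose.
  output_buffer.transpose_(1, 2);
  at::Tensor output = output_buffer.contiguous().view(
      {batchSize, nOutputPlane, outputHeight, outputWidth});
  return output.size(0) == batchSize ? 0 : 1;
}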
@@ -239,9 +218,6 @@ int deform_conv_backward_input_cuda(
int dilationW, int dilationH, int deformable_group, int im2col_step)
{
-// THCAssertSameGPU(THCudaTensor_checkGPU(state, 6, input, gradOutput, weight,
-// offset, columns, gradInput));
shape_check(input, offset, &gradOutput, weight, kH, kW, dH, dW, padH,
padW, dilationH, dilationW, deformable_group);
@@ -341,8 +317,6 @@ int deform_conv_backward_parameters_cuda(
// todo: transpose and reshape outGrad
// todo: reshape columns
// todo: add im2col_step as input
-// THCAssertSameGPU(THCudaTensor_checkGPU(state, 5, input, offset, gradOutput,
-// gradWeight, columns));
shape_check(input, offset, &gradOutput, gradWeight, kH, kW, dH, dW,
padH, padW, dilationH, dilationW, deformable_group);
@@ -402,8 +376,9 @@ int deform_conv_backward_parameters_cuda(
inputWidth, kH, kW, padH, padW, dH, dW, dilationH, dilationW,
im2col_step, deformable_group, columns);
-gradWeight.copy_(gradWeight.flatten(1).addmm_(
-gradOutputBuffer[elt].flatten(1), columns.transpose(1, 0), 1.0, scale).view_as(gradWeight));
+gradWeight = gradWeight.flatten(1).addmm_(
+gradOutputBuffer[elt].flatten(1), columns.transpose(1, 0), 1.0, scale)
+.view_as(gradWeight);
}
input = input.view({batchSize, nInputPlane, inputHeight, inputWidth});
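Aside: addmm_(mat1, mat2, beta, alpha) computes self = beta * self + alpha * mat1.mm(mat2) in place, so the change above accumulates the weight gradient as a single GEMM. A toy check with made-up shapes (8 output planes, 27 = 3x3x3 unrolled kernel entries, 25 spatial positions), assuming a recent ATen for item<float>():

#include <ATen/ATen.h>
#include <cassert>
#include <cmath>

int main() {
  at::Tensor gradWeightFlat = at::zeros({8, 27});
  at::Tensor gradOut = at::ones({8, 25});
  at::Tensor columns = at::ones({27, 25});
  float scale = 0.5f;
  // self = 1.0 * self + scale * (gradOut @ columns^T); each entry is the
  // dot product of two length-25 ones vectors, scaled: 0.5 * 25 = 12.5.
  gradWeightFlat.addmm_(gradOut, columns.transpose(1, 0), 1.0, scale);
  assert(std::abs(gradWeightFlat[0][0].item<float>() - 12.5f) < 1e-5f);
  return 0;
}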
......
@@ -58,6 +58,8 @@
* \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng
*/
+// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu
#include <ATen/ATen.h>
#include <THC/THCAtomics.cuh>
#include <stdio.h>
......
@@ -6,6 +6,8 @@
* \author Yi Li, Guodong Zhang, Jifeng Dai
*/
/***************** Adapted by Charles Shang *********************/
+// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/cuda/deform_psroi_pooling_cuda.cu
#include <ATen/ATen.h>
#include <THC/THCAtomics.cuh>
#include <stdio.h>
......
// author: Charles Shang
// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
+// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c
#include <torch/torch.h>
#include <cmath>
#include <vector>
// author: Charles Shang
// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
void modulated_deformable_im2col_cuda(const at::Tensor data_im, const at::Tensor data_offset,
const at::Tensor data_mask, const int batch_size, const int channels,
const int height_im, const int width_im, const int height_col,
@@ -33,7 +34,6 @@ void modulated_deformable_col2im_coord_cuda(const at::Tensor data_col, const at:
const int deformable_group, at::Tensor grad_offset,
at::Tensor grad_mask);
void DeformablePSROIPoolForward(const at::Tensor data,
const at::Tensor bbox,
const at::Tensor trans,
@@ -76,7 +76,6 @@ void DeformablePSROIPoolBackwardAcc(const at::Tensor out_grad,
const int sample_per_part,
const float trans_std);
void modulated_deform_conv_cuda_forward(at::Tensor input, at::Tensor weight,
at::Tensor bias, at::Tensor ones,
at::Tensor offset, at::Tensor mask,
@@ -87,7 +86,6 @@ void modulated_deform_conv_cuda_forward(at::Tensor input, at::Tensor weight,
const int dilation_h, const int dilation_w,
const int deformable_group)
{
-// THCAssertSameGPU(THCudaTensor_checkGPU(state, 8, input, weight, bias, ones, offset, mask, output, columns));
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
@@ -156,8 +154,6 @@ void modulated_deform_conv_cuda_backward(at::Tensor input, at::Tensor weight,
int dilation_h, int dilation_w,
int deformable_group)
{
-// THCAssertSameGPU(THCudaTensor_checkGPU(state, 13, input, weight, bias, ones, offset, mask, columns,
-// grad_input, grad_weight, grad_bias, grad_offset, grad_mask, grad_output));
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
@@ -220,7 +216,6 @@ void modulated_deform_conv_cuda_backward(at::Tensor input, at::Tensor weight,
grad_bias = grad_bias.view({-1, 1}).addmm_(grad_output[b].flatten(1), ones.view({-1, 1})).view(-1);
}
}
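Aside: the grad_bias line above uses a common trick: multiplying by a ones vector row-sums the gradient, so the bias gradient is a single addmm_ instead of an explicit reduction. A standalone sketch with illustrative shapes (8 output channels, 5x5 spatial), assuming a recent ATen:

#include <ATen/ATen.h>

int main() {
  at::Tensor grad_output_b = at::ones({8, 5, 5});
  at::Tensor ones = at::ones({5 * 5});
  at::Tensor grad_bias = at::zeros({8});
  // (8,1) += (8,25) @ (25,1): a matrix-vector product with ones is a
  // row-wise sum over all spatial positions.
  grad_bias = grad_bias.view({-1, 1})
                  .addmm_(grad_output_b.flatten(1), ones.view({-1, 1}))
                  .view(-1);
  // Each channel accumulates 25 ones.
  return grad_bias[0].item<float>() == 25.0f ? 0 : 1;
}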
void deform_psroi_pooling_cuda_forward(at::Tensor input, at::Tensor bbox,
@@ -236,13 +231,12 @@ void deform_psroi_pooling_cuda_forward(at::Tensor input, at::Tensor bbox,
const float trans_std)
{
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
-// THCAssertSameGPU(THCudaTensor_checkGPU(state, 5, input, bbox, trans, out, top_count));
const int batch = input.size(0);
const int channels = input.size(1);
const int height = input.size(2);
const int width = input.size(3);
-const int channels_trans = no_trans? 2 : trans.size(1);
+const int channels_trans = no_trans ? 2 : trans.size(1);
const int num_bbox = bbox.size(0);
if (num_bbox != out.size(0))
@@ -278,14 +272,12 @@ void deform_psroi_pooling_cuda_backward(at::Tensor out_grad,
{
AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
-// THCAssertSameGPU(THCudaTensor_checkGPU(state, 7, input, bbox, trans, out_grad, top_count,
-// input_grad, trans_grad));
const int batch = input.size(0);
const int channels = input.size(1);
const int height = input.size(2);
const int width = input.size(3);
-const int channels_trans = no_trans? 2 : trans.size(1);
+const int channels_trans = no_trans ? 2 : trans.size(1);
const int num_bbox = bbox.size(0);
if (num_bbox != out_grad.size(0))
......
+// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/cuda/modulated_deform_im2col_cuda.cu
#include <ATen/ATen.h>
#include <THC/THCAtomics.cuh>
#include <stdio.h>
@@ -17,7 +19,6 @@ inline int GET_BLOCKS(const int N)
return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;
}
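Aside: GET_BLOCKS is integer ceil division, so the grid covers every element, with one extra partially filled block whenever N is not a multiple of the block size. A worked example, assuming CUDA_NUM_THREADS = 1024 (its definition sits above this hunk):

// GET_BLOCKS(3000) = (3000 + 1023) / 1024 = 4023 / 1024 = 3  (integer division)
// GET_BLOCKS(2048) = (2048 + 1023) / 1024 = 3071 / 1024 = 2  (exact fit)
// With N = 3000, the last block has 3 * 1024 - 3000 = 72 surplus threads;
// the kernels bound-check the linear index so those threads do no work.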
template <typename scalar_t>
__device__ scalar_t dmcn_im2col_bilinear(const scalar_t *bottom_data, const int data_width,
const int height, const int width, scalar_t h, scalar_t w)
@@ -326,7 +327,8 @@ void modulated_deformable_im2col_cuda(
const int height_col, const int width_col, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
-const int deformable_group, at::Tensor data_col) {
+const int deformable_group, at::Tensor data_col)
+{
// num_axes should be smaller than block size
const int channel_per_deformable_group = channels / deformable_group;
const int num_kernels = channels * batch_size * height_col * width_col;
@@ -338,7 +340,7 @@ void modulated_deformable_im2col_cuda(
const scalar_t *data_mask_ = data_mask.data<scalar_t>();
scalar_t *data_col_ = data_col.data<scalar_t>();
-modulated_deformable_im2col_gpu_kernel <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+modulated_deformable_im2col_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group,
batch_size, channels, deformable_group, height_col, width_col, data_col_);
@@ -349,7 +351,6 @@ void modulated_deformable_im2col_cuda(
{
printf("error in modulated_deformable_im2col_cuda: %s\n", cudaGetErrorString(err));
}
}
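Aside: the scalar_t pointers above come from an ATen floating-point dispatch (elided at the top of this hunk), and all three host wrappers in this file follow the same dispatch, launch, then cudaGetLastError pattern. A minimal self-contained sketch of that pattern, using the same era of the ATen API as this file (tensor.type() and data<scalar_t>()); the kernel and the constants here are hypothetical stand-ins:

#include <ATen/ATen.h>
#include <cstdio>

const int CUDA_NUM_THREADS = 1024;  // assumption; see the real definition above

inline int GET_BLOCKS(const int N) {
  return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;
}

template <typename scalar_t>
__global__ void copy_kernel(const int n, const scalar_t *in, scalar_t *out) {
  int index = blockIdx.x * blockDim.x + threadIdx.x;
  if (index < n)  // surplus threads in the last block do nothing
    out[index] = in[index];
}

void copy_cuda(const at::Tensor input, at::Tensor output) {
  const int num_kernels = input.numel();
  AT_DISPATCH_FLOATING_TYPES(
      input.type(), "copy_cuda", ([&] {
        const scalar_t *in_ = input.data<scalar_t>();
        scalar_t *out_ = output.data<scalar_t>();
        copy_kernel<scalar_t><<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
            num_kernels, in_, out_);
      }));
  // Kernel launches are asynchronous; surface any launch failure here.
  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess)
    printf("error in copy_cuda: %s\n", cudaGetErrorString(err));
}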
void modulated_deformable_col2im_cuda(
@@ -358,7 +359,8 @@ void modulated_deformable_col2im_cuda(
const int height_col, const int width_col, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
-const int deformable_group, at::Tensor grad_im){
+const int deformable_group, at::Tensor grad_im)
+{
const int channel_per_deformable_group = channels / deformable_group;
const int num_kernels = channels * kernel_h * kernel_w * batch_size * height_col * width_col;
@@ -370,7 +372,7 @@ void modulated_deformable_col2im_cuda(
const scalar_t *data_mask_ = data_mask.data<scalar_t>();
scalar_t *grad_im_ = grad_im.data<scalar_t>();
-modulated_deformable_col2im_gpu_kernel <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+modulated_deformable_col2im_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_col_, data_offset_, data_mask_, channels, height_im, width_im,
kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
dilation_h, dilation_w, channel_per_deformable_group,
@@ -382,7 +384,6 @@ void modulated_deformable_col2im_cuda(
{
printf("error in modulated_deformable_col2im_cuda: %s\n", cudaGetErrorString(err));
}
}
void modulated_deformable_col2im_coord_cuda(
@@ -392,7 +393,8 @@ void modulated_deformable_col2im_coord_cuda(
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group,
-at::Tensor grad_offset, at::Tensor grad_mask) {
+at::Tensor grad_offset, at::Tensor grad_mask)
+{
const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * kernel_w * deformable_group;
const int channel_per_deformable_group = channels * kernel_h * kernel_w / deformable_group;
@@ -405,7 +407,7 @@ void modulated_deformable_col2im_coord_cuda(
scalar_t *grad_offset_ = grad_offset.data<scalar_t>();
scalar_t *grad_mask_ = grad_mask.data<scalar_t>();
-modulated_deformable_col2im_coord_gpu_kernel <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+modulated_deformable_col2im_coord_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_col_, data_im_, data_offset_, data_mask_, channels, height_im, width_im,
kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
dilation_h, dilation_w, channel_per_deformable_group,
......