Commit 14cb538f authored by yhcao6

clean unnecessary comments

parent 9acb38be
+// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c
#include <torch/torch.h>
#include <cmath>
@@ -37,10 +39,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
int dilationW, int deformable_group)
{
-// AT_CHECK(weight->nDimension == 4, 5,
-//          "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, "
-//          "but got: %s",
-//          weight->nDimension);
AT_CHECK(weight.ndimension() == 4,
"4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, "
"but got: %s",
@@ -53,10 +51,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
"kernel size should be greater than zero, but got kH: %d kW: %d",
kH, kW);
-// AT_CHECK((weight->size[2] == kH && weight->size[3] == kW), 9,
-// "kernel size should be consistent with weight, ",
-// "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", kH,
-// kW, weight->size[2], weight->size[3]);
AT_CHECK((weight.size(2) == kH &&
weight.size(3) == kW),
"kernel size should be consistent with weight, ",
@@ -70,7 +64,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
"dilation should be greater than 0, but got dilationH: %d dilationW: %d",
dilationH, dilationW);
-// int ndim = input->nDimension;
int ndim = input.ndimension();
int dimf = 0;
int dimh = 1;
@@ -86,10 +79,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
AT_CHECK(ndim == 3 || ndim == 4,
"3D or 4D input tensor expected but got: %s", ndim);
-// long nInputPlane = weight->size[1];
-// long inputHeight = input->size[dimh];
-// long inputWidth = input->size[dimw];
-// long nOutputPlane = weight->size[0];
long nInputPlane = weight.size(1);
long inputHeight = input.size(dimh);
long inputWidth = input.size(dimw);
@@ -114,10 +103,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
AT_CHECK((inputHeight >= kH && inputWidth >= kW),
"input image is smaller than kernel");
-// AT_CHECK(
-// (offset->size[2] == outputHeight && offset->size[3] == outputWidth), 3,
-// "invalid spatial size of offset, expected height: %d width: %d, but got height: %d width: %d", outputHeight, outputWidth,
-// offset->size[2], offset->size[3]);
AT_CHECK(
(offset.size(2) == outputHeight && offset.size(3) == outputWidth),
"invalid spatial size of offset, expected height: %d width: %d, but got height: %d width: %d",
@@ -152,9 +137,6 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
// todo: add new output buffer and transpose it to output (or directly transpose output)
// todo: possibly change data indexing because of parallel_imgs
-// THCAssertSameGPU(THCudaTensor_checkGPU(state, 6, input, weight, offset,
-// output, columns, ones));
shape_check(input, offset, NULL, weight, kH, kW, dH, dW, padH, padW,
dilationH, dilationW, deformable_group);
@@ -185,8 +167,6 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
AT_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
-// bias = bias ? THCudaTensor_newContiguous(state, bias) : bias;
output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane, outputHeight, outputWidth});
columns = at::zeros({nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, input.type());
@@ -212,7 +192,6 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
output_buffer[elt].flatten(1).addmm_(weight.flatten(1), columns).view_as(output_buffer[elt]);
}
-// the reason I use seemingly redundant output_buffer is that THCudaTensor API handles successive transpose and resize poorly
output_buffer = output_buffer.view(
{batchSize / im2col_step, nOutputPlane, im2col_step, outputHeight, outputWidth});
output_buffer.transpose_(1, 2);
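Aside: the comment deleted above is the rationale for output_buffer: the old THCudaTensor API handled back-to-back transpose and resize poorly, whereas in ATen the same reshuffle is cheap metadata work. A standalone sketch with made-up sizes, assuming a recent ATen where at::zeros defaults to float:

#include <ATen/ATen.h>

int main() {
  long batchSize = 4, im2col_step = 2, nOutputPlane = 8;
  long outputHeight = 5, outputWidth = 5;
  // Mirror of the buffer layout used above: im2col processes im2col_step
  // images at once, so the batch axis is split in two.
  at::Tensor output_buffer = at::zeros(
      {batchSize / im2col_step, nOutputPlane,
       im2col_step, outputHeight, outputWidth});
  // Swap the nOutputPlane and im2col_step axes, then flatten back to the
  // (batchSize, nOutputPlane, H, W) layout the caller expects. view()
  // needs contiguous memory after the in-place transpose.
  output_buffer.transpose_(1, 2);
  at::Tensor output = output_buffer.contiguous().view(
      {batchSize, nOutputPlane, outputHeight, outputWidth});
  return output.size(0) == batchSize ? 0 : 1;
}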
@@ -239,9 +218,6 @@ int deform_conv_backward_input_cuda(
int dilationW, int dilationH, int deformable_group, int im2col_step)
{
-// THCAssertSameGPU(THCudaTensor_checkGPU(state, 6, input, gradOutput, weight,
-// offset, columns, gradInput));
shape_check(input, offset, &gradOutput, weight, kH, kW, dH, dW, padH,
padW, dilationH, dilationW, deformable_group);
@@ -341,8 +317,6 @@ int deform_conv_backward_parameters_cuda(
// todo: transpose and reshape outGrad
// todo: reshape columns
// todo: add im2col_step as input
-// THCAssertSameGPU(THCudaTensor_checkGPU(state, 5, input, offset, gradOutput,
-// gradWeight, columns));
shape_check(input, offset, &gradOutput, gradWeight, kH, kW, dH, dW,
padH, padW, dilationH, dilationW, deformable_group);
@@ -402,8 +376,9 @@ int deform_conv_backward_parameters_cuda(
inputWidth, kH, kW, padH, padW, dH, dW, dilationH, dilationW,
im2col_step, deformable_group, columns);
-gradWeight.copy_(gradWeight.flatten(1).addmm_(
-gradOutputBuffer[elt].flatten(1), columns.transpose(1, 0), 1.0, scale).view_as(gradWeight));
+gradWeight = gradWeight.flatten(1).addmm_(
+gradOutputBuffer[elt].flatten(1), columns.transpose(1, 0), 1.0, scale)
+.view_as(gradWeight);
}
input = input.view({batchSize, nInputPlane, inputHeight, inputWidth});
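Aside: addmm_(mat1, mat2, beta, alpha) computes self = beta * self + alpha * mat1.mm(mat2) in place, so the change above accumulates the weight gradient as a single GEMM. A toy check with made-up shapes (8 output planes, 27 = 3x3x3 unrolled kernel entries, 25 spatial positions), assuming a recent ATen for item<float>():

#include <ATen/ATen.h>
#include <cassert>
#include <cmath>

int main() {
  at::Tensor gradWeightFlat = at::zeros({8, 27});
  at::Tensor gradOut = at::ones({8, 25});
  at::Tensor columns = at::ones({27, 25});
  float scale = 0.5f;
  // self = 1.0 * self + scale * (gradOut @ columns^T); each entry is the
  // dot product of two length-25 ones vectors, scaled: 0.5 * 25 = 12.5.
  gradWeightFlat.addmm_(gradOut, columns.transpose(1, 0), 1.0, scale);
  assert(std::abs(gradWeightFlat[0][0].item<float>() - 12.5f) < 1e-5f);
  return 0;
}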
......
@@ -58,6 +58,8 @@
* \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng
*/
+// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu
#include <ATen/ATen.h>
#include <THC/THCAtomics.cuh>
#include <stdio.h>
......
@@ -6,6 +6,8 @@
* \author Yi Li, Guodong Zhang, Jifeng Dai
*/
/***************** Adapted by Charles Shang *********************/
+// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/cuda/deform_psroi_pooling_cuda.cu
#include <ATen/ATen.h>
#include <THC/THCAtomics.cuh>
#include <stdio.h>
......
// author: Charles Shang
// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
+// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c
#include <torch/torch.h>
#include <cmath>
#include <vector>
// author: Charles Shang
// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
void modulated_deformable_im2col_cuda(const at::Tensor data_im, const at::Tensor data_offset,
const at::Tensor data_mask, const int batch_size, const int channels,
const int height_im, const int width_im, const int height_col,
@@ -33,7 +34,6 @@ void modulated_deformable_col2im_coord_cuda(const at::Tensor data_col, const at:
const int deformable_group, at::Tensor grad_offset,
at::Tensor grad_mask);
void DeformablePSROIPoolForward(const at::Tensor data,
const at::Tensor bbox,
const at::Tensor trans,
@@ -76,7 +76,6 @@ void DeformablePSROIPoolBackwardAcc(const at::Tensor out_grad,
const int sample_per_part,
const float trans_std);
void modulated_deform_conv_cuda_forward(at::Tensor input, at::Tensor weight,
at::Tensor bias, at::Tensor ones,
at::Tensor offset, at::Tensor mask,
@@ -87,7 +86,6 @@ void modulated_deform_conv_cuda_forward(at::Tensor input, at::Tensor weight,
const int dilation_h, const int dilation_w,
const int deformable_group)
{
-// THCAssertSameGPU(THCudaTensor_checkGPU(state, 8, input, weight, bias, ones, offset, mask, output, columns));
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
@@ -156,8 +154,6 @@ void modulated_deform_conv_cuda_backward(at::Tensor input, at::Tensor weight,
int dilation_h, int dilation_w,
int deformable_group)
{
-// THCAssertSameGPU(THCudaTensor_checkGPU(state, 13, input, weight, bias, ones, offset, mask, columns,
-// grad_input, grad_weight, grad_bias, grad_offset, grad_mask, grad_output));
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
@@ -220,7 +216,6 @@ void modulated_deform_conv_cuda_backward(at::Tensor input, at::Tensor weight,
grad_bias = grad_bias.view({-1, 1}).addmm_(grad_output[b].flatten(1), ones.view({-1, 1})).view(-1);
}
}
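Aside: the grad_bias line above uses a common trick: multiplying by a ones vector row-sums the gradient, so the bias gradient is a single addmm_ instead of an explicit reduction. A standalone sketch with illustrative shapes (8 output channels, 5x5 spatial), assuming a recent ATen:

#include <ATen/ATen.h>

int main() {
  at::Tensor grad_output_b = at::ones({8, 5, 5});
  at::Tensor ones = at::ones({5 * 5});
  at::Tensor grad_bias = at::zeros({8});
  // (8,1) += (8,25) @ (25,1): a matrix-vector product with ones is a
  // row-wise sum over all spatial positions.
  grad_bias = grad_bias.view({-1, 1})
                  .addmm_(grad_output_b.flatten(1), ones.view({-1, 1}))
                  .view(-1);
  // Each channel accumulates 25 ones.
  return grad_bias[0].item<float>() == 25.0f ? 0 : 1;
}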
void deform_psroi_pooling_cuda_forward(at::Tensor input, at::Tensor bbox,
@@ -236,13 +231,12 @@ void deform_psroi_pooling_cuda_forward(at::Tensor input, at::Tensor bbox,
const float trans_std)
{
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
-// THCAssertSameGPU(THCudaTensor_checkGPU(state, 5, input, bbox, trans, out, top_count));
const int batch = input.size(0);
const int channels = input.size(1);
const int height = input.size(2);
const int width = input.size(3);
-const int channels_trans = no_trans? 2 : trans.size(1);
+const int channels_trans = no_trans ? 2 : trans.size(1);
const int num_bbox = bbox.size(0);
if (num_bbox != out.size(0))
@@ -278,14 +272,12 @@ void deform_psroi_pooling_cuda_backward(at::Tensor out_grad,
{
AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
-// THCAssertSameGPU(THCudaTensor_checkGPU(state, 7, input, bbox, trans, out_grad, top_count,
-// input_grad, trans_grad));
const int batch = input.size(0);
const int channels = input.size(1);
const int height = input.size(2);
const int width = input.size(3);
-const int channels_trans = no_trans? 2 : trans.size(1);
+const int channels_trans = no_trans ? 2 : trans.size(1);
const int num_bbox = bbox.size(0);
if (num_bbox != out_grad.size(0))
......
+// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/cuda/modulated_deform_im2col_cuda.cu
#include <ATen/ATen.h>
#include <THC/THCAtomics.cuh>
#include <stdio.h>
@@ -17,7 +19,6 @@ inline int GET_BLOCKS(const int N)
return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;
}
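Aside: GET_BLOCKS is integer ceil division, so the grid covers every element, with one extra partially filled block whenever N is not a multiple of the block size. A worked example, assuming CUDA_NUM_THREADS = 1024 (its definition sits above this hunk):

// GET_BLOCKS(3000) = (3000 + 1023) / 1024 = 4023 / 1024 = 3  (integer division)
// GET_BLOCKS(2048) = (2048 + 1023) / 1024 = 3071 / 1024 = 2  (exact fit)
// With N = 3000, the last block has 3 * 1024 - 3000 = 72 surplus threads;
// the kernels bound-check the linear index so those threads do no work.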
template <typename scalar_t>
__device__ scalar_t dmcn_im2col_bilinear(const scalar_t *bottom_data, const int data_width,
const int height, const int width, scalar_t h, scalar_t w)
@@ -326,7 +327,8 @@ void modulated_deformable_im2col_cuda(
const int height_col, const int width_col, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
-const int deformable_group, at::Tensor data_col) {
+const int deformable_group, at::Tensor data_col)
+{
// num_axes should be smaller than block size
const int channel_per_deformable_group = channels / deformable_group;
const int num_kernels = channels * batch_size * height_col * width_col;
@@ -338,7 +340,7 @@ void modulated_deformable_im2col_cuda(
const scalar_t *data_mask_ = data_mask.data<scalar_t>();
scalar_t *data_col_ = data_col.data<scalar_t>();
-modulated_deformable_im2col_gpu_kernel <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+modulated_deformable_im2col_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group,
batch_size, channels, deformable_group, height_col, width_col, data_col_);
@@ -349,7 +351,6 @@ void modulated_deformable_im2col_cuda(
{
printf("error in modulated_deformable_im2col_cuda: %s\n", cudaGetErrorString(err));
}
}
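Aside: the scalar_t pointers above come from an ATen floating-point dispatch (elided at the top of this hunk), and all three host wrappers in this file follow the same dispatch, launch, then cudaGetLastError pattern. A minimal self-contained sketch of that pattern, using the same era of the ATen API as this file (tensor.type() and data<scalar_t>()); the kernel and the constants here are hypothetical stand-ins:

#include <ATen/ATen.h>
#include <cstdio>

const int CUDA_NUM_THREADS = 1024;  // assumption; see the real definition above

inline int GET_BLOCKS(const int N) {
  return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;
}

template <typename scalar_t>
__global__ void copy_kernel(const int n, const scalar_t *in, scalar_t *out) {
  int index = blockIdx.x * blockDim.x + threadIdx.x;
  if (index < n)  // surplus threads in the last block do nothing
    out[index] = in[index];
}

void copy_cuda(const at::Tensor input, at::Tensor output) {
  const int num_kernels = input.numel();
  AT_DISPATCH_FLOATING_TYPES(
      input.type(), "copy_cuda", ([&] {
        const scalar_t *in_ = input.data<scalar_t>();
        scalar_t *out_ = output.data<scalar_t>();
        copy_kernel<scalar_t><<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
            num_kernels, in_, out_);
      }));
  // Kernel launches are asynchronous; surface any launch failure here.
  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess)
    printf("error in copy_cuda: %s\n", cudaGetErrorString(err));
}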
void modulated_deformable_col2im_cuda(
@@ -358,7 +359,8 @@ void modulated_deformable_col2im_cuda(
const int height_col, const int width_col, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
-const int deformable_group, at::Tensor grad_im){
+const int deformable_group, at::Tensor grad_im)
+{
const int channel_per_deformable_group = channels / deformable_group;
const int num_kernels = channels * kernel_h * kernel_w * batch_size * height_col * width_col;
@@ -370,7 +372,7 @@ void modulated_deformable_col2im_cuda(
const scalar_t *data_mask_ = data_mask.data<scalar_t>();
scalar_t *grad_im_ = grad_im.data<scalar_t>();
-modulated_deformable_col2im_gpu_kernel <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+modulated_deformable_col2im_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_col_, data_offset_, data_mask_, channels, height_im, width_im,
kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
dilation_h, dilation_w, channel_per_deformable_group,
@@ -382,7 +384,6 @@ void modulated_deformable_col2im_cuda(
{
printf("error in modulated_deformable_col2im_cuda: %s\n", cudaGetErrorString(err));
}
}
void modulated_deformable_col2im_coord_cuda(
@@ -392,7 +393,8 @@ void modulated_deformable_col2im_coord_cuda(
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group,
-at::Tensor grad_offset, at::Tensor grad_mask) {
+at::Tensor grad_offset, at::Tensor grad_mask)
+{
const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * kernel_w * deformable_group;
const int channel_per_deformable_group = channels * kernel_h * kernel_w / deformable_group;
@@ -405,7 +407,7 @@ void modulated_deformable_col2im_coord_cuda(
scalar_t *grad_offset_ = grad_offset.data<scalar_t>();
scalar_t *grad_mask_ = grad_mask.data<scalar_t>();
-modulated_deformable_col2im_coord_gpu_kernel <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
+modulated_deformable_col2im_coord_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_col_, data_im_, data_offset_, data_mask_, channels, height_im, width_im,
kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
dilation_h, dilation_w, channel_per_deformable_group,
......