Unverified Commit 48d99025 authored by z55250825's avatar z55250825 Committed by GitHub
Browse files

Add new parrots extension implementation for all ops (#794)

* delete all parrots file
add bbox_overlaps new parrots op impl

* support first new parrots op impl (bbox_overlaps) (test succeeded)

* add box_iou_rotated op, test succeed

* add carafe and carafe_naive op, test succeed (one parrots bug needs fixing)

* add cc_attention op, test success

* add corner_pool op, test success

* add parrots op deform_conv, test success

* add deform_roi_pool op, test success (but has question)

* add focal loss op, test success (gradcheck)

* add masked_conv2d op, test success

* add modulated_deform_conv op, test success

* add nms and nms_rotated op, test success

* add psamask op, test success

* add roi_align op, test success

* add roi_pool op, test success

* add sync_bn op, test success

* add tin_shift op, test success

* fix test_deform_roi_pool, add parrots test

* skip test_onnx because parrots does not support onnx

* fix c++ lint

* fix python lint

* fix python lint
parent 72e4cc12
#ifndef MASKED_CONV2D_PYTORCH_H
#define MASKED_CONV2D_PYTORCH_H
#include <torch/extension.h>
using namespace at;
// Declarations for the CUDA launchers of masked conv2d; the kernels live in
// the corresponding .cu file. Both take precomputed index tensors
// (mask_h_idx / mask_w_idx) selecting the spatial positions to operate on.
//
// Gathers patches of `im` at the masked positions into the column buffer
// `col` (im2col restricted to the masked locations), using a kernel_h x
// kernel_w window with pad_h / pad_w padding.
// NOTE(review): the exact layout of `col` is defined by the kernel
// implementation, not visible here -- confirm against the .cu source.
void masked_im2col_forward_cuda(const Tensor im, const Tensor mask_h_idx,
const Tensor mask_w_idx, Tensor col,
const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w);
// Scatters the column buffer `col` back into image tensor `im`
// (height/width/channels describe the output image) at the masked positions.
// `im` is non-const and is written by the kernel.
void masked_col2im_forward_cuda(const Tensor col, const Tensor mask_h_idx,
const Tensor mask_w_idx, Tensor im, int height,
int width, int channels);
#endif  // MASKED_CONV2D_PYTORCH_H
// Copyright (c) 2019, SenseTime. #include "pytorch_cpp_helper.hpp"
#include "parrots_cpp_helper.hpp"
#ifdef MMCV_WITH_CUDA
void ModulatedDeformConvForwardCUDAKernelLauncher( void ModulatedDeformConvForwardCUDAKernelLauncher(
const DArrayLite input, const DArrayLite weight, const DArrayLite bias, Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset,
const DArrayLite ones, const DArrayLite offset, const DArrayLite mask, Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w,
DArrayLite output, DArrayLite columns, int kernel_h, int kernel_w,
const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w,
const int dilation_h, const int dilation_w, const int group, const int dilation_h, const int dilation_w, const int group,
int deformable_group, const bool with_bias, CudaContext& ctx, const int deformable_group, const bool with_bias);
cudaStream_t stream);
void ModulatedDeformConvBackwardCUDAKernelLauncher( void ModulatedDeformConvBackwardCUDAKernelLauncher(
const DArrayLite input, const DArrayLite weight, const DArrayLite bias, Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset,
const DArrayLite ones, const DArrayLite offset, const DArrayLite mask, Tensor mask, Tensor columns, Tensor grad_input, Tensor grad_weight,
DArrayLite columns, DArrayLite grad_input, DArrayLite grad_weight, Tensor grad_bias, Tensor grad_offset, Tensor grad_mask, Tensor grad_output,
DArrayLite grad_bias, DArrayLite grad_offset, DArrayLite grad_mask, int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h,
DArrayLite grad_output, int kernel_h, int kernel_w, int stride_h, int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,
int stride_w, int pad_h, int pad_w, int dilation_h, int dilation_w, const bool with_bias);
int group, int deformable_group, const bool with_bias, CudaContext& ctx,
cudaStream_t stream);
void modulated_deform_conv_forward_cuda(CudaContext& ctx, const SSElement& attr, void modulated_deform_conv_forward_cuda(
const OperatorBase::in_list_t& ins, Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset,
OperatorBase::out_list_t& outs) { Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w,
int kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, const int stride_h, const int stride_w, const int pad_h, const int pad_w,
dilation_w, group, deformable_group, with_bias; const int dilation_h, const int dilation_w, const int group,
SSAttrs(attr) const int deformable_group, const bool with_bias) {
.get<int>("kernel_h", kernel_h)
.get<int>("kernel_w", kernel_w)
.get<int>("stride_h", stride_h)
.get<int>("stride_w", stride_w)
.get<int>("pad_h", pad_h)
.get<int>("pad_w", pad_w)
.get<int>("dilation_h", dilation_h)
.get<int>("dilation_w", dilation_w)
.get<int>("group", group)
.get<int>("deformable_group", deformable_group)
.get<int>("with_bias", with_bias)
.done();
auto input = ins[0];
auto weight = ins[1];
auto bias = ins[2];
auto ones = ins[3];
auto offset = ins[4];
auto mask = ins[5];
auto output = outs[0];
auto columns = outs[1];
cudaStream_t stream = getStreamNative<CudaDevice>(ctx.getStream());
ModulatedDeformConvForwardCUDAKernelLauncher( ModulatedDeformConvForwardCUDAKernelLauncher(
input, weight, bias, ones, offset, mask, output, columns, kernel_h, input, weight, bias, ones, offset, mask, output, columns, kernel_h,
kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, group, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, group,
deformable_group, with_bias, ctx, stream); deformable_group, with_bias);
} }
void modulated_deform_conv_backward_cuda(CudaContext& ctx, void modulated_deform_conv_backward_cuda(
const SSElement& attr, Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset,
const OperatorBase::in_list_t& ins, Tensor mask, Tensor columns, Tensor grad_input, Tensor grad_weight,
OperatorBase::out_list_t& outs) { Tensor grad_bias, Tensor grad_offset, Tensor grad_mask, Tensor grad_output,
int kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h,
dilation_w, group, deformable_group, with_bias; int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,
SSAttrs(attr) const bool with_bias) {
.get<int>("kernel_h", kernel_h)
.get<int>("kernel_w", kernel_w)
.get<int>("stride_h", stride_h)
.get<int>("stride_w", stride_w)
.get<int>("pad_h", pad_h)
.get<int>("pad_w", pad_w)
.get<int>("dilation_h", dilation_h)
.get<int>("dilation_w", dilation_w)
.get<int>("group", group)
.get<int>("deformable_group", deformable_group)
.get<int>("with_bias", with_bias)
.done();
auto input = ins[0];
auto weight = ins[1];
auto bias = ins[2];
auto ones = ins[3];
auto offset = ins[4];
auto mask = ins[5];
auto columns = outs[0];
auto grad_input = outs[1];
auto grad_weight = outs[2];
auto grad_bias = outs[3];
auto grad_offset = outs[4];
auto grad_mask = outs[5];
auto grad_output = outs[6];
cudaStream_t stream = getStreamNative<CudaDevice>(ctx.getStream());
ModulatedDeformConvBackwardCUDAKernelLauncher( ModulatedDeformConvBackwardCUDAKernelLauncher(
input, weight, bias, ones, offset, mask, columns, grad_input, grad_weight, input, weight, bias, ones, offset, mask, columns, grad_input, grad_weight,
grad_bias, grad_offset, grad_mask, grad_output, kernel_h, kernel_w, grad_bias, grad_offset, grad_mask, grad_output, kernel_h, kernel_w,
stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, group, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, group,
deformable_group, with_bias, ctx, stream); deformable_group, with_bias);
} }
#endif
PARROTS_EXTENSION_REGISTER(modulated_deform_conv_forward) void modulated_deform_conv_forward(
.attr("kernel_h") Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset,
.attr("kernel_w") Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w,
.attr("stride_h") const int stride_h, const int stride_w, const int pad_h, const int pad_w,
.attr("stride_w") const int dilation_h, const int dilation_w, const int group,
.attr("pad_h") const int deformable_group, const bool with_bias) {
.attr("pad_w") if (input.device().is_cuda()) {
.attr("dilation_h") #ifdef MMCV_WITH_CUDA
.attr("dilation_w") CHECK_CUDA_INPUT(input);
.attr("group") CHECK_CUDA_INPUT(weight);
.attr("deformable_group") CHECK_CUDA_INPUT(bias);
.attr("with_bias") CHECK_CUDA_INPUT(ones);
.input(6) CHECK_CUDA_INPUT(offset);
.output(2) CHECK_CUDA_INPUT(mask);
.apply(modulated_deform_conv_forward_cuda) CHECK_CUDA_INPUT(output);
.done(); CHECK_CUDA_INPUT(columns);
PARROTS_EXTENSION_REGISTER(modulated_deform_conv_backward) modulated_deform_conv_forward_cuda(
.attr("kernel_h") input, weight, bias, ones, offset, mask, output, columns, kernel_h,
.attr("kernel_w") kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w,
.attr("stride_h") group, deformable_group, with_bias);
.attr("stride_w") #else
.attr("pad_h") AT_ERROR("ModulatedDeformConv is not compiled with GPU support");
.attr("pad_w") #endif
.attr("dilation_h") } else {
.attr("dilation_w") AT_ERROR("ModulatedDeformConv is not implemented on CPU");
.attr("group") }
.attr("deformable_group") }
.attr("with_bias")
.input(6) void modulated_deform_conv_backward(
.output(7) Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset,
.apply(modulated_deform_conv_backward_cuda) Tensor mask, Tensor columns, Tensor grad_input, Tensor grad_weight,
.done(); Tensor grad_bias, Tensor grad_offset, Tensor grad_mask, Tensor grad_output,
int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h,
int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,
const bool with_bias) {
if (input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(weight);
CHECK_CUDA_INPUT(bias);
CHECK_CUDA_INPUT(ones);
CHECK_CUDA_INPUT(offset);
CHECK_CUDA_INPUT(mask);
CHECK_CUDA_INPUT(columns);
CHECK_CUDA_INPUT(grad_input);
CHECK_CUDA_INPUT(grad_weight);
CHECK_CUDA_INPUT(grad_bias);
CHECK_CUDA_INPUT(grad_offset);
CHECK_CUDA_INPUT(grad_mask);
CHECK_CUDA_INPUT(grad_output);
modulated_deform_conv_backward_cuda(
input, weight, bias, ones, offset, mask, columns, grad_input,
grad_weight, grad_bias, grad_offset, grad_mask, grad_output, kernel_h,
kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w,
group, deformable_group, with_bias);
#else
AT_ERROR("ModulatedDeformConv is not compiled with GPU support");
#endif
} else {
AT_ERROR("ModulatedDeformConv is not implemented on CPU");
}
}
This diff is collapsed.
#ifndef MODULATED_DEFORM_CONV_PYTORCH_H
#define MODULATED_DEFORM_CONV_PYTORCH_H
#include <torch/extension.h>
using namespace at;
// CUDA entry points for modulated deformable convolution; implementations
// live in the corresponding .cu file.
//
// Forward pass: consumes input/weight/bias plus the per-position offset and
// modulation mask tensors; the non-const `output` and `columns` tensors are
// the buffers written by the implementation. `ones` is an auxiliary buffer
// and `with_bias` selects whether `bias` is applied.
// NOTE(review): required shapes of `ones`/`columns` are fixed by the kernel
// launcher, not visible here -- confirm against the .cu source.
void modulated_deform_conv_forward_cuda(
    Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset,
    Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w,
    const int stride_h, const int stride_w, const int pad_h, const int pad_w,
    const int dilation_h, const int dilation_w, const int group,
    const int deformable_group, const bool with_bias);
// Backward pass: given `grad_output`, fills the grad_* tensors for every
// learnable input (input, weight, bias, offset, mask). Takes the same
// geometry parameters (kernel/stride/pad/dilation/group) as the forward pass
// so the two declarations stay in sync.
void modulated_deform_conv_backward_cuda(
    Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset,
    Tensor mask, Tensor columns, Tensor grad_input, Tensor grad_weight,
    Tensor grad_bias, Tensor grad_offset, Tensor grad_mask, Tensor grad_output,
    int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h,
    int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,
    const bool with_bias);
#endif  // MODULATED_DEFORM_CONV_PYTORCH_H
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
#ifndef NMS_PYTORCH_H
#define NMS_PYTORCH_H
#include <torch/extension.h>
// Non-maximum suppression over axis-aligned boxes scored by `scores`,
// suppressing overlaps above `iou_threshold`.
// NOTE(review): return value is presumably the indices of kept boxes, and
// `offset` presumably the 0/1 coordinate-system offset used in the IoU
// computation -- confirm against the implementation.
at::Tensor nms(at::Tensor boxes, at::Tensor scores, float iou_threshold,
               int offset);
// Soft-NMS variant: instead of discarding overlapping boxes outright, decays
// their scores (`sigma` controls the decay, `min_score` the drop threshold,
// `method` selects the decay function). `dets` is non-const and receives the
// rescored detections.
at::Tensor softnms(at::Tensor boxes, at::Tensor scores, at::Tensor dets,
                   float iou_threshold, float sigma, float min_score,
                   int method, int offset);
// Groups detections into clusters of mutually-overlapping boxes (IoU above
// `iou_threshold`); each inner vector is one match group.
std::vector<std::vector<int> > nms_match(at::Tensor dets, float iou_threshold);
// NMS for rotated boxes. `order`/`dets_sorted` carry the score ordering
// precomputed by the caller; `multi_label` toggles per-class handling.
// NOTE(review): the expected (cx, cy, w, h, angle) layout of `dets` is fixed
// by the kernel, not visible here -- confirm before use.
at::Tensor nms_rotated(const at::Tensor dets, const at::Tensor scores,
                       const at::Tensor order, const at::Tensor dets_sorted,
                       const float iou_threshold, const int multi_label);
#endif  // NMS_PYTORCH_H
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
#include "parrots_cpp_helper.hpp"
using namespace parrots;
#include "parrots_cuda_helper.hpp"
using namespace parrots;
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment