Unverified commit dfb48c87 authored by Zaida Zhou, committed by GitHub

[Refactor] Refactor the directory of csrc (#1206)



* [Refactor] Refactor the csrc directory

* update MANIFEST.in

* fix hip

* add csrc readme

* trailing whitespace

* fix syntax error in setup.py

* add compatibility docs

* move parrots_cudawarpfunction.cuh to common/cuda

* fix grammar, update directory tree

* fix MANIFEST.in

* Add new structure of csrc in compatibility.md

* Add original structure of csrc in compatibility.md

* fix typo

* remove TODO

* modify according to comment

* format
Co-authored-by: grimoire <yaoqian@sensetime.com>
parent 9fa5de8b
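For orientation, a sketch of the refactored csrc layout, inferred only from the paths touched in this diff (the compatibility docs added by this PR contain the authoritative tree; directories not visible here, such as an onnxruntime folder, are omitted):

mmcv/ops/csrc
├── common            # shared C++ headers (e.g. pytorch_cpp_helper.hpp)
│   ├── cuda          # shared CUDA headers (e.g. pytorch_cuda_helper.hpp)
│   └── hip           # shared HIP headers for ROCm builds
├── parrots           # parrots extension sources (*.cpp)
├── pytorch           # PyTorch extension sources (*.cpp)
│   ├── cuda          # CUDA kernels (*.cu)
│   └── hip           # hipified sources
└── tensorrt          # TensorRT plugin sources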
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu
#include "nms_rotated_cuda.cuh"
#include "pytorch_cuda_helper.hpp"

Tensor nms_rotated_cuda(const Tensor dets, const Tensor scores,
                        const Tensor order_t, const Tensor dets_sorted,
                        float iou_threshold, const int multi_label) {
  // using scalar_t = float;
  AT_ASSERTM(dets.type().is_cuda(), "dets must be a CUDA tensor");
  AT_ASSERTM(scores.type().is_cuda(), "scores must be a CUDA tensor");
  at::cuda::CUDAGuard device_guard(dets.device());

  int dets_num = dets.size(0);

  const int col_blocks = at::cuda::ATenCeilDiv(dets_num, threadsPerBlock);

  // One 64-bit word per (box, column-block) pair: bit b of
  // mask[i * col_blocks + j] is set when box i overlaps box
  // (j * threadsPerBlock + b) above the IoU threshold.
  Tensor mask =
      at::empty({dets_num * col_blocks}, dets.options().dtype(at::kLong));

  dim3 blocks(col_blocks, col_blocks);
  dim3 threads(threadsPerBlock);
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      dets_sorted.type(), "nms_rotated_kernel_cuda", [&] {
        nms_rotated_cuda_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
            dets_num, iou_threshold, dets_sorted.data<scalar_t>(),
            (unsigned long long*)mask.data<int64_t>(), multi_label);
      });

  Tensor mask_cpu = mask.to(at::kCPU);
  unsigned long long* mask_host =
      (unsigned long long*)mask_cpu.data<int64_t>();

  std::vector<unsigned long long> remv(col_blocks);
  memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);

  Tensor keep =
      at::empty({dets_num}, dets.options().dtype(at::kLong).device(at::kCPU));
  int64_t* keep_out = keep.data<int64_t>();

  // Greedy suppression on the host: walk boxes in descending score order,
  // keep a box only if no higher-scoring kept box has flagged it, then
  // OR its overlap words into the running removal mask.
  int num_to_keep = 0;
  for (int i = 0; i < dets_num; i++) {
    int nblock = i / threadsPerBlock;
    int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      keep_out[num_to_keep++] = i;
      unsigned long long* p = mask_host + i * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }

  AT_CUDA_CHECK(cudaGetLastError());
  return order_t.index(
      {keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep)
           .to(order_t.device(), keep.scalar_type())});
}
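The host-side loop above is greedy NMS over a precomputed bitmask: the kernel emits, for each box i and each block j of threadsPerBlock candidates, a 64-bit word whose bit b flags an over-threshold IoU between box i and box j * threadsPerBlock + b. A minimal Python sketch of the same reduction, using a hypothetical toy mask rather than anything from MMCV's API:

# Illustrative only: mirrors the C++ suppression loop above.
# threads_per_block and the toy mask are hypothetical stand-ins.
threads_per_block = 64

def greedy_nms_from_mask(mask, dets_num):
    """mask[i][j] is an int whose bit b means box i suppresses
    box j * threads_per_block + b (boxes sorted by score)."""
    col_blocks = -(-dets_num // threads_per_block)  # ceil division
    remv = [0] * col_blocks  # running bitmask of suppressed boxes
    keep = []
    for i in range(dets_num):
        nblock, inblock = divmod(i, threads_per_block)
        if not (remv[nblock] >> inblock) & 1:  # box i not yet suppressed
            keep.append(i)
            for j in range(nblock, col_blocks):
                remv[j] |= mask[i][j]  # suppress everything box i overlaps
    return keep

# Toy example: 3 boxes, box 0 overlaps box 1 but not box 2.
mask = [[0b010], [0b000], [0b000]]
print(greedy_nms_from_mask(mask, 3))  # -> [0, 2]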
......
@@ -4,7 +4,7 @@
 #include <ATen/ATen.h>
 #include <ATen/TensorUtils.h>
-#include "../pytorch_cpp_helper.hpp"
+#include "pytorch_cpp_helper.hpp"
 // implementation taken from Caffe2
 template <typename T>
......
......
@@ -4,7 +4,7 @@
 #include <ATen/ATen.h>
 #include <ATen/TensorUtils.h>
-#include "../pytorch_cpp_helper.hpp"
+#include "pytorch_cpp_helper.hpp"
 // implementation taken from Caffe2
 template <typename T>
......
File added
File added
......
@@ -145,11 +145,10 @@ def get_extensions():
         library_dirs += [tensorrt_lib_path]
         libraries += ['nvinfer', 'nvparsers', 'nvinfer_plugin']
         libraries += ['cudart']
-        kwargs = {}
         define_macros = []
         extra_compile_args = {'cxx': []}
-        include_path = os.path.abspath('./mmcv/ops/csrc')
+        include_path = os.path.abspath('./mmcv/ops/csrc/common/cuda')
         include_trt_path = os.path.abspath('./mmcv/ops/csrc/tensorrt')
         include_dirs.append(include_path)
         include_dirs.append(include_trt_path)
......
@@ -163,9 +162,6 @@ def get_extensions():
         extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
         library_dirs += library_paths(cuda=True)
-        kwargs['library_dirs'] = library_dirs
-        kwargs['libraries'] = libraries
-
         from setuptools import Extension
         ext_ops = Extension(
             name=ext_name,
......
@@ -187,9 +183,11 @@ def get_extensions():
         # new parrots op impl do not use MMCV_USE_PARROTS
         # define_macros = [('MMCV_USE_PARROTS', None)]
         define_macros = []
-        op_files = glob.glob('./mmcv/ops/csrc/parrots/*.cu') +\
+        include_dirs = []
+        op_files = glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cu') +\
             glob.glob('./mmcv/ops/csrc/parrots/*.cpp')
-        include_dirs = [os.path.abspath('./mmcv/ops/csrc')]
+        include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))
+        include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/cuda'))
         cuda_args = os.getenv('MMCV_CUDA_ARGS')
         extra_compile_args = {
             'nvcc': [cuda_args] if cuda_args else [],
......
@@ -219,6 +217,7 @@ def get_extensions():
         os.environ.setdefault('MAX_JOBS', '4')
         define_macros = []
         extra_compile_args = {'cxx': []}
+        include_dirs = []
         is_rocm_pytorch = False
         if parse_version(torch.__version__) >= parse_version('1.5'):
......
@@ -226,13 +225,13 @@ def get_extensions():
             is_rocm_pytorch = True if ((torch.version.hip is not None) and
                                        (ROCM_HOME is not None)) else False
-        this_dir = 'mmcv/ops/csrc/'
+        project_dir = 'mmcv/ops/csrc/'
         if is_rocm_pytorch:
             from torch.utils.hipify import hipify_python
             hipify_python.hipify(
-                project_directory=this_dir,
-                output_directory=this_dir,
+                project_directory=project_dir,
+                output_directory=project_dir,
                 includes='mmcv/ops/csrc/*',
                 show_detailed=True,
                 is_pytorch_extension=True,
......
@@ -243,25 +242,26 @@ def get_extensions():
             extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
             op_files = glob.glob('./mmcv/ops/csrc/pytorch/hip/*')
             extension = CUDAExtension
-            include_path = os.path.abspath('./mmcv/ops/csrc/hip')
+            include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/hip'))
         elif torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1':
             define_macros += [('MMCV_WITH_CUDA', None)]
             cuda_args = os.getenv('MMCV_CUDA_ARGS')
             extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
-            op_files = glob.glob('./mmcv/ops/csrc/pytorch/*')
+            op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \
+                glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cu')
             extension = CUDAExtension
-            include_path = os.path.abspath('./mmcv/ops/csrc')
+            include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))
+            include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/cuda'))
         else:
             print(f'Compiling {ext_name} without CUDA')
             op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp')
             extension = CppExtension
-            include_path = os.path.abspath('./mmcv/ops/csrc')
+            include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))
         ext_ops = extension(
             name=ext_name,
             sources=op_files,
-            include_dirs=[include_path],
+            include_dirs=include_dirs,
             define_macros=define_macros,
             extra_compile_args=extra_compile_args)
         extensions.append(ext_ops)
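Taken together, this hunk replaces the single include_path with an include_dirs list rooted at csrc/common and splits op sources between pytorch/*.cpp and pytorch/cuda/*.cu. A condensed sketch of the resulting flow, simplified from the diff above (not the verbatim setup.py):

# Condensed sketch of the build flow after this refactor.
import glob
import os

from torch.utils.cpp_extension import CppExtension, CUDAExtension

def make_ext(ext_name='mmcv._ext', with_cuda=False):
    define_macros = []
    include_dirs = [os.path.abspath('./mmcv/ops/csrc/common')]
    op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp')
    extension = CppExtension
    if with_cuda:
        define_macros += [('MMCV_WITH_CUDA', None)]
        # CUDA kernels and their shared headers live in dedicated subdirs.
        op_files += glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cu')
        include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/cuda'))
        extension = CUDAExtension
    return extension(
        name=ext_name,
        sources=op_files,
        include_dirs=include_dirs,
        define_macros=define_macros,
        extra_compile_args={'cxx': []})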
......
@@ -276,7 +276,6 @@ def get_extensions():
         ort_path = os.getenv('ONNXRUNTIME_DIR', '0')
         library_dirs += [os.path.join(ort_path, 'lib')]
         libraries.append('onnxruntime')
-        kwargs = {}
         define_macros = []
         extra_compile_args = {'cxx': []}
......
@@ -297,9 +296,6 @@ def get_extensions():
         include_dirs += include_paths(cuda=False)
         library_dirs += library_paths(cuda=False)
-        kwargs['library_dirs'] = library_dirs
-        kwargs['libraries'] = libraries
-
         from setuptools import Extension
         ext_ops = Extension(
             name=ext_name,
......