Unverified commit dfb48c87 authored by Zaida Zhou, committed by GitHub

[Refactor] Refactor the directory of csrc (#1206)



* [Refactor] Refactor the csrc directory

* update MANIFEST.in

* fix hip

* add csrc readme

* trailing whitespace

* fix syntax error in setup.py

* add compatibility docs

* move parrots_cudawarpfunction.cuh to common/cuda

* fix grammar, update directory tree

* fix MANIFEST.in

* Add new structure of csrc in compatibility.md

* Add original structure of csrc in compatibility.md

* fix typo

* remove TODO

* modify according to comment

* format
Co-authored-by: grimoire <yaoqian@sensetime.com>
parent 9fa5de8b
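For orientation, a sketch of the refactored csrc layout, inferred only from the paths touched in this diff (the compatibility docs added by this PR contain the authoritative tree; directories not visible here, such as an onnxruntime folder, are omitted):

mmcv/ops/csrc
├── common            # shared C++ headers (e.g. pytorch_cpp_helper.hpp)
│   ├── cuda          # shared CUDA headers (e.g. pytorch_cuda_helper.hpp)
│   └── hip           # shared HIP headers for ROCm builds
├── parrots           # parrots extension sources (*.cpp)
├── pytorch           # PyTorch extension sources (*.cpp)
│   ├── cuda          # CUDA kernels (*.cu)
│   └── hip           # hipified sources
└── tensorrt          # TensorRT plugin sources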
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
// modified from
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu
#include "nms_rotated_cuda.cuh"
#include "pytorch_cuda_helper.hpp"

Tensor nms_rotated_cuda(const Tensor dets, const Tensor scores,
                        const Tensor order_t, const Tensor dets_sorted,
                        float iou_threshold, const int multi_label) {
  // using scalar_t = float;
  AT_ASSERTM(dets.type().is_cuda(), "dets must be a CUDA tensor");
  AT_ASSERTM(scores.type().is_cuda(), "scores must be a CUDA tensor");
  at::cuda::CUDAGuard device_guard(dets.device());

  int dets_num = dets.size(0);

  const int col_blocks = at::cuda::ATenCeilDiv(dets_num, threadsPerBlock);

  // One 64-bit word per (box, column-block) pair: bit b of
  // mask[i * col_blocks + j] is set when box i overlaps box
  // (j * threadsPerBlock + b) above the IoU threshold.
  Tensor mask =
      at::empty({dets_num * col_blocks}, dets.options().dtype(at::kLong));

  dim3 blocks(col_blocks, col_blocks);
  dim3 threads(threadsPerBlock);
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      dets_sorted.type(), "nms_rotated_kernel_cuda", [&] {
        nms_rotated_cuda_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
            dets_num, iou_threshold, dets_sorted.data<scalar_t>(),
            (unsigned long long*)mask.data<int64_t>(), multi_label);
      });

  Tensor mask_cpu = mask.to(at::kCPU);
  unsigned long long* mask_host =
      (unsigned long long*)mask_cpu.data<int64_t>();

  std::vector<unsigned long long> remv(col_blocks);
  memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);

  Tensor keep =
      at::empty({dets_num}, dets.options().dtype(at::kLong).device(at::kCPU));
  int64_t* keep_out = keep.data<int64_t>();

  // Greedy suppression on the host: walk boxes in descending score order,
  // keep a box only if no higher-scoring kept box has flagged it, then
  // OR its overlap words into the running removal mask.
  int num_to_keep = 0;
  for (int i = 0; i < dets_num; i++) {
    int nblock = i / threadsPerBlock;
    int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      keep_out[num_to_keep++] = i;
      unsigned long long* p = mask_host + i * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }

  AT_CUDA_CHECK(cudaGetLastError());
  return order_t.index(
      {keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep)
           .to(order_t.device(), keep.scalar_type())});
}
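The host-side loop above is greedy NMS over a precomputed bitmask: the kernel emits, for each box i and each block j of threadsPerBlock candidates, a 64-bit word whose bit b flags an over-threshold IoU between box i and box j * threadsPerBlock + b. A minimal Python sketch of the same reduction, using a hypothetical toy mask rather than anything from MMCV's API:

# Illustrative only: mirrors the C++ suppression loop above.
# threads_per_block and the toy mask are hypothetical stand-ins.
threads_per_block = 64

def greedy_nms_from_mask(mask, dets_num):
    """mask[i][j] is an int whose bit b means box i suppresses
    box j * threads_per_block + b (boxes sorted by score)."""
    col_blocks = -(-dets_num // threads_per_block)  # ceil division
    remv = [0] * col_blocks  # running bitmask of suppressed boxes
    keep = []
    for i in range(dets_num):
        nblock, inblock = divmod(i, threads_per_block)
        if not (remv[nblock] >> inblock) & 1:  # box i not yet suppressed
            keep.append(i)
            for j in range(nblock, col_blocks):
                remv[j] |= mask[i][j]  # suppress everything box i overlaps
    return keep

# Toy example: 3 boxes, box 0 overlaps box 1 but not box 2.
mask = [[0b010], [0b000], [0b000]]
print(greedy_nms_from_mask(mask, 3))  # -> [0, 2]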
......
@@ -4,7 +4,7 @@
 #include <ATen/ATen.h>
 #include <ATen/TensorUtils.h>
-#include "../pytorch_cpp_helper.hpp"
+#include "pytorch_cpp_helper.hpp"
 // implementation taken from Caffe2
 template <typename T>
......
......
@@ -4,7 +4,7 @@
 #include <ATen/ATen.h>
 #include <ATen/TensorUtils.h>
-#include "../pytorch_cpp_helper.hpp"
+#include "pytorch_cpp_helper.hpp"
 // implementation taken from Caffe2
 template <typename T>
......
File added
File added
......
@@ -145,11 +145,10 @@ def get_extensions():
         library_dirs += [tensorrt_lib_path]
         libraries += ['nvinfer', 'nvparsers', 'nvinfer_plugin']
         libraries += ['cudart']
-        kwargs = {}
         define_macros = []
         extra_compile_args = {'cxx': []}
-        include_path = os.path.abspath('./mmcv/ops/csrc')
+        include_path = os.path.abspath('./mmcv/ops/csrc/common/cuda')
         include_trt_path = os.path.abspath('./mmcv/ops/csrc/tensorrt')
         include_dirs.append(include_path)
         include_dirs.append(include_trt_path)
......
@@ -163,9 +162,6 @@ def get_extensions():
         extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
         library_dirs += library_paths(cuda=True)
-        kwargs['library_dirs'] = library_dirs
-        kwargs['libraries'] = libraries
-
         from setuptools import Extension
         ext_ops = Extension(
             name=ext_name,
......
@@ -187,9 +183,11 @@ def get_extensions():
         # new parrots op impl do not use MMCV_USE_PARROTS
         # define_macros = [('MMCV_USE_PARROTS', None)]
         define_macros = []
-        op_files = glob.glob('./mmcv/ops/csrc/parrots/*.cu') +\
+        include_dirs = []
+        op_files = glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cu') +\
             glob.glob('./mmcv/ops/csrc/parrots/*.cpp')
-        include_dirs = [os.path.abspath('./mmcv/ops/csrc')]
+        include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))
+        include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/cuda'))
         cuda_args = os.getenv('MMCV_CUDA_ARGS')
         extra_compile_args = {
             'nvcc': [cuda_args] if cuda_args else [],
......
@@ -219,6 +217,7 @@ def get_extensions():
         os.environ.setdefault('MAX_JOBS', '4')
         define_macros = []
         extra_compile_args = {'cxx': []}
+        include_dirs = []
         is_rocm_pytorch = False
         if parse_version(torch.__version__) >= parse_version('1.5'):
......
@@ -226,13 +225,13 @@ def get_extensions():
             is_rocm_pytorch = True if ((torch.version.hip is not None) and
                                        (ROCM_HOME is not None)) else False
-        this_dir = 'mmcv/ops/csrc/'
+        project_dir = 'mmcv/ops/csrc/'
         if is_rocm_pytorch:
             from torch.utils.hipify import hipify_python
             hipify_python.hipify(
-                project_directory=this_dir,
-                output_directory=this_dir,
+                project_directory=project_dir,
+                output_directory=project_dir,
                 includes='mmcv/ops/csrc/*',
                 show_detailed=True,
                 is_pytorch_extension=True,
......
@@ -243,25 +242,26 @@ def get_extensions():
             extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
             op_files = glob.glob('./mmcv/ops/csrc/pytorch/hip/*')
             extension = CUDAExtension
-            include_path = os.path.abspath('./mmcv/ops/csrc/hip')
+            include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/hip'))
         elif torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1':
             define_macros += [('MMCV_WITH_CUDA', None)]
             cuda_args = os.getenv('MMCV_CUDA_ARGS')
             extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
-            op_files = glob.glob('./mmcv/ops/csrc/pytorch/*')
+            op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \
+                glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cu')
             extension = CUDAExtension
-            include_path = os.path.abspath('./mmcv/ops/csrc')
+            include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))
+            include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/cuda'))
         else:
             print(f'Compiling {ext_name} without CUDA')
             op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp')
             extension = CppExtension
-            include_path = os.path.abspath('./mmcv/ops/csrc')
+            include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))
         ext_ops = extension(
             name=ext_name,
             sources=op_files,
-            include_dirs=[include_path],
+            include_dirs=include_dirs,
             define_macros=define_macros,
             extra_compile_args=extra_compile_args)
         extensions.append(ext_ops)
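Taken together, this hunk replaces the single include_path with an include_dirs list rooted at csrc/common and splits op sources between pytorch/*.cpp and pytorch/cuda/*.cu. A condensed sketch of the resulting flow, simplified from the diff above (not the verbatim setup.py):

# Condensed sketch of the build flow after this refactor.
import glob
import os

from torch.utils.cpp_extension import CppExtension, CUDAExtension

def make_ext(ext_name='mmcv._ext', with_cuda=False):
    define_macros = []
    include_dirs = [os.path.abspath('./mmcv/ops/csrc/common')]
    op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp')
    extension = CppExtension
    if with_cuda:
        define_macros += [('MMCV_WITH_CUDA', None)]
        # CUDA kernels and their shared headers live in dedicated subdirs.
        op_files += glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cu')
        include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/cuda'))
        extension = CUDAExtension
    return extension(
        name=ext_name,
        sources=op_files,
        include_dirs=include_dirs,
        define_macros=define_macros,
        extra_compile_args={'cxx': []})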
......
@@ -276,7 +276,6 @@ def get_extensions():
         ort_path = os.getenv('ONNXRUNTIME_DIR', '0')
         library_dirs += [os.path.join(ort_path, 'lib')]
         libraries.append('onnxruntime')
-        kwargs = {}
         define_macros = []
         extra_compile_args = {'cxx': []}
......
@@ -297,9 +296,6 @@ def get_extensions():
         include_dirs += include_paths(cuda=False)
         library_dirs += library_paths(cuda=False)
-        kwargs['library_dirs'] = library_dirs
-        kwargs['libraries'] = libraries
-
         from setuptools import Extension
         ext_ops = Extension(
             name=ext_name,
......