"simplekey.cpp" did not exist on "fb9176a0543e5bb704356e5469cb0a9ac8c9e03a"
Unverified commit 230f9a3b, authored by q.yao and committed by GitHub
Browse files

Refactor csrc with device dispatcher (#1463)

* Add device registry for pytorch ops

* add declaration of CheckDeviceConsistency

* fix for torch130

* assert with torch check

* Refactor ops with dispatch

* update rest ops

* faster install

* update compatibility

* update compatibility, rename parameter

* move cpu implementation to pytorch/cpu

* update ops/csrc/README.md

* fix rocm support

* update cn document

* update docs

* list instead of map
parent ef8ba752
...@@ -2,29 +2,17 @@ ...@@ -2,29 +2,17 @@
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate.cpp // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate.cpp
#include "pytorch_cpp_helper.hpp" #include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_CUDA void three_nn_forward_impl(int b, int n, int m, const Tensor unknown,
void ThreeNNForwardCUDAKernelLauncher(int b, int n, int m, const Tensor unknown,
const Tensor known, Tensor dist2,
Tensor idx);
void three_nn_forward_cuda(int b, int n, int m, const Tensor unknown,
const Tensor known, Tensor dist2, Tensor idx) { const Tensor known, Tensor dist2, Tensor idx) {
ThreeNNForwardCUDAKernelLauncher(b, n, m, unknown, known, dist2, idx); DISPATCH_DEVICE_IMPL(three_nn_forward_impl, b, n, m, unknown, known, dist2,
}; idx);
#endif }
void three_nn_forward(Tensor unknown_tensor, Tensor known_tensor, void three_nn_forward(Tensor unknown_tensor, Tensor known_tensor,
Tensor dist2_tensor, Tensor idx_tensor, int b, int n, Tensor dist2_tensor, Tensor idx_tensor, int b, int n,
int m) { int m) {
if (unknown_tensor.device().is_cuda()) { three_nn_forward_impl(b, n, m, unknown_tensor, known_tensor, dist2_tensor,
#ifdef MMCV_WITH_CUDA idx_tensor);
three_nn_forward_cuda(b, n, m, unknown_tensor, known_tensor, dist2_tensor,
idx_tensor);
#else
AT_ERROR("three_nn is not compiled with GPU support");
#endif
} else {
AT_ERROR("three_nn is not implemented on CPU");
}
} }
// Copyright (c) OpenMMLab. All rights reserved // Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp" #include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_CUDA void tin_shift_forward_impl(Tensor input, Tensor shift, Tensor output) {
void TINShiftForwardCUDAKernelLauncher(Tensor input, Tensor shift, DISPATCH_DEVICE_IMPL(tin_shift_forward_impl, input, shift, output);
Tensor output);
void TINShiftBackwardCUDAKernelLauncher(Tensor grad_output, Tensor shift,
Tensor grad_input);
void tin_shift_forward_cuda(Tensor input, Tensor shift, Tensor output) {
TINShiftForwardCUDAKernelLauncher(input, shift, output);
} }
void tin_shift_backward_cuda(Tensor grad_output, Tensor shift, void tin_shift_backward_impl(Tensor grad_output, Tensor shift,
Tensor grad_input) { Tensor grad_input) {
TINShiftBackwardCUDAKernelLauncher(grad_output, shift, grad_input); DISPATCH_DEVICE_IMPL(tin_shift_backward_impl, grad_output, shift, grad_input);
} }
#endif
void tin_shift_forward(Tensor input, Tensor shift, Tensor output) { void tin_shift_forward(Tensor input, Tensor shift, Tensor output) {
if (input.device().is_cuda()) { tin_shift_forward_impl(input, shift, output);
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(shift);
CHECK_CUDA_INPUT(output);
tin_shift_forward_cuda(input, shift, output);
#else
AT_ERROR("TINShift is not compiled with GPU support");
#endif
} else {
AT_ERROR("TINShift is not implemented on CPU");
}
} }
void tin_shift_backward(Tensor grad_output, Tensor shift, Tensor grad_input) { void tin_shift_backward(Tensor grad_output, Tensor shift, Tensor grad_input) {
if (grad_output.device().is_cuda()) { tin_shift_backward_impl(grad_output, shift, grad_input);
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(grad_output);
CHECK_CUDA_INPUT(shift);
CHECK_CUDA_INPUT(grad_input);
tin_shift_backward_cuda(grad_output, shift, grad_input);
#else
AT_ERROR("TINShift is not compiled with GPU support");
#endif
} else {
AT_ERROR("TINShift is not implemented on CPU");
}
} }
...@@ -100,25 +100,19 @@ THE POSSIBILITY OF SUCH DAMAGES. ...@@ -100,25 +100,19 @@ THE POSSIBILITY OF SUCH DAMAGES.
*/ */
#include "pytorch_cpp_helper.hpp" #include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_CUDA
torch::Tensor upfirdn2d_op(const torch::Tensor& input, torch::Tensor upfirdn2d_op_impl(const torch::Tensor& input,
const torch::Tensor& kernel, int up_x, int up_y, const torch::Tensor& kernel, int up_x, int up_y,
int down_x, int down_y, int pad_x0, int pad_x1, int down_x, int down_y, int pad_x0, int pad_x1,
int pad_y0, int pad_y1); int pad_y0, int pad_y1) {
return DISPATCH_DEVICE_IMPL(upfirdn2d_op_impl, input, kernel, up_x, up_y,
#endif down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1);
}
torch::Tensor upfirdn2d(const torch::Tensor& input, const torch::Tensor& kernel, torch::Tensor upfirdn2d(const torch::Tensor& input, const torch::Tensor& kernel,
int up_x, int up_y, int down_x, int down_y, int pad_x0, int up_x, int up_y, int down_x, int down_y, int pad_x0,
int pad_x1, int pad_y0, int pad_y1) { int pad_x1, int pad_y0, int pad_y1) {
#ifdef MMCV_WITH_CUDA return upfirdn2d_op_impl(input, kernel, up_x, up_y, down_x, down_y, pad_x0,
CHECK_CUDA(input); pad_x1, pad_y0, pad_y1);
CHECK_CUDA(kernel);
return upfirdn2d_op(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1,
pad_y0, pad_y1);
#else
AT_ERROR("UpFirDn2d is not compiled with GPU support");
#endif
} }
// Copyright (c) OpenMMLab. All rights reserved. // Copyright (c) OpenMMLab. All rights reserved.
#include "pytorch_cpp_helper.hpp" #include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_CUDA int hard_voxelize_forward_impl(const at::Tensor &points, at::Tensor &voxels,
int HardVoxelizeForwardCUDAKernelLauncher(
const at::Tensor &points, at::Tensor &voxels, at::Tensor &coors,
at::Tensor &num_points_per_voxel, const std::vector<float> voxel_size,
const std::vector<float> coors_range, const int max_points,
const int max_voxels, const int NDim = 3);
int hard_voxelize_forward_cuda(const at::Tensor &points, at::Tensor &voxels,
at::Tensor &coors, at::Tensor &coors,
at::Tensor &num_points_per_voxel, at::Tensor &num_points_per_voxel,
const std::vector<float> voxel_size, const std::vector<float> voxel_size,
const std::vector<float> coors_range, const std::vector<float> coors_range,
const int max_points, const int max_voxels, const int max_points, const int max_voxels,
const int NDim = 3) { const int NDim = 3) {
return HardVoxelizeForwardCUDAKernelLauncher( return DISPATCH_DEVICE_IMPL(hard_voxelize_forward_impl, points, voxels, coors,
points, voxels, coors, num_points_per_voxel, voxel_size, coors_range, num_points_per_voxel, voxel_size, coors_range,
max_points, max_voxels, NDim); max_points, max_voxels, NDim);
}; }
void DynamicVoxelizeForwardCUDAKernelLauncher(
const at::Tensor &points, at::Tensor &coors,
const std::vector<float> voxel_size, const std::vector<float> coors_range,
const int NDim = 3);
void dynamic_voxelize_forward_cuda(const at::Tensor &points, at::Tensor &coors, void dynamic_voxelize_forward_impl(const at::Tensor &points, at::Tensor &coors,
const std::vector<float> voxel_size, const std::vector<float> voxel_size,
const std::vector<float> coors_range, const std::vector<float> coors_range,
const int NDim = 3) { const int NDim = 3) {
DynamicVoxelizeForwardCUDAKernelLauncher(points, coors, voxel_size, DISPATCH_DEVICE_IMPL(dynamic_voxelize_forward_impl, points, coors, voxel_size,
coors_range, NDim); coors_range, NDim);
}; }
#endif
int hard_voxelize_forward_cpu(const at::Tensor &points, at::Tensor &voxels,
at::Tensor &coors,
at::Tensor &num_points_per_voxel,
const std::vector<float> voxel_size,
const std::vector<float> coors_range,
const int max_points, const int max_voxels,
const int NDim = 3);
void dynamic_voxelize_forward_cpu(const at::Tensor &points, at::Tensor &coors,
const std::vector<float> voxel_size,
const std::vector<float> coors_range,
const int NDim = 3);
void hard_voxelize_forward(const at::Tensor &points, void hard_voxelize_forward(const at::Tensor &points,
const at::Tensor &voxel_size, const at::Tensor &voxel_size,
...@@ -60,21 +35,10 @@ void hard_voxelize_forward(const at::Tensor &points, ...@@ -60,21 +35,10 @@ void hard_voxelize_forward(const at::Tensor &points,
std::vector<float> coors_range_v( std::vector<float> coors_range_v(
coors_range.data_ptr<float>(), coors_range.data_ptr<float>(),
coors_range.data_ptr<float>() + coors_range.numel()); coors_range.data_ptr<float>() + coors_range.numel());
if (points.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(points);
*voxel_num_data = hard_voxelize_forward_cuda( *voxel_num_data = hard_voxelize_forward_impl(
points, voxels, coors, num_points_per_voxel, voxel_size_v, points, voxels, coors, num_points_per_voxel, voxel_size_v, coors_range_v,
coors_range_v, max_points, max_voxels, NDim); max_points, max_voxels, NDim);
#else
AT_ERROR("hard_voxelize is not compiled with GPU support");
#endif
} else {
*voxel_num_data = hard_voxelize_forward_cpu(
points, voxels, coors, num_points_per_voxel, voxel_size_v,
coors_range_v, max_points, max_voxels, NDim);
}
} }
void dynamic_voxelize_forward(const at::Tensor &points, void dynamic_voxelize_forward(const at::Tensor &points,
...@@ -87,17 +51,6 @@ void dynamic_voxelize_forward(const at::Tensor &points, ...@@ -87,17 +51,6 @@ void dynamic_voxelize_forward(const at::Tensor &points,
std::vector<float> coors_range_v( std::vector<float> coors_range_v(
coors_range.data_ptr<float>(), coors_range.data_ptr<float>(),
coors_range.data_ptr<float>() + coors_range.numel()); coors_range.data_ptr<float>() + coors_range.numel());
if (points.device().is_cuda()) { dynamic_voxelize_forward_impl(points, coors, voxel_size_v, coors_range_v,
#ifdef MMCV_WITH_CUDA NDim);
CHECK_CUDA_INPUT(points);
dynamic_voxelize_forward_cuda(points, coors, voxel_size_v, coors_range_v,
NDim);
#else
AT_ERROR("dynamic_voxelize is not compiled with GPU support");
#endif
} else {
dynamic_voxelize_forward_cpu(points, coors, voxel_size_v, coors_range_v,
NDim);
}
} }
...@@ -223,7 +223,7 @@ def get_extensions(): ...@@ -223,7 +223,7 @@ def get_extensions():
os.environ.setdefault('MAX_JOBS', str(cpu_use)) os.environ.setdefault('MAX_JOBS', str(cpu_use))
define_macros = [] define_macros = []
extra_compile_args = {'cxx': []} extra_compile_args = {'cxx': ['-std=c++14']}
include_dirs = [] include_dirs = []
is_rocm_pytorch = False is_rocm_pytorch = False
...@@ -249,7 +249,8 @@ def get_extensions(): ...@@ -249,7 +249,8 @@ def get_extensions():
define_macros += [('HIP_DIFF', None)] define_macros += [('HIP_DIFF', None)]
cuda_args = os.getenv('MMCV_CUDA_ARGS') cuda_args = os.getenv('MMCV_CUDA_ARGS')
extra_compile_args['nvcc'] = [cuda_args] if cuda_args else [] extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
op_files = glob.glob('./mmcv/ops/csrc/pytorch/hip/*') op_files = glob.glob('./mmcv/ops/csrc/pytorch/hip/*') \
+ glob.glob('./mmcv/ops/csrc/pytorch/cpu/hip/*')
extension = CUDAExtension extension = CUDAExtension
include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/hip')) include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/hip'))
elif torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1': elif torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1':
...@@ -257,16 +258,22 @@ def get_extensions(): ...@@ -257,16 +258,22 @@ def get_extensions():
cuda_args = os.getenv('MMCV_CUDA_ARGS') cuda_args = os.getenv('MMCV_CUDA_ARGS')
extra_compile_args['nvcc'] = [cuda_args] if cuda_args else [] extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \ op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \
glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cu') glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') + \
glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cu') + \
glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cpp')
extension = CUDAExtension extension = CUDAExtension
include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common')) include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))
include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/cuda')) include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/cuda'))
else: else:
print(f'Compiling {ext_name} without CUDA') print(f'Compiling {ext_name} without CUDA')
op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \
glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp')
extension = CppExtension extension = CppExtension
include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common')) include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))
if 'nvcc' in extra_compile_args:
extra_compile_args['nvcc'] += ['-std=c++14']
ext_ops = extension( ext_ops = extension(
name=ext_name, name=ext_name,
sources=op_files, sources=op_files,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment