Unverified Commit 230f9a3b authored by q.yao, committed by GitHub

Refactor csrc with device dispatcher (#1463)

* Add device registry for pytorch ops

* add declaration of CheckDeviceConsistency

* fix for torch130

* assert with torch check

* Refactor ops with dispatch

* update rest ops

* faster install

* update compatibility

* update compatibility, rename parameter

* move cpu implementation to pytorch/cpu

* update ops/csrc/README.md

* fix rocm support

* update Chinese documentation

* update docs

* list instead of map
parent ef8ba752
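
Every op in this commit follows the same pattern: the per-op #ifdef MMCV_WITH_CUDA branches and hand-written AT_ERROR fallbacks in the binding files collapse into a single *_impl function that forwards to DISPATCH_DEVICE_IMPL, and the device-specific code moves under mmcv/ops/csrc/pytorch/cuda and mmcv/ops/csrc/pytorch/cpu, where it registers itself with the new device registry. The dispatcher then picks the implementation registered for the device of the incoming tensors, so unsupported devices fail with a uniform runtime error. The excerpts below show the pattern for three_nn, tin_shift, upfirdn2d, and voxelization, plus the matching build changes in setup.py.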
mmcv/ops/csrc/pytorch/three_nn.cpp
@@ -2,29 +2,17 @@
 // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate.cpp
 #include "pytorch_cpp_helper.hpp"
+#include "pytorch_device_registry.hpp"

-#ifdef MMCV_WITH_CUDA
-void ThreeNNForwardCUDAKernelLauncher(int b, int n, int m, const Tensor unknown,
-                                      const Tensor known, Tensor dist2,
-                                      Tensor idx);
-
-void three_nn_forward_cuda(int b, int n, int m, const Tensor unknown,
+void three_nn_forward_impl(int b, int n, int m, const Tensor unknown,
                            const Tensor known, Tensor dist2, Tensor idx) {
-  ThreeNNForwardCUDAKernelLauncher(b, n, m, unknown, known, dist2, idx);
-};
-#endif
+  DISPATCH_DEVICE_IMPL(three_nn_forward_impl, b, n, m, unknown, known, dist2,
+                       idx);
+}

 void three_nn_forward(Tensor unknown_tensor, Tensor known_tensor,
                       Tensor dist2_tensor, Tensor idx_tensor, int b, int n,
                       int m) {
-  if (unknown_tensor.device().is_cuda()) {
-#ifdef MMCV_WITH_CUDA
-    three_nn_forward_cuda(b, n, m, unknown_tensor, known_tensor, dist2_tensor,
-                          idx_tensor);
-#else
-    AT_ERROR("three_nn is not compiled with GPU support");
-#endif
-  } else {
-    AT_ERROR("three_nn is not implemented on CPU");
-  }
+  three_nn_forward_impl(b, n, m, unknown_tensor, known_tensor, dist2_tensor,
+                        idx_tensor);
 }
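
The CUDA half of three_nn does not disappear; it moves into the new cuda directory (compiled via the cuda/*.cpp glob added to setup.py below) and attaches itself to the dispatcher. A minimal sketch of what that registration could look like, assuming a REGISTER_DEVICE_IMPL(op, device, func) macro in pytorch_device_registry.hpp; the macro name and file name are inferred from the dispatch pattern, not shown in this excerpt:

// Sketch: mmcv/ops/csrc/pytorch/cuda/three_nn_cuda.cpp (file name assumed)
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

void ThreeNNForwardCUDAKernelLauncher(int b, int n, int m, const Tensor unknown,
                                      const Tensor known, Tensor dist2,
                                      Tensor idx);

// Thin wrapper with the same signature as three_nn_forward_impl.
void three_nn_forward_cuda(int b, int n, int m, const Tensor unknown,
                           const Tensor known, Tensor dist2, Tensor idx) {
  ThreeNNForwardCUDAKernelLauncher(b, n, m, unknown, known, dist2, idx);
}

// Registering under the impl symbol lets DISPATCH_DEVICE_IMPL find this
// wrapper whenever the input tensors live on a CUDA device.
REGISTER_DEVICE_IMPL(three_nn_forward_impl, CUDA, three_nn_forward_cuda);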
mmcv/ops/csrc/pytorch/tin_shift.cpp
 // Copyright (c) OpenMMLab. All rights reserved
 #include "pytorch_cpp_helper.hpp"
+#include "pytorch_device_registry.hpp"

-#ifdef MMCV_WITH_CUDA
-void TINShiftForwardCUDAKernelLauncher(Tensor input, Tensor shift,
-                                       Tensor output);
-
-void TINShiftBackwardCUDAKernelLauncher(Tensor grad_output, Tensor shift,
-                                        Tensor grad_input);
-
-void tin_shift_forward_cuda(Tensor input, Tensor shift, Tensor output) {
-  TINShiftForwardCUDAKernelLauncher(input, shift, output);
+void tin_shift_forward_impl(Tensor input, Tensor shift, Tensor output) {
+  DISPATCH_DEVICE_IMPL(tin_shift_forward_impl, input, shift, output);
 }

-void tin_shift_backward_cuda(Tensor grad_output, Tensor shift,
+void tin_shift_backward_impl(Tensor grad_output, Tensor shift,
                              Tensor grad_input) {
-  TINShiftBackwardCUDAKernelLauncher(grad_output, shift, grad_input);
+  DISPATCH_DEVICE_IMPL(tin_shift_backward_impl, grad_output, shift, grad_input);
 }
-#endif

 void tin_shift_forward(Tensor input, Tensor shift, Tensor output) {
-  if (input.device().is_cuda()) {
-#ifdef MMCV_WITH_CUDA
-    CHECK_CUDA_INPUT(input);
-    CHECK_CUDA_INPUT(shift);
-    CHECK_CUDA_INPUT(output);
-    tin_shift_forward_cuda(input, shift, output);
-#else
-    AT_ERROR("TINShift is not compiled with GPU support");
-#endif
-  } else {
-    AT_ERROR("TINShift is not implemented on CPU");
-  }
+  tin_shift_forward_impl(input, shift, output);
 }

 void tin_shift_backward(Tensor grad_output, Tensor shift, Tensor grad_input) {
-  if (grad_output.device().is_cuda()) {
-#ifdef MMCV_WITH_CUDA
-    CHECK_CUDA_INPUT(grad_output);
-    CHECK_CUDA_INPUT(shift);
-    CHECK_CUDA_INPUT(grad_input);
-    tin_shift_backward_cuda(grad_output, shift, grad_input);
-#else
-    AT_ERROR("TINShift is not compiled with GPU support");
-#endif
-  } else {
-    AT_ERROR("TINShift is not implemented on CPU");
-  }
+  tin_shift_backward_impl(grad_output, shift, grad_input);
 }
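
The registry header itself is not part of this excerpt, but the commit log ("Add device registry for pytorch ops", "list instead of map") outlines the mechanism: one table per op symbol mapping a device type to a function pointer, filled at library-load time and consulted on every call. A simplified, self-contained sketch of that idea; all names apart from DISPATCH_DEVICE_IMPL's role are assumptions, not the real pytorch_device_registry.hpp:

// Simplified sketch of a per-op device registry; illustrative only.
#include <torch/extension.h>

#include <utility>
#include <vector>

template <typename F, F f>
class DeviceRegistry;

// Specialization for free functions: one registry instance per op symbol.
template <typename Ret, typename... Args, Ret (*f)(Args...)>
class DeviceRegistry<Ret (*)(Args...), f> {
 public:
  using FunctionType = Ret (*)(Args...);

  static DeviceRegistry& instance() {
    static DeviceRegistry inst;
    return inst;
  }

  void Register(at::DeviceType device, FunctionType func) {
    // A flat list rather than a map ("list instead of map" in the commit
    // log); with a handful of devices, linear search is cheap.
    impls_.emplace_back(device, func);
  }

  FunctionType Find(at::DeviceType device) const {
    for (const auto& entry : impls_)
      if (entry.first == device) return entry.second;
    return nullptr;
  }

 private:
  std::vector<std::pair<at::DeviceType, FunctionType>> impls_;
};

// Call the implementation registered for `device`, or fail loudly. The real
// DISPATCH_DEVICE_IMPL macro presumably derives `device` from its tensor
// arguments instead of taking it explicitly.
template <typename Registry, typename... Args>
decltype(auto) Dispatch(Registry& registry, const char* name,
                        at::DeviceType device, Args&&... args) {
  auto impl = registry.Find(device);
  TORCH_CHECK(impl != nullptr, name, ": no implementation registered for ",
              at::DeviceTypeName(device));
  return impl(std::forward<Args>(args)...);
}

With this in place, a REGISTER_DEVICE_IMPL macro can expand to a static object whose constructor calls Register, so each cuda/ or cpu/ translation unit wires itself up as soon as the extension module is loaded.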
mmcv/ops/csrc/pytorch/upfirdn2d.cpp
@@ -100,25 +100,19 @@ THE POSSIBILITY OF SUCH DAMAGES.
 */

 #include "pytorch_cpp_helper.hpp"
+#include "pytorch_device_registry.hpp"

-#ifdef MMCV_WITH_CUDA
-torch::Tensor upfirdn2d_op(const torch::Tensor& input,
-                           const torch::Tensor& kernel, int up_x, int up_y,
-                           int down_x, int down_y, int pad_x0, int pad_x1,
-                           int pad_y0, int pad_y1);
-#endif
+torch::Tensor upfirdn2d_op_impl(const torch::Tensor& input,
+                                const torch::Tensor& kernel, int up_x, int up_y,
+                                int down_x, int down_y, int pad_x0, int pad_x1,
+                                int pad_y0, int pad_y1) {
+  return DISPATCH_DEVICE_IMPL(upfirdn2d_op_impl, input, kernel, up_x, up_y,
+                              down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1);
+}

 torch::Tensor upfirdn2d(const torch::Tensor& input, const torch::Tensor& kernel,
                         int up_x, int up_y, int down_x, int down_y, int pad_x0,
                         int pad_x1, int pad_y0, int pad_y1) {
-#ifdef MMCV_WITH_CUDA
-  CHECK_CUDA(input);
-  CHECK_CUDA(kernel);
-
-  return upfirdn2d_op(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1,
-                      pad_y0, pad_y1);
-#else
-  AT_ERROR("UpFirDn2d is not compiled with GPU support");
-#endif
+  return upfirdn2d_op_impl(input, kernel, up_x, up_y, down_x, down_y, pad_x0,
+                           pad_x1, pad_y0, pad_y1);
 }
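
upfirdn2d shows that the dispatcher also covers value-returning ops: upfirdn2d_op_impl simply returns whatever DISPATCH_DEVICE_IMPL yields. The CHECK_CUDA input guards vanish from the binding as well; presumably such checks now live with the registered CUDA implementation, in line with the CheckDeviceConsistency helper mentioned in the commit log.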
mmcv/ops/csrc/pytorch/voxelization.cpp
 // Copyright (c) OpenMMLab. All rights reserved.
 #include "pytorch_cpp_helper.hpp"
+#include "pytorch_device_registry.hpp"

-#ifdef MMCV_WITH_CUDA
-int HardVoxelizeForwardCUDAKernelLauncher(
-    const at::Tensor &points, at::Tensor &voxels, at::Tensor &coors,
-    at::Tensor &num_points_per_voxel, const std::vector<float> voxel_size,
-    const std::vector<float> coors_range, const int max_points,
-    const int max_voxels, const int NDim = 3);
-
-int hard_voxelize_forward_cuda(const at::Tensor &points, at::Tensor &voxels,
+int hard_voxelize_forward_impl(const at::Tensor &points, at::Tensor &voxels,
                                at::Tensor &coors,
                                at::Tensor &num_points_per_voxel,
                                const std::vector<float> voxel_size,
                                const std::vector<float> coors_range,
                                const int max_points, const int max_voxels,
                                const int NDim = 3) {
-  return HardVoxelizeForwardCUDAKernelLauncher(
-      points, voxels, coors, num_points_per_voxel, voxel_size, coors_range,
-      max_points, max_voxels, NDim);
-};
-
-void DynamicVoxelizeForwardCUDAKernelLauncher(
-    const at::Tensor &points, at::Tensor &coors,
-    const std::vector<float> voxel_size, const std::vector<float> coors_range,
-    const int NDim = 3);
+  return DISPATCH_DEVICE_IMPL(hard_voxelize_forward_impl, points, voxels, coors,
+                              num_points_per_voxel, voxel_size, coors_range,
+                              max_points, max_voxels, NDim);
+}

-void dynamic_voxelize_forward_cuda(const at::Tensor &points, at::Tensor &coors,
+void dynamic_voxelize_forward_impl(const at::Tensor &points, at::Tensor &coors,
                                    const std::vector<float> voxel_size,
                                    const std::vector<float> coors_range,
                                    const int NDim = 3) {
-  DynamicVoxelizeForwardCUDAKernelLauncher(points, coors, voxel_size,
-                                           coors_range, NDim);
-};
-#endif
-
-int hard_voxelize_forward_cpu(const at::Tensor &points, at::Tensor &voxels,
-                              at::Tensor &coors,
-                              at::Tensor &num_points_per_voxel,
-                              const std::vector<float> voxel_size,
-                              const std::vector<float> coors_range,
-                              const int max_points, const int max_voxels,
-                              const int NDim = 3);
-
-void dynamic_voxelize_forward_cpu(const at::Tensor &points, at::Tensor &coors,
-                                  const std::vector<float> voxel_size,
-                                  const std::vector<float> coors_range,
-                                  const int NDim = 3);
+  DISPATCH_DEVICE_IMPL(dynamic_voxelize_forward_impl, points, coors, voxel_size,
+                       coors_range, NDim);
+}

 void hard_voxelize_forward(const at::Tensor &points,
                            const at::Tensor &voxel_size,
@@ -60,21 +35,10 @@ void hard_voxelize_forward(const at::Tensor &points,
   std::vector<float> coors_range_v(
       coors_range.data_ptr<float>(),
       coors_range.data_ptr<float>() + coors_range.numel());
-  if (points.device().is_cuda()) {
-#ifdef MMCV_WITH_CUDA
-    CHECK_CUDA_INPUT(points);
-    *voxel_num_data = hard_voxelize_forward_cuda(
-        points, voxels, coors, num_points_per_voxel, voxel_size_v,
-        coors_range_v, max_points, max_voxels, NDim);
-#else
-    AT_ERROR("hard_voxelize is not compiled with GPU support");
-#endif
-  } else {
-    *voxel_num_data = hard_voxelize_forward_cpu(
-        points, voxels, coors, num_points_per_voxel, voxel_size_v,
-        coors_range_v, max_points, max_voxels, NDim);
-  }
+  *voxel_num_data = hard_voxelize_forward_impl(
+      points, voxels, coors, num_points_per_voxel, voxel_size_v, coors_range_v,
+      max_points, max_voxels, NDim);
 }

 void dynamic_voxelize_forward(const at::Tensor &points,
@@ -87,17 +51,6 @@ void dynamic_voxelize_forward(const at::Tensor &points,
   std::vector<float> coors_range_v(
       coors_range.data_ptr<float>(),
       coors_range.data_ptr<float>() + coors_range.numel());
-  if (points.device().is_cuda()) {
-#ifdef MMCV_WITH_CUDA
-    CHECK_CUDA_INPUT(points);
-    dynamic_voxelize_forward_cuda(points, coors, voxel_size_v, coors_range_v,
-                                  NDim);
-#else
-    AT_ERROR("dynamic_voxelize is not compiled with GPU support");
-#endif
-  } else {
-    dynamic_voxelize_forward_cpu(points, coors, voxel_size_v, coors_range_v,
-                                 NDim);
-  }
+  dynamic_voxelize_forward_impl(points, coors, voxel_size_v, coors_range_v,
+                                NDim);
 }
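
voxelization is the one op in this excerpt that already had CPU kernels; per the commit message they move to pytorch/cpu, and the forward declarations deleted above become unnecessary because the CPU translation unit now registers itself. A sketch of what that file's glue could look like; the file name and macro spelling are assumptions, while the signatures are the ones removed above:

// Sketch: mmcv/ops/csrc/pytorch/cpu/voxelization.cpp (file name assumed)
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

// CPU implementations moved here from voxelization.cpp; bodies omitted.
int hard_voxelize_forward_cpu(const at::Tensor &points, at::Tensor &voxels,
                              at::Tensor &coors,
                              at::Tensor &num_points_per_voxel,
                              const std::vector<float> voxel_size,
                              const std::vector<float> coors_range,
                              const int max_points, const int max_voxels,
                              const int NDim);

void dynamic_voxelize_forward_cpu(const at::Tensor &points, at::Tensor &coors,
                                  const std::vector<float> voxel_size,
                                  const std::vector<float> coors_range,
                                  const int NDim);

// Hook both CPU implementations into the dispatcher under their impl symbols.
REGISTER_DEVICE_IMPL(hard_voxelize_forward_impl, CPU, hard_voxelize_forward_cpu);
REGISTER_DEVICE_IMPL(dynamic_voxelize_forward_impl, CPU,
                     dynamic_voxelize_forward_cpu);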
setup.py
@@ -223,7 +223,7 @@ def get_extensions():
         os.environ.setdefault('MAX_JOBS', str(cpu_use))

         define_macros = []
-        extra_compile_args = {'cxx': []}
+        extra_compile_args = {'cxx': ['-std=c++14']}

         include_dirs = []

         is_rocm_pytorch = False
@@ -249,7 +249,8 @@ def get_extensions():
             define_macros += [('HIP_DIFF', None)]
             cuda_args = os.getenv('MMCV_CUDA_ARGS')
             extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
-            op_files = glob.glob('./mmcv/ops/csrc/pytorch/hip/*')
+            op_files = glob.glob('./mmcv/ops/csrc/pytorch/hip/*') \
+                + glob.glob('./mmcv/ops/csrc/pytorch/cpu/hip/*')
             extension = CUDAExtension
             include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/hip'))
         elif torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1':
@@ -257,16 +258,22 @@ def get_extensions():
             cuda_args = os.getenv('MMCV_CUDA_ARGS')
             extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
             op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \
-                glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cu')
+                glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') + \
+                glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cu') + \
+                glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cpp')
             extension = CUDAExtension
             include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))
             include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/cuda'))
         else:
             print(f'Compiling {ext_name} without CUDA')
-            op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp')
+            op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \
+                glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp')
             extension = CppExtension
             include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))

+        if 'nvcc' in extra_compile_args:
+            extra_compile_args['nvcc'] += ['-std=c++14']
+
         ext_ops = extension(
             name=ext_name,
             sources=op_files,
......
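
The setup.py changes mirror the new source layout: ROCm builds additionally glob the hipified pytorch/cpu/hip directory, CUDA builds compile the new pytorch/cpu/*.cpp and pytorch/cuda/*.cpp registration glue alongside the .cu kernels, and CPU-only builds pick up pytorch/cpu/*.cpp so ops with CPU implementations keep working without CUDA. Finally, -std=c++14 is now passed to both the host compiler and nvcc, presumably because the templated registry header needs it.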