Unverified Commit 2dc0a219 authored by Y_Xuan, committed by GitHub

[Feature] Porting mmcv for HIP (#1022)

* porting mmcv for hip

* add nvcc

* fix format

* fix format

* fix bug for carafe

* fix test_utils because ROCm PyTorch does not allow setting torch.backends.cudnn.benchmark to False

* add LOOSEVERSION

* fix format

* fix format of version

* fix code format

* test for yaml

* fix bug for citest

* fix bug in how torch.__version__ is obtained in setup.py
parent db580dda
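The recurring pattern in this change is detecting a ROCm (HIP) build of PyTorch and branching on it: in Python for the build and test code, and via the HIP_DIFF macro in the CUDA sources. A minimal, self-contained sketch of that detection logic (the function name is illustrative, not part of the commit):

from pkg_resources import parse_version

import torch


def is_rocm_build():
    """Return True when PyTorch >= 1.5 was built against ROCm/HIP."""
    # ROCM_HOME only exists in torch.utils.cpp_extension from 1.5 on,
    # hence the version guard before importing it.
    if parse_version(torch.__version__) < parse_version('1.5'):
        return False
    from torch.utils.cpp_extension import ROCM_HOME
    # torch.version.hip is None on CUDA builds; ROCM_HOME is None when
    # the ROCm toolchain is not installed.
    return (torch.version.hip is not None) and (ROCM_HOME is not None)


print('ROCm PyTorch:', is_rocm_build())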
@@ -7,7 +7,11 @@
 #include "pytorch_cuda_helper.hpp"
 #endif
+#ifdef HIP_DIFF
+#define WARP_SIZE 64
+#else
 #define WARP_SIZE 32
+#endif
 #define THREADS_PER_PIXEL 32
 #define MAX_SHARED_MEMORY 49152
 #define MAX_SHARED_SCALAR_T 6144  // 49152 / 8 = 6144
@@ -24,6 +28,7 @@ __device__ inline int Loc2Index(const int n, const int c, const int h,
   int index = w + (h + (c + n * channel_num) * height) * width;
   return index;
 }
+#ifndef HIP_DIFF
 /* TODO: move this to a common place */
 template <typename scalar_t>
 __device__ inline scalar_t min(scalar_t a, scalar_t b) {
@@ -34,19 +39,27 @@ template <typename scalar_t>
 __device__ inline scalar_t max(scalar_t a, scalar_t b) {
   return a > b ? a : b;
 }
+#endif
 template <typename scalar_t>
 __device__ __forceinline__ scalar_t warpReduceSum(scalar_t val) {
-  for (int offset = 16; offset > 0; offset /= 2)
+  for (int offset = WARP_SIZE / 2; offset > 0; offset /= 2)
+#ifdef HIP_DIFF
+    val += __shfl_down(val, offset);
+#else
     val += __shfl_down_sync(FULL_MASK, val, offset);
+#endif
   return val;
 }
 template <>
 __device__ __forceinline__ phalf warpReduceSum(phalf val) {
-  for (int offset = 16; offset > 0; offset /= 2)
+  for (int offset = WARP_SIZE / 2; offset > 0; offset /= 2)
+#ifdef HIP_DIFF
+    __PHALF(val) += __shfl_down(FULL_MASK, val, offset);
+#else
     __PHALF(val) +=
         __shfl_down_sync(FULL_MASK, static_cast<__half>(__PHALF(val)), offset);
+#endif
   return val;
 }
@@ -302,7 +315,11 @@ __global__ void CARAFEBackward_Mask(const int num_kernels,
         output_val += top_diff[top_id] * bottom_data[bottom_id];
       }
     }
+#ifdef HIP_DIFF
+    __syncthreads();
+#else
     __syncwarp();
+#endif
     output_val = warpReduceSum(output_val);
     if (lane_id == 0) {
       const int mask_id =
......
@@ -3,12 +3,15 @@
 #include "pytorch_cpp_helper.hpp"
 #ifdef MMCV_WITH_CUDA
+#ifndef HIP_DIFF
 #include <cuda_runtime_api.h>
 int get_cudart_version() { return CUDART_VERSION; }
 #endif
+#endif
 std::string get_compiling_cuda_version() {
 #ifdef MMCV_WITH_CUDA
+#ifndef HIP_DIFF
   std::ostringstream oss;
   // copied from
   // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231
@@ -20,6 +23,9 @@ std::string get_compiling_cuda_version() {
   };
   printCudaStyleVersion(get_cudart_version());
   return oss.str();
+#else
return std::string("rocm not vailable");
+#endif
 #else
   return std::string("not available");
 #endif
......
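get_compiling_cuda_version() is exposed to Python through mmcv's compiled extension (in current mmcv it is importable from mmcv.ops; treat the exact import path as an assumption here, since this view does not show the binding code). With HIP_DIFF defined, a ROCm build now returns the placeholder string added above instead of dereferencing CUDART_VERSION, which does not exist under HIP:

# Assumes the compiled mmcv._ext extension is importable.
from mmcv.ops import get_compiling_cuda_version

# CUDA build: e.g. '10.2'; ROCm build with HIP_DIFF: the placeholder
# string from the #else branch above; CPU-only build: 'not available'.
print(get_compiling_cuda_version())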
 from functools import partial
+from pkg_resources import parse_version
 import torch
 TORCH_VERSION = torch.__version__
+is_rocm_pytorch = False
+if parse_version(TORCH_VERSION) >= parse_version('1.5'):
+    from torch.utils.cpp_extension import ROCM_HOME
+    is_rocm_pytorch = True if ((torch.version.hip is not None) and
+                               (ROCM_HOME is not None)) else False
 def _get_cuda_home():
     if TORCH_VERSION == 'parrots':
         from parrots.utils.build_extension import CUDA_HOME
     else:
-        from torch.utils.cpp_extension import CUDA_HOME
+        if is_rocm_pytorch:
+            from torch.utils.cpp_extension import ROCM_HOME
+            CUDA_HOME = ROCM_HOME
+        else:
+            from torch.utils.cpp_extension import CUDA_HOME
     return CUDA_HOME
......
 import glob
 import os
 import re
-from pkg_resources import DistributionNotFound, get_distribution
+from pkg_resources import DistributionNotFound, get_distribution, parse_version
 from setuptools import find_packages, setup
 EXT_TYPE = ''
@@ -220,18 +220,44 @@ def get_extensions():
     define_macros = []
     extra_compile_args = {'cxx': []}
-    if torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1':
+    is_rocm_pytorch = False
+    if parse_version(torch.__version__) >= parse_version('1.5'):
+        from torch.utils.cpp_extension import ROCM_HOME
+        is_rocm_pytorch = True if ((torch.version.hip is not None) and
+                                   (ROCM_HOME is not None)) else False
+    this_dir = 'mmcv/ops/csrc/'
+    if is_rocm_pytorch:
+        from torch.utils.hipify import hipify_python
+        hipify_python.hipify(
+            project_directory=this_dir,
+            output_directory=this_dir,
+            includes='mmcv/ops/csrc/*',
+            show_detailed=True,
+            is_pytorch_extension=True,
+        )
+        define_macros += [('MMCV_WITH_CUDA', None)]
+        define_macros += [('HIP_DIFF', None)]
+        cuda_args = os.getenv('MMCV_CUDA_ARGS')
+        extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
+        op_files = glob.glob('./mmcv/ops/csrc/pytorch/hip/*')
+        extension = CUDAExtension
+        include_path = os.path.abspath('./mmcv/ops/csrc/hip')
+    elif torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1':
         define_macros += [('MMCV_WITH_CUDA', None)]
         cuda_args = os.getenv('MMCV_CUDA_ARGS')
         extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
         op_files = glob.glob('./mmcv/ops/csrc/pytorch/*')
         extension = CUDAExtension
+        include_path = os.path.abspath('./mmcv/ops/csrc')
     else:
         print(f'Compiling {ext_name} without CUDA')
         op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp')
         extension = CppExtension
+        include_path = os.path.abspath('./mmcv/ops/csrc')
-    include_path = os.path.abspath('./mmcv/ops/csrc')
     ext_ops = extension(
         name=ext_name,
         sources=op_files,
......
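In the ROCm branch of get_extensions() above, setup.py first runs PyTorch's hipify tool over the csrc tree, so the CUDA sources are translated to HIP before being compiled through the usual CUDAExtension path. The same call, pulled out as a standalone sketch so the translation step can be run and inspected on its own; the paths are the ones used in the diff above:

import glob

from torch.utils.hipify import hipify_python

this_dir = 'mmcv/ops/csrc/'

# Translate the CUDA sources; hipified copies are emitted under the
# output directory, which the build then picks up from .../pytorch/hip/.
hipify_python.hipify(
    project_directory=this_dir,
    output_directory=this_dir,
    includes='mmcv/ops/csrc/*',
    show_detailed=True,
    is_pytorch_extension=True,
)

print(glob.glob('./mmcv/ops/csrc/pytorch/hip/*'))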
 import os
 import random
+from pkg_resources import parse_version
 import numpy as np
 import torch
 from mmcv.runner import set_random_seed
+from mmcv.utils import TORCH_VERSION
+is_rocm_pytorch = False
+if parse_version(TORCH_VERSION) >= parse_version('1.5'):
+    from torch.utils.cpp_extension import ROCM_HOME
+    is_rocm_pytorch = True if ((torch.version.hip is not None) and
+                               (ROCM_HOME is not None)) else False
 def test_set_random_seed():
@@ -21,7 +29,10 @@ def test_set_random_seed():
     b_np_random = np.random.rand(2, 2)
     b_torch_random = torch.rand(2, 2)
     assert torch.backends.cudnn.deterministic is True
-    assert torch.backends.cudnn.benchmark is False
+    if is_rocm_pytorch:
+        assert torch.backends.cudnn.benchmark is True
+    else:
+        assert torch.backends.cudnn.benchmark is False
     assert a_random == b_random
     assert np.equal(a_np_random, b_np_random).all()
......
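The test change reflects a backend difference rather than a bug in set_random_seed: on ROCm builds of PyTorch, torch.backends.cudnn.benchmark stays True even when deterministic seeding asks for it to be disabled, which is what the commit message calls out. A short sketch of the behaviour the updated test asserts, assuming set_random_seed is called with deterministic=True as in the test:

import torch
from mmcv.runner import set_random_seed

set_random_seed(0, deterministic=True)

assert torch.backends.cudnn.deterministic is True
# On ROCm PyTorch the benchmark flag cannot be switched off, so the
# expected value depends on the backend the wheel was built against.
expected_benchmark = torch.version.hip is not None
assert torch.backends.cudnn.benchmark is expected_benchmark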