Unverified commit 43e94b39, authored by Ashish Farmer, committed by GitHub

[ROCm] Create torchvision as a HIP Extension (#1928)

* Added code to support creating extension on ROCm

* max -> fmaxf conversion for hipification

* added WITH_HIP flag for hipExtension

* added appropriate headers for HIP build

* use USE_ROCM in condition to build

* change fmaxf and fminf calls

* fminf -> min

* fix the check for ROCM_HOME

* more robust checking for rocm pytorch

* add check for pytorch version before using HIP extensions

* conditional reading of ROCM_HOME
parent cca0c77a
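
The bullet list above walks through the detection steps; a minimal sketch of that ROCm-vs-CUDA probe, assuming PyTorch >= 1.5 (earlier releases do not export ROCM_HOME from torch.utils.cpp_extension), with names copied from the setup.py diff below:

    # Mirrors the probe this commit adds to setup.py; illustrative only.
    import torch

    is_rocm_pytorch = False
    if torch.__version__ >= '1.5':   # string compare, as in the diff below
        from torch.utils.cpp_extension import ROCM_HOME
        # torch.version.hip is None on CUDA builds of PyTorch; ROCM_HOME is
        # None when the ROCm toolchain is not installed.
        is_rocm_pytorch = (torch.version.hip is not None) and (ROCM_HOME is not None)

    print('HIP/ROCm build' if is_rocm_pytorch else 'CUDA or CPU-only build')
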
@@ -13,6 +13,7 @@ import shutil
 import torch
 from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension, CUDA_HOME
+from torch.utils.hipify import hipify_python

 def read(*names, **kwargs):
@@ -83,7 +84,27 @@ def get_extensions():
     main_file = glob.glob(os.path.join(extensions_dir, '*.cpp'))
     source_cpu = glob.glob(os.path.join(extensions_dir, 'cpu', '*.cpp'))
-    source_cuda = glob.glob(os.path.join(extensions_dir, 'cuda', '*.cu'))
+
+    is_rocm_pytorch = False
+    if torch.__version__ >= '1.5':
+        from torch.utils.cpp_extension import ROCM_HOME
+        is_rocm_pytorch = True if ((torch.version.hip is not None) and (ROCM_HOME is not None)) else False
+
+    if is_rocm_pytorch:
+        hipify_python.hipify(
+            project_directory=this_dir,
+            output_directory=this_dir,
+            includes="torchvision/csrc/cuda/*",
+            show_detailed=True,
+            is_pytorch_extension=True,
+        )
+        source_cuda = glob.glob(os.path.join(extensions_dir, 'hip', '*.hip'))
+        ## Copy over additional files
+        shutil.copy("torchvision/csrc/cuda/cuda_helpers.h", "torchvision/csrc/hip/cuda_helpers.h")
+        shutil.copy("torchvision/csrc/cuda/vision_cuda.h", "torchvision/csrc/hip/vision_cuda.h")
+    else:
+        source_cuda = glob.glob(os.path.join(extensions_dir, 'cuda', '*.cu'))

     sources = main_file + source_cpu
     extension = CppExtension
@@ -103,15 +124,19 @@ def get_extensions():
     define_macros = []
     extra_compile_args = {}
-    if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv('FORCE_CUDA', '0') == '1':
+    if (torch.cuda.is_available() and ((CUDA_HOME is not None) or is_rocm_pytorch)) or os.getenv('FORCE_CUDA', '0') == '1':
         extension = CUDAExtension
         sources += source_cuda
-        define_macros += [('WITH_CUDA', None)]
-        nvcc_flags = os.getenv('NVCC_FLAGS', '')
-        if nvcc_flags == '':
-            nvcc_flags = []
-        else:
-            nvcc_flags = nvcc_flags.split(' ')
+        if not is_rocm_pytorch:
+            define_macros += [('WITH_CUDA', None)]
+            nvcc_flags = os.getenv('NVCC_FLAGS', '')
+            if nvcc_flags == '':
+                nvcc_flags = []
+            else:
+                nvcc_flags = nvcc_flags.split(' ')
+        else:
+            define_macros += [('WITH_HIP', None)]
+            nvcc_flags = []
         extra_compile_args = {
             'cxx': [],
             'nvcc': nvcc_flags,
......
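
For reference, the amended guard in the last setup.py hunk can be read as a standalone predicate. A hedged restatement (the helper name wants_gpu_extension is illustrative, not in the diff); note that torch.cuda.is_available() also returns True on ROCm builds of PyTorch, where HIP devices masquerade as CUDA devices:

    import os
    import torch
    from torch.utils.cpp_extension import CUDA_HOME

    def wants_gpu_extension(is_rocm_pytorch):
        # Build the GPU extension when a GPU runtime is usable and either the
        # CUDA toolchain (CUDA_HOME) or a ROCm PyTorch is present; setting
        # FORCE_CUDA=1 overrides the runtime check entirely.
        return ((torch.cuda.is_available()
                 and ((CUDA_HOME is not None) or is_rocm_pytorch))
                or os.getenv('FORCE_CUDA', '0') == '1')
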
@@ -5,6 +5,9 @@
 #ifdef WITH_CUDA
 #include "cuda/vision_cuda.h"
 #endif
+#ifdef WITH_HIP
+#include "hip/vision_cuda.h"
+#endif

 at::Tensor DeformConv2d_forward(
     const at::Tensor& input,
@@ -17,7 +20,7 @@ at::Tensor DeformConv2d_forward(
     const int groups,
     const int offset_groups) {
   if (input.type().is_cuda()) {
-#ifdef WITH_CUDA
+#if defined(WITH_CUDA) || defined(WITH_HIP)
     return DeformConv2d_forward_cuda(
         input.contiguous(),
         weight.contiguous(),
@@ -56,7 +59,7 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor> DeformConv2d_backward
     const int groups,
     const int offset_groups) {
   if (grad.type().is_cuda()) {
-#ifdef WITH_CUDA
+#if defined(WITH_CUDA) || defined(WITH_HIP)
     return DeformConv2d_backward_cuda(
         grad.contiguous(),
         input.contiguous(),
......
@@ -5,6 +5,9 @@
 #ifdef WITH_CUDA
 #include "cuda/vision_cuda.h"
 #endif
+#ifdef WITH_HIP
+#include "hip/vision_cuda.h"
+#endif

 #include <iostream>
@@ -16,7 +19,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIAlign_forward(
     const int pooled_width,
     const int sampling_ratio) {
   if (input.type().is_cuda()) {
-#ifdef WITH_CUDA
+#if defined(WITH_CUDA) || defined(WITH_HIP)
     return PSROIAlign_forward_cuda(
         input,
         rois,
@@ -45,7 +48,7 @@ at::Tensor PSROIAlign_backward(
     const int height,
     const int width) {
   if (grad.type().is_cuda()) {
-#ifdef WITH_CUDA
+#if defined(WITH_CUDA) || defined(WITH_HIP)
     return PSROIAlign_backward_cuda(
         grad,
         rois,
......
@@ -5,6 +5,9 @@
 #ifdef WITH_CUDA
 #include "cuda/vision_cuda.h"
 #endif
+#ifdef WITH_HIP
+#include "hip/vision_cuda.h"
+#endif

 std::tuple<at::Tensor, at::Tensor> PSROIPool_forward(
     const at::Tensor& input,
@@ -13,7 +16,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIPool_forward(
     const int pooled_height,
     const int pooled_width) {
   if (input.type().is_cuda()) {
-#ifdef WITH_CUDA
+#if defined(WITH_CUDA) || defined(WITH_HIP)
     return PSROIPool_forward_cuda(
         input, rois, spatial_scale, pooled_height, pooled_width);
 #else
@@ -36,7 +39,7 @@ at::Tensor PSROIPool_backward(
     const int height,
     const int width) {
   if (grad.type().is_cuda()) {
-#ifdef WITH_CUDA
+#if defined(WITH_CUDA) || defined(WITH_HIP)
     return PSROIPool_backward_cuda(
         grad,
         rois,
......
@@ -5,6 +5,9 @@
 #ifdef WITH_CUDA
 #include "cuda/vision_cuda.h"
 #endif
+#ifdef WITH_HIP
+#include "hip/vision_cuda.h"
+#endif

 // Interface for Python
 at::Tensor ROIAlign_forward(
@@ -19,7 +22,7 @@ at::Tensor ROIAlign_forward(
     // along each axis.
 {
   if (input.type().is_cuda()) {
-#ifdef WITH_CUDA
+#if defined(WITH_CUDA) || defined(WITH_HIP)
     return ROIAlign_forward_cuda(
         input,
         rois,
@@ -49,7 +52,7 @@ at::Tensor ROIAlign_backward(
     const int sampling_ratio,
     const bool aligned) {
   if (grad.type().is_cuda()) {
-#ifdef WITH_CUDA
+#if defined(WITH_CUDA) || defined(WITH_HIP)
     return ROIAlign_backward_cuda(
         grad,
         rois,
......
@@ -5,6 +5,9 @@
 #ifdef WITH_CUDA
 #include "cuda/vision_cuda.h"
 #endif
+#ifdef WITH_HIP
+#include "hip/vision_cuda.h"
+#endif

 std::tuple<at::Tensor, at::Tensor> ROIPool_forward(
     const at::Tensor& input,
@@ -13,7 +16,7 @@ std::tuple<at::Tensor, at::Tensor> ROIPool_forward(
     const int64_t pooled_height,
     const int64_t pooled_width) {
   if (input.type().is_cuda()) {
-#ifdef WITH_CUDA
+#if defined(WITH_CUDA) || defined(WITH_HIP)
     return ROIPool_forward_cuda(
         input, rois, spatial_scale, pooled_height, pooled_width);
 #else
@@ -36,7 +39,7 @@ at::Tensor ROIPool_backward(
     const int height,
     const int width) {
   if (grad.type().is_cuda()) {
-#ifdef WITH_CUDA
+#if defined(WITH_CUDA) || defined(WITH_HIP)
     return ROIPool_backward_cuda(
         grad,
         rois,
......
 #pragma once
+#if defined(WITH_CUDA)
 #include <c10/cuda/CUDAGuard.h>
+#elif defined(WITH_HIP)
+#include <c10/hip/HIPGuard.h>
+#endif
 #include <torch/extension.h>

 at::Tensor ROIAlign_forward_cuda(
......
@@ -4,18 +4,27 @@
 #ifdef WITH_CUDA
 #include "cuda/vision_cuda.h"
 #endif
+#ifdef WITH_HIP
+#include "hip/vision_cuda.h"
+#endif

 at::Tensor nms(
     const at::Tensor& dets,
     const at::Tensor& scores,
     const double iou_threshold) {
   if (dets.device().is_cuda()) {
-#ifdef WITH_CUDA
+#if defined(WITH_CUDA)
     if (dets.numel() == 0) {
       at::cuda::CUDAGuard device_guard(dets.device());
       return at::empty({0}, dets.options().dtype(at::kLong));
     }
     return nms_cuda(dets, scores, iou_threshold);
+#elif defined(WITH_HIP)
+    if (dets.numel() == 0) {
+      at::cuda::HIPGuard device_guard(dets.device());
+      return at::empty({0}, dets.options().dtype(at::kLong));
+    }
+    return nms_cuda(dets, scores, iou_threshold);
 #else
     AT_ERROR("Not compiled with GPU support");
 #endif
......
@@ -4,6 +4,9 @@
 #ifdef WITH_CUDA
 #include <cuda.h>
 #endif
+#ifdef WITH_HIP
+#include <hip/hip_runtime.h>
+#endif

 #include "DeformConv.h"
 #include "PSROIAlign.h"
......
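
With the extension built, one quick way to confirm that the new WITH_HIP path dispatches is to call a wrapped operator on the GPU. A hedged smoke-test sketch; the device string 'cuda' is also correct on ROCm, where HIP devices masquerade as CUDA devices:

    import torch
    from torchvision import ops

    print('HIP runtime:', torch.version.hip)  # non-None on a ROCm build of PyTorch

    boxes = torch.tensor([[0., 0., 10., 10.],
                          [1., 1., 9., 9.]], device='cuda')  # maps to HIP on ROCm
    scores = torch.tensor([0.9, 0.8], device='cuda')
    keep = ops.nms(boxes, scores, iou_threshold=0.5)  # exercises nms_cuda via the new guards
    print(keep)  # expected tensor([0]): the boxes overlap with IoU 0.64 > 0.5
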