Unverified commit 43e94b39, authored by Ashish Farmer, committed by GitHub

[ROCm] Create torchvision as a HIP Extension (#1928)

* Added code to support creating extension on ROCm

* max -> fmaxf conversion for hipification

* added WITH_HIP flag for hipExtension

* added appropriate headers for HIP build

* use USE_ROCM in condition to build

* change fmaxf and fminf calls

* fminf -> min

* fix the check for ROCM_HOME

* more robust checking for rocm pytorch

* add check for pytorch version before using HIP extensions

* conditional reading of ROCM_HOME
parent cca0c77a
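In short, the patch treats a ROCm build of PyTorch as a first-class target: it detects HIP at setup time, runs hipify over the CUDA sources, and compiles them with WITH_HIP instead of WITH_CUDA. A minimal, self-contained sketch of the detection step the setup.py hunks below add (the wrapper function is illustrative, not part of the commit):

# Minimal sketch of the ROCm detection this commit adds to setup.py.
import torch

def detect_rocm_pytorch():
    # ROCM_HOME is only exported by torch.utils.cpp_extension on
    # PyTorch >= 1.5, hence the version guard before the import.
    is_rocm_pytorch = False
    if torch.__version__ >= '1.5':
        from torch.utils.cpp_extension import ROCM_HOME
        # torch.version.hip is None on CUDA builds of PyTorch and a
        # version string on ROCm builds.
        is_rocm_pytorch = (torch.version.hip is not None) and (ROCM_HOME is not None)
    return is_rocm_pytorch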
@@ -13,6 +13,7 @@ import shutil
import torch
from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension, CUDA_HOME
from torch.utils.hipify import hipify_python
def read(*names, **kwargs):
@@ -83,6 +84,26 @@ def get_extensions():
main_file = glob.glob(os.path.join(extensions_dir, '*.cpp'))
source_cpu = glob.glob(os.path.join(extensions_dir, 'cpu', '*.cpp'))
is_rocm_pytorch = False
if torch.__version__ >= '1.5':
from torch.utils.cpp_extension import ROCM_HOME
is_rocm_pytorch = True if ((torch.version.hip is not None) and (ROCM_HOME is not None)) else False
if is_rocm_pytorch:
hipify_python.hipify(
project_directory=this_dir,
output_directory=this_dir,
includes="torchvision/csrc/cuda/*",
show_detailed=True,
is_pytorch_extension=True,
)
source_cuda = glob.glob(os.path.join(extensions_dir, 'hip', '*.hip'))
## Copy over additional files
shutil.copy("torchvision/csrc/cuda/cuda_helpers.h", "torchvision/csrc/hip/cuda_helpers.h")
shutil.copy("torchvision/csrc/cuda/vision_cuda.h", "torchvision/csrc/hip/vision_cuda.h")
else:
source_cuda = glob.glob(os.path.join(extensions_dir, 'cuda', '*.cu'))
sources = main_file + source_cpu
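For reference, the hipify step in the hunk above rewrites the CUDA sources under torchvision/csrc/cuda/ into HIP equivalents under torchvision/csrc/hip/ with a .hip extension, which is what the glob and shutil.copy calls rely on. A hedged sketch of that source-collection flow (the helper name and the example filename are illustrative):

import glob
import os

def collect_gpu_sources(extensions_dir, is_rocm_pytorch):
    # Pick up HIP kernels when hipify has run, CUDA kernels otherwise.
    if is_rocm_pytorch:
        # hipify_python.hipify(...) emits e.g. cuda/nms_cuda.cu -> hip/nms_cuda.hip
        return glob.glob(os.path.join(extensions_dir, 'hip', '*.hip'))
    return glob.glob(os.path.join(extensions_dir, 'cuda', '*.cu'))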
@@ -103,15 +124,19 @@ def get_extensions():
define_macros = []
extra_compile_args = {}
-if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv('FORCE_CUDA', '0') == '1':
+if (torch.cuda.is_available() and ((CUDA_HOME is not None) or is_rocm_pytorch)) or os.getenv('FORCE_CUDA', '0') == '1':
extension = CUDAExtension
sources += source_cuda
if not is_rocm_pytorch:
define_macros += [('WITH_CUDA', None)]
nvcc_flags = os.getenv('NVCC_FLAGS', '')
if nvcc_flags == '':
nvcc_flags = []
else:
nvcc_flags = nvcc_flags.split(' ')
else:
define_macros += [('WITH_HIP', None)]
nvcc_flags = []
extra_compile_args = {
'cxx': [],
'nvcc': nvcc_flags,
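One way to read the hunk above: whichever branch runs, the chosen macro travels through define_macros into the compiler command line, so the C++ sources in the hunks below can branch on WITH_CUDA versus WITH_HIP. A hedged sketch of the resulting extension object (module and source names are illustrative of torchvision's setup.py, not copied from this diff):

from torch.utils.cpp_extension import BuildExtension, CUDAExtension

ext = CUDAExtension(
    'torchvision._C',
    sources=['torchvision/csrc/vision.cpp'],     # plus the cpu/ and cuda|hip/ sources
    define_macros=[('WITH_HIP', None)],          # becomes -DWITH_HIP; ('WITH_CUDA', None) on CUDA
    extra_compile_args={'cxx': [], 'nvcc': []},  # the 'nvcc' key is reused for the HIP compiler
)
# setup(..., ext_modules=[ext], cmdclass={'build_ext': BuildExtension})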
...
@@ -5,6 +5,9 @@
#ifdef WITH_CUDA
#include "cuda/vision_cuda.h"
#endif
#ifdef WITH_HIP
#include "hip/vision_cuda.h"
#endif
at::Tensor DeformConv2d_forward(
const at::Tensor& input,
@@ -17,7 +20,7 @@ at::Tensor DeformConv2d_forward(
const int groups,
const int offset_groups) {
if (input.type().is_cuda()) {
-#ifdef WITH_CUDA
+#if defined(WITH_CUDA) || defined(WITH_HIP)
return DeformConv2d_forward_cuda(
input.contiguous(),
weight.contiguous(),
@@ -56,7 +59,7 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor> DeformConv2d_backward
const int groups,
const int offset_groups) {
if (grad.type().is_cuda()) {
-#ifdef WITH_CUDA
+#if defined(WITH_CUDA) || defined(WITH_HIP)
return DeformConv2d_backward_cuda(
grad.contiguous(),
input.contiguous(),
...
@@ -5,6 +5,9 @@
#ifdef WITH_CUDA
#include "cuda/vision_cuda.h"
#endif
#ifdef WITH_HIP
#include "hip/vision_cuda.h"
#endif
#include <iostream>
@@ -16,7 +19,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIAlign_forward(
const int pooled_width,
const int sampling_ratio) {
if (input.type().is_cuda()) {
-#ifdef WITH_CUDA
+#if defined(WITH_CUDA) || defined(WITH_HIP)
return PSROIAlign_forward_cuda(
input,
rois,
@@ -45,7 +48,7 @@ at::Tensor PSROIAlign_backward(
const int height,
const int width) {
if (grad.type().is_cuda()) {
-#ifdef WITH_CUDA
+#if defined(WITH_CUDA) || defined(WITH_HIP)
return PSROIAlign_backward_cuda(
grad,
rois,
...
@@ -5,6 +5,9 @@
#ifdef WITH_CUDA
#include "cuda/vision_cuda.h"
#endif
#ifdef WITH_HIP
#include "hip/vision_cuda.h"
#endif
std::tuple<at::Tensor, at::Tensor> PSROIPool_forward(
const at::Tensor& input,
@@ -13,7 +16,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIPool_forward(
const int pooled_height,
const int pooled_width) {
if (input.type().is_cuda()) {
-#ifdef WITH_CUDA
+#if defined(WITH_CUDA) || defined(WITH_HIP)
return PSROIPool_forward_cuda(
input, rois, spatial_scale, pooled_height, pooled_width);
#else
@@ -36,7 +39,7 @@ at::Tensor PSROIPool_backward(
const int height,
const int width) {
if (grad.type().is_cuda()) {
-#ifdef WITH_CUDA
+#if defined(WITH_CUDA) || defined(WITH_HIP)
return PSROIPool_backward_cuda(
grad,
rois,
...
@@ -5,6 +5,9 @@
#ifdef WITH_CUDA
#include "cuda/vision_cuda.h"
#endif
#ifdef WITH_HIP
#include "hip/vision_cuda.h"
#endif
// Interface for Python
at::Tensor ROIAlign_forward(
@@ -19,7 +22,7 @@ at::Tensor ROIAlign_forward(
// along each axis.
{
if (input.type().is_cuda()) {
-#ifdef WITH_CUDA
+#if defined(WITH_CUDA) || defined(WITH_HIP)
return ROIAlign_forward_cuda(
input,
rois,
@@ -49,7 +52,7 @@ at::Tensor ROIAlign_backward(
const int sampling_ratio,
const bool aligned) {
if (grad.type().is_cuda()) {
-#ifdef WITH_CUDA
+#if defined(WITH_CUDA) || defined(WITH_HIP)
return ROIAlign_backward_cuda(
grad,
rois,
...
@@ -5,6 +5,9 @@
#ifdef WITH_CUDA
#include "cuda/vision_cuda.h"
#endif
#ifdef WITH_HIP
#include "hip/vision_cuda.h"
#endif
std::tuple<at::Tensor, at::Tensor> ROIPool_forward(
const at::Tensor& input,
@@ -13,7 +16,7 @@ std::tuple<at::Tensor, at::Tensor> ROIPool_forward(
const int64_t pooled_height,
const int64_t pooled_width) {
if (input.type().is_cuda()) {
-#ifdef WITH_CUDA
+#if defined(WITH_CUDA) || defined(WITH_HIP)
return ROIPool_forward_cuda(
input, rois, spatial_scale, pooled_height, pooled_width);
#else
@@ -36,7 +39,7 @@ at::Tensor ROIPool_backward(
const int height,
const int width) {
if (grad.type().is_cuda()) {
-#ifdef WITH_CUDA
+#if defined(WITH_CUDA) || defined(WITH_HIP)
return ROIPool_backward_cuda(
grad,
rois,
...
#pragma once
#if defined(WITH_CUDA)
#include <c10/cuda/CUDAGuard.h>
#elif defined(WITH_HIP)
#include <c10/hip/HIPGuard.h>
#endif
#include <torch/extension.h>
at::Tensor ROIAlign_forward_cuda(
...
@@ -4,18 +4,27 @@
#ifdef WITH_CUDA
#include "cuda/vision_cuda.h"
#endif
#ifdef WITH_HIP
#include "hip/vision_cuda.h"
#endif
at::Tensor nms(
const at::Tensor& dets,
const at::Tensor& scores,
const double iou_threshold) {
if (dets.device().is_cuda()) {
-#ifdef WITH_CUDA
+#if defined(WITH_CUDA)
if (dets.numel() == 0) {
at::cuda::CUDAGuard device_guard(dets.device());
return at::empty({0}, dets.options().dtype(at::kLong));
}
return nms_cuda(dets, scores, iou_threshold);
#elif defined(WITH_HIP)
if (dets.numel() == 0) {
at::cuda::HIPGuard device_guard(dets.device());
return at::empty({0}, dets.options().dtype(at::kLong));
}
return nms_cuda(dets, scores, iou_threshold);
#else
AT_ERROR("Not compiled with GPU support");
#endif
...
@@ -4,6 +4,9 @@
#ifdef WITH_CUDA
#include <cuda.h>
#endif
#ifdef WITH_HIP
#include <hip/hip_runtime.h>
#endif
#include "DeformConv.h"
#include "PSROIAlign.h"
...
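Once built, the hipified ops are exercised through the same Python surface as on CUDA, because ROCm builds of PyTorch expose HIP devices under the 'cuda' device type. A quick smoke test, assuming the extension built successfully on a ROCm machine:

import torch
from torchvision.ops import nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [50., 50., 60., 60.]], device='cuda')
scores = torch.tensor([0.9, 0.8, 0.7], device='cuda')
keep = nms(boxes, scores, iou_threshold=0.5)
print(keep.cpu())  # tensor([0, 2]) -- box 1 overlaps box 0 above the threshold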