Unverified Commit 663d5a4d authored by Chaitanya Sri Krishna Lolla's avatar Chaitanya Sri Krishna Lolla Committed by GitHub
Browse files

Merge pull request #38 from lcskrishna/cl/rocm-hipify-revamp

Hipify revamp changes for apex extensions on ROCm.
parents 7eed38aa 3fdb8db9
...@@ -119,6 +119,7 @@ See the [Docker example folder](https://github.com/NVIDIA/apex/tree/master/examp ...@@ -119,6 +119,7 @@ See the [Docker example folder](https://github.com/NVIDIA/apex/tree/master/examp
* Python 3.6 * Python 3.6
* PyTorch 1.5 or newer; the HIPExtensions require 1.5 or newer. * PyTorch 1.5 or newer; the HIPExtensions require 1.5 or newer.
* We recommend following the instructions from [ROCm-Pytorch](https://github.com/ROCmSoftwarePlatform/pytorch) to install PyTorch on ROCm. * We recommend following the instructions from [ROCm-Pytorch](https://github.com/ROCmSoftwarePlatform/pytorch) to install PyTorch on ROCm.
* Note: For PyTorch versions < 1.8, building from source is no longer supported; please use the release package [ROCm-Apex v0.3](https://github.com/ROCmSoftwarePlatform/apex/releases/tag/v0.3) instead.
# Quick Start # Quick Start
......
...@@ -6,8 +6,6 @@ import sys ...@@ -6,8 +6,6 @@ import sys
import warnings import warnings
import os import os
from torch.utils.hipify import hipify_python
# ninja build does not work unless include_dirs are abs path # ninja build does not work unless include_dirs are abs path
this_dir = os.path.dirname(os.path.abspath(__file__)) this_dir = os.path.dirname(os.path.abspath(__file__))
...@@ -145,16 +143,10 @@ if "--cuda_ext" in sys.argv: ...@@ -145,16 +143,10 @@ if "--cuda_ext" in sys.argv:
if not is_rocm_pytorch: if not is_rocm_pytorch:
check_cuda_torch_binary_vs_bare_metal(torch.utils.cpp_extension.CUDA_HOME) check_cuda_torch_binary_vs_bare_metal(torch.utils.cpp_extension.CUDA_HOME)
if is_rocm_pytorch: print ("INFO: Building the multi-tensor apply extension.")
import shutil nvcc_args_multi_tensor = ['-lineinfo', '-O3', '--use_fast_math'] + version_dependent_macros
with hipify_python.GeneratedFileCleaner(keep_intermediates=True) as clean_ctx: hipcc_args_multi_tensor = ['-O3'] + version_dependent_macros
hipify_python.hipify(project_directory=this_dir, output_directory=this_dir, includes="csrc/*", ext_modules.append(
show_detailed=True, is_pytorch_extension=True, clean_ctx=clean_ctx)
shutil.copy("csrc/compat.h", "csrc/hip/compat.h")
shutil.copy("csrc/type_shim.h", "csrc/hip/type_shim.h")
if not is_rocm_pytorch:
ext_modules.append(
CUDAExtension(name='amp_C', CUDAExtension(name='amp_C',
sources=['csrc/amp_C_frontend.cpp', sources=['csrc/amp_C_frontend.cpp',
'csrc/multi_tensor_sgd_kernel.cu', 'csrc/multi_tensor_sgd_kernel.cu',
...@@ -167,77 +159,32 @@ if "--cuda_ext" in sys.argv: ...@@ -167,77 +159,32 @@ if "--cuda_ext" in sys.argv:
'csrc/multi_tensor_adagrad.cu', 'csrc/multi_tensor_adagrad.cu',
'csrc/multi_tensor_novograd.cu', 'csrc/multi_tensor_novograd.cu',
'csrc/multi_tensor_lamb.cu'], 'csrc/multi_tensor_lamb.cu'],
extra_compile_args={'cxx': ['-O3'] + version_dependent_macros, extra_compile_args = { 'cxx' : ['-O3'] + version_dependent_macros,
'nvcc':['-lineinfo', 'nvcc': nvcc_args_multi_tensor if not is_rocm_pytorch else hipcc_args_multi_tensor}))
'-O3',
# '--resource-usage',
'--use_fast_math'] + version_dependent_macros}))
else:
print ("INFO: Building Multitensor apply extension")
ext_modules.append(
CUDAExtension(name='amp_C',
sources=['csrc/amp_C_frontend.cpp',
'csrc/hip/multi_tensor_sgd_kernel.hip',
'csrc/hip/multi_tensor_scale_kernel.hip',
'csrc/hip/multi_tensor_axpby_kernel.hip',
'csrc/hip/multi_tensor_l2norm_kernel.hip',
'csrc/hip/multi_tensor_lamb_stage_1.hip',
'csrc/hip/multi_tensor_lamb_stage_2.hip',
'csrc/hip/multi_tensor_adam.hip',
'csrc/hip/multi_tensor_adagrad.hip',
'csrc/hip/multi_tensor_novograd.hip',
'csrc/hip/multi_tensor_lamb.hip'],
extra_compile_args=['-O3'] + version_dependent_macros))
if not is_rocm_pytorch: print ("INFO: Building syncbn extension.")
ext_modules.append( ext_modules.append(
CUDAExtension(name='syncbn', CUDAExtension(name='syncbn',
sources=['csrc/syncbn.cpp', sources=['csrc/syncbn.cpp',
'csrc/welford.cu'], 'csrc/welford.cu'],
extra_compile_args={'cxx': ['-O3'] + version_dependent_macros, extra_compile_args= ['-O3'] + version_dependent_macros))
'nvcc':['-O3'] + version_dependent_macros}))
else:
print ("INFO: Building syncbn extension.")
ext_modules.append(
CUDAExtension(name='syncbn',
sources=['csrc/syncbn.cpp',
'csrc/hip/welford.hip'],
extra_compile_args=['-O3'] + version_dependent_macros))
nvcc_args_layer_norm = ['maxrregcount=50', '-O3', '--use_fast_math'] + version_dependent_macros
if not is_rocm_pytorch: hipcc_args_layer_norm = ['-O3'] + version_dependent_macros
ext_modules.append( print ("INFO: Building fused layernorm extension.")
ext_modules.append(
CUDAExtension(name='fused_layer_norm_cuda', CUDAExtension(name='fused_layer_norm_cuda',
sources=['csrc/layer_norm_cuda.cpp', sources=['csrc/layer_norm_cuda.cpp',
'csrc/layer_norm_cuda_kernel.cu'], 'csrc/layer_norm_cuda_kernel.cu'],
extra_compile_args={'cxx': ['-O3'] + version_dependent_macros, extra_compile_args={'cxx': ['-O3'] + version_dependent_macros,
'nvcc':['-maxrregcount=50', 'nvcc': nvcc_args_layer_norm if not is_rocm_pytorch else hipcc_args_layer_norm}))
'-O3',
'--use_fast_math'] + version_dependent_macros}))
else:
print ("INFO: Building FusedLayerNorm extension.")
ext_modules.append(
CUDAExtension(name='fused_layer_norm_cuda',
sources=['csrc/layer_norm_cuda.cpp',
'csrc/hip/layer_norm_hip_kernel.hip'],
extra_compile_args={'cxx' : ['-O3'] + version_dependent_macros,
'nvcc' : []}))
if not is_rocm_pytorch: print ("INFO: Building the MLP Extension.")
ext_modules.append( ext_modules.append(
CUDAExtension(name='mlp_cuda', CUDAExtension(name='mlp_cuda',
sources=['csrc/mlp.cpp', sources=['csrc/mlp.cpp',
'csrc/mlp_cuda.cu'], 'csrc/mlp_cuda.cu'],
extra_compile_args={'cxx': ['-O3'] + version_dependent_macros, extra_compile_args=['-O3'] + version_dependent_macros))
'nvcc':['-O3'] + version_dependent_macros}))
else:
print ("INFO: Building MLP extension")
ext_modules.append(
CUDAExtension(name='mlp_cuda',
sources=['csrc/mlp.cpp',
'csrc/hip/mlp_hip.hip'],
extra_compile_args={'cxx' : ['-O3'] + version_dependent_macros,
'nvcc' : []}))
if "--bnp" in sys.argv: if "--bnp" in sys.argv:
from torch.utils.cpp_extension import CUDAExtension from torch.utils.cpp_extension import CUDAExtension
...@@ -274,20 +221,12 @@ if "--xentropy" in sys.argv: ...@@ -274,20 +221,12 @@ if "--xentropy" in sys.argv:
if torch.utils.cpp_extension.CUDA_HOME is None and (not is_rocm_pytorch): if torch.utils.cpp_extension.CUDA_HOME is None and (not is_rocm_pytorch):
raise RuntimeError("--xentropy was requested, but nvcc was not found. Are you sure your environment has nvcc available? If you're installing within a container from https://hub.docker.com/r/pytorch/pytorch, only images whose names contain 'devel' will provide nvcc.") raise RuntimeError("--xentropy was requested, but nvcc was not found. Are you sure your environment has nvcc available? If you're installing within a container from https://hub.docker.com/r/pytorch/pytorch, only images whose names contain 'devel' will provide nvcc.")
else: else:
if not is_rocm_pytorch: print ("INFO: Building the xentropy extension.")
ext_modules.append( ext_modules.append(
CUDAExtension(name='xentropy_cuda', CUDAExtension(name='xentropy_cuda',
sources=['apex/contrib/csrc/xentropy/interface.cpp', sources=['apex/contrib/csrc/xentropy/interface.cpp',
'apex/contrib/csrc/xentropy/xentropy_kernel.cu'], 'apex/contrib/csrc/xentropy/xentropy_kernel.cu'],
include_dirs=[os.path.join(this_dir, 'csrc')], include_dirs=[os.path.join(this_dir, 'csrc')],
extra_compile_args={'cxx': ['-O3'] + version_dependent_macros,
'nvcc':['-O3'] + version_dependent_macros}))
else:
ext_modules.append(
CUDAExtension(name='xentropy_cuda',
sources=['apex/contrib/csrc/xentropy/interface.cpp',
'apex/contrib/csrc/xentropy/hip/xentropy_kernel.hip'],
include_dirs=[os.path.join(this_dir, 'csrc/hip')],
extra_compile_args=['-O3'] + version_dependent_macros)) extra_compile_args=['-O3'] + version_dependent_macros))
...@@ -303,24 +242,16 @@ if "--deprecated_fused_adam" in sys.argv: ...@@ -303,24 +242,16 @@ if "--deprecated_fused_adam" in sys.argv:
if torch.utils.cpp_extension.CUDA_HOME is None and (not is_rocm_pytorch): if torch.utils.cpp_extension.CUDA_HOME is None and (not is_rocm_pytorch):
raise RuntimeError("--deprecated_fused_adam was requested, but nvcc was not found. Are you sure your environment has nvcc available? If you're installing within a container from https://hub.docker.com/r/pytorch/pytorch, only images whose names contain 'devel' will provide nvcc.") raise RuntimeError("--deprecated_fused_adam was requested, but nvcc was not found. Are you sure your environment has nvcc available? If you're installing within a container from https://hub.docker.com/r/pytorch/pytorch, only images whose names contain 'devel' will provide nvcc.")
else: else:
if not is_rocm_pytorch: print ("INFO: Building deprecated fused adam extension.")
ext_modules.append( nvcc_args_fused_adam = ['-O3', '--use_fast_math'] + version_dependent_macros
hipcc_args_fused_adam = ['-O3'] + version_dependent_macros
ext_modules.append(
CUDAExtension(name='fused_adam_cuda', CUDAExtension(name='fused_adam_cuda',
sources=['apex/contrib/csrc/optimizers/fused_adam_cuda.cpp', sources=['apex/contrib/csrc/optimizers/fused_adam_cuda.cpp',
'apex/contrib/csrc/optimizers/fused_adam_cuda_kernel.cu'], 'apex/contrib/csrc/optimizers/fused_adam_cuda_kernel.cu'],
include_dirs=[os.path.join(this_dir, 'csrc')], include_dirs=[os.path.join(this_dir, 'csrc')],
extra_compile_args={'cxx': ['-O3',] + version_dependent_macros, extra_compile_args={'cxx': ['-O3'] + version_dependent_macros,
'nvcc':['-O3', 'nvcc' : nvcc_args_fused_adam if not is_rocm_pytorch else hipcc_args_fused_adam}))
'--use_fast_math'] + version_dependent_macros}))
else:
print ("INFO: Building deprecated fused adam.")
ext_modules.append(
CUDAExtension(name='fused_adam_cuda',
sources=['apex/contrib/csrc/optimizers/fused_adam_cuda.cpp',
'apex/contrib/csrc/optimizers/hip/fused_adam_hip_kernel.hip'],
include_dirs=[os.path.join(this_dir, 'csrc/hip')],
extra_compile_args=['-O3'] + version_dependent_macros))
if "--deprecated_fused_lamb" in sys.argv: if "--deprecated_fused_lamb" in sys.argv:
from torch.utils.cpp_extension import CUDAExtension from torch.utils.cpp_extension import CUDAExtension
sys.argv.remove("--deprecated_fused_lamb") sys.argv.remove("--deprecated_fused_lamb")
...@@ -333,25 +264,16 @@ if "--deprecated_fused_lamb" in sys.argv: ...@@ -333,25 +264,16 @@ if "--deprecated_fused_lamb" in sys.argv:
if torch.utils.cpp_extension.CUDA_HOME is None and (not is_rocm_pytorch): if torch.utils.cpp_extension.CUDA_HOME is None and (not is_rocm_pytorch):
raise RuntimeError("--deprecated_fused_lamb was requested, but nvcc was not found. Are you sure your environment has nvcc available? If you're installing within a container from https://hub.docker.com/r/pytorch/pytorch, only images whose names contain 'devel' will provide nvcc.") raise RuntimeError("--deprecated_fused_lamb was requested, but nvcc was not found. Are you sure your environment has nvcc available? If you're installing within a container from https://hub.docker.com/r/pytorch/pytorch, only images whose names contain 'devel' will provide nvcc.")
else: else:
if not is_rocm_pytorch: print ("INFO: Building deprecated fused lamb extension.")
ext_modules.append( nvcc_args_fused_lamb = ['-O3', '--use_fast_math'] + version_dependent_macros
hipcc_args_fused_lamb = ['-O3'] + version_dependent_macros
ext_modules.append(
CUDAExtension(name='fused_lamb_cuda', CUDAExtension(name='fused_lamb_cuda',
sources=['apex/contrib/csrc/optimizers/fused_lamb_cuda.cpp', sources=['apex/contrib/csrc/optimizers/fused_lamb_cuda.cpp',
'apex/contrib/csrc/optimizers/fused_lamb_cuda_kernel.cu', 'apex/contrib/csrc/optimizers/fused_lamb_cuda_kernel.cu',
'csrc/multi_tensor_l2norm_kernel.cu'], 'csrc/multi_tensor_l2norm_kernel.cu'],
include_dirs=[os.path.join(this_dir, 'csrc')], include_dirs=[os.path.join(this_dir, 'csrc')],
extra_compile_args={'cxx': ['-O3',] + version_dependent_macros, extra_compile_args = nvcc_args_fused_lamb if not is_rocm_pytorch else hipcc_args_fused_lamb))
'nvcc':['-O3',
'--use_fast_math'] + version_dependent_macros}))
else:
print ("INFO: Building deprecated fused lamb.")
ext_modules.append(
CUDAExtension(name='fused_lamb_cuda',
sources=['apex/contrib/csrc/optimizers/fused_lamb_cuda.cpp',
'apex/contrib/csrc/optimizers/hip/fused_lamb_hip_kernel.hip',
'csrc/hip/multi_tensor_l2norm_kernel.hip'],
include_dirs=[os.path.join(this_dir, 'csrc/hip')],
extra_compile_args=['-O3'] + version_dependent_macros))
# Check, if ATen/CUDAGenerator.h is found, otherwise use the new ATen/CUDAGeneratorImpl.h, due to breaking change in https://github.com/pytorch/pytorch/pull/36026 # Check, if ATen/CUDAGenerator.h is found, otherwise use the new ATen/CUDAGeneratorImpl.h, due to breaking change in https://github.com/pytorch/pytorch/pull/36026
generator_flag = [] generator_flag = []
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment