Unverified Commit 00328ac7 authored by Kirthi Shankar Sivamani, committed by GitHub

Build support for cuda 13 (#1809)



* Build support for cuda 13
Signed-off-by: Kirthi Shankar Sivamani <ksivamani@nvidia.com>

* Fix build for cudnn 8.9*; cuda 12.1
Signed-off-by: Kirthi Shankar Sivamani <ksivamani@nvidia.com>

* readd include
Signed-off-by: Kirthi Shankar Sivamani <ksivamani@nvidia.com>

---------
Signed-off-by: Kirthi Shankar Sivamani <ksivamani@nvidia.com>
parent 6262280e

@@ -242,9 +242,12 @@ def get_cuda_include_dirs() -> Tuple[str, str]:
 def cuda_archs() -> str:
     version = cuda_version()
     if os.getenv("NVTE_CUDA_ARCHS") is None:
-        os.environ["NVTE_CUDA_ARCHS"] = (
-            "70;80;89;90;100;120" if version >= (12, 8) else "70;80;89;90"
-        )
+        if version >= (13, 0):
+            os.environ["NVTE_CUDA_ARCHS"] = "75;80;89;90;100;120"
+        elif version >= (12, 8):
+            os.environ["NVTE_CUDA_ARCHS"] = "70;80;89;90;100;120"
+        else:
+            os.environ["NVTE_CUDA_ARCHS"] = "70;80;89;90"
     return os.getenv("NVTE_CUDA_ARCHS")
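
The new default can be read as a pure function of the CUDA version; a minimal sketch mirroring the hunk above (the `pick_archs` helper is illustrative, not part of the codebase):

```python
# Illustrative mirror of the updated cuda_archs() defaults. CUDA 13.0
# drops offline compilation for pre-Turing GPUs, so sm_70 (Volta) is
# removed and sm_75 added; CUDA 12.8+ adds Blackwell (sm_100, sm_120).
def pick_archs(version: tuple) -> str:
    if version >= (13, 0):
        return "75;80;89;90;100;120"
    if version >= (12, 8):
        return "70;80;89;90;100;120"
    return "70;80;89;90"

assert pick_archs((13, 0)) == "75;80;89;90;100;120"
assert pick_archs((12, 9)) == "70;80;89;90;100;120"
assert pick_archs((12, 1)) == "70;80;89;90"
```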

@@ -22,6 +22,7 @@ from build_tools.utils import (
     get_frameworks,
     install_and_import,
     remove_dups,
+    cuda_toolkit_include_path,
 )

 frameworks = get_frameworks()

@@ -88,7 +89,10 @@ def setup_requirements() -> Tuple[List[str], List[str], List[str]]:
     """
     # Common requirements
-    setup_reqs: List[str] = [
+    setup_reqs: List[str] = []
+    if cuda_toolkit_include_path() is None:
+        setup_reqs.extend(
+            [
                 "nvidia-cuda-runtime-cu12",
                 "nvidia-cublas-cu12",
                 "nvidia-cudnn-cu12",
@@ -97,6 +101,7 @@
                 "nvidia-nvtx-cu12",
                 "nvidia-cuda-nvrtc-cu12",
             ]
+        )

     install_reqs: List[str] = [
         "pydantic",
         "importlib-metadata>=1.0",
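
`cuda_toolkit_include_path()` is introduced by this commit, but its definition is not part of this excerpt. A plausible sketch of the assumed contract (return a local toolkit include directory if one is found, else `None` so the CUDA wheels above are pulled from PyPI):

```python
import os
from pathlib import Path

def cuda_toolkit_include_path():
    # Assumed behavior only; the real helper lives in build_tools/utils.py
    # and may probe more locations than the two env vars checked here.
    for var in ("CUDA_HOME", "CUDA_PATH"):
        root = os.getenv(var)
        if root and (Path(root) / "include" / "cuda_runtime.h").is_file():
            return Path(root) / "include"
    return None  # no local toolkit: fall back to the nvidia-*-cu12 wheels
```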

@@ -6,7 +6,9 @@ cmake_minimum_required(VERSION 3.21)

 # Language options
 if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
-  if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.8)
+  if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 13.0)
+    set(CMAKE_CUDA_ARCHITECTURES 75 80 89 90 100 120)
+  elseif (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.8)
     set(CMAKE_CUDA_ARCHITECTURES 70 80 89 90 100 120)
   else ()
     set(CMAKE_CUDA_ARCHITECTURES 70 80 89 90)
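
Because the guard is `if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)`, the version-based default applies only when no architecture list is given; passing one explicitly bypasses it. A minimal sketch (source and build paths are placeholders):

```python
import subprocess

# Configure with an explicit architecture list, overriding the default
# chosen from CUDAToolkit_VERSION in the hunk above.
subprocess.run(
    ["cmake", "-S", ".", "-B", "build", "-DCMAKE_CUDA_ARCHITECTURES=90"],
    check=True,
)
```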

@@ -132,9 +132,10 @@ void create_2D_tensor_map(CUtensorMap &tensorMap, const SimpleTensor &tensor,
                           const uint32_t shmemX, const uint32_t stride_elems,
                           const uint32_t offset_elems, const size_t type_size) {
   // Get a function pointer to the cuTensorMapEncodeTiled driver API
-  static PFN_cuTensorMapEncodeTiled cuDriverTensorMapEncodeTiled = []() {
+  // Note: PFN_cuTensorMapEncodeTiled is not defined in cuda13
+  static PFN_cuTensorMapEncodeTiled_v12000 cuDriverTensorMapEncodeTiled = []() {
     void *driver_ptr = cuda_driver::get_symbol("cuTensorMapEncodeTiled");
-    return reinterpret_cast<PFN_cuTensorMapEncodeTiled>(driver_ptr);
+    return reinterpret_cast<PFN_cuTensorMapEncodeTiled_v12000>(driver_ptr);
   }();

   // rank is the number of dimensions of the array
   constexpr uint32_t rank = 2;

@@ -493,7 +493,8 @@ void cublas_gemm(const Tensor *inputA, const Tensor *inputB, Tensor *outputD,
   NVTE_CHECK_CUBLAS(cublasLtMatmulDescSetAttribute(operationDesc, CUBLASLT_MATMUL_DESC_EPILOGUE,
                                                    &epilogue, sizeof(epilogue)));

-#if CUDA_VERSION >= 12020 && CUBLAS_VERSION >= 120205
+#if CUDA_VERSION >= 12020 && CUBLAS_VERSION >= 120205 && CUDA_VERSION < 13000 && \
+    CUBLAS_VERSION < 130000
   if (counter != nullptr) {
     if (m_split == 0) m_split = 1;
     if (n_split == 0) n_split = 1;
@@ -609,8 +610,10 @@ void nvte_cublas_atomic_gemm(const NVTETensor A, const NVTETensor B, NVTETensor
   int cudart_version;
   NVTE_CHECK_CUDA(cudaRuntimeGetVersion(&cudart_version));
-  NVTE_CHECK(cudart_version >= 12020, "Cuda version 12.2 is required for atomic gemm.");
-  NVTE_CHECK(cublasLtGetVersion() >= 120205, "Cublas version 12.2.5 is required for atomic gemm.");
+  NVTE_CHECK(cudart_version >= 12020 && cudart_version < 13000,
+             "Cuda version >=12.2 and <13.0 is required for atomic gemm.");
+  NVTE_CHECK(cublasLtGetVersion() >= 120205 && cublasLtGetVersion() < 130000,
+             "Cublas version >=12.2.5 and <13.0 is required for atomic gemm.");

   using namespace transformer_engine;
   const Tensor *inputA = reinterpret_cast<const Tensor *>(A);
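
The net effect is that atomic GEMM is refused on CUDA 13. A rough pre-flight check from Python (a proxy only: `torch.version.cuda` reports the toolkit PyTorch was built against, while the checks above query the CUDA runtime and cuBLASLt directly):

```python
import torch

# Atomic GEMM now requires CUDA >= 12.2 and < 13.0 per the NVTE_CHECKs.
major, minor = (int(x) for x in torch.version.cuda.split(".")[:2])
atomic_gemm_ok = (12, 2) <= (major, minor) < (13, 0)
print(f"CUDA {major}.{minor}: atomic GEMM available = {atomic_gemm_ok}")
```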

@@ -44,7 +44,7 @@ if bool(int(os.getenv("NVTE_RELEASE_BUILD", "0"))) or os.path.isdir(build_tools_
 from build_tools.build_ext import get_build_ext
-from build_tools.utils import copy_common_headers, install_and_import
+from build_tools.utils import copy_common_headers, install_and_import, cuda_toolkit_include_path
 from build_tools.te_version import te_version
 from build_tools.jax import setup_jax_extension

@@ -94,16 +94,10 @@ if __name__ == "__main__":
         )
     ]

-    # Configure package
-    setuptools.setup(
-        name="transformer_engine_jax",
-        version=te_version(),
-        description="Transformer acceleration library - Jax Lib",
-        ext_modules=ext_modules,
-        cmdclass={"build_ext": CMakeBuildExtension},
-        setup_requires=[
-            "jax[cuda12]",
-            "flax>=0.7.1",
+    setup_requires = ["jax[cuda12]", "flax>=0.7.1"]
+    if cuda_toolkit_include_path() is None:
+        setup_requires.extend(
+            [
                 "nvidia-cuda-runtime-cu12",
                 "nvidia-cublas-cu12",
                 "nvidia-cudnn-cu12",
@@ -111,7 +105,17 @@
                 "nvidia-cuda-nvcc-cu12",
                 "nvidia-nvtx-cu12",
                 "nvidia-cuda-nvrtc-cu12",
-        ],
+            ]
+        )
+
+    # Configure package
+    setuptools.setup(
+        name="transformer_engine_jax",
+        version=te_version(),
+        description="Transformer acceleration library - Jax Lib",
+        ext_modules=ext_modules,
+        cmdclass={"build_ext": CMakeBuildExtension},
+        setup_requires=setup_requires,
         install_requires=["jax", "flax>=0.7.1"],
         tests_require=["numpy"],
     )

@@ -29,7 +29,7 @@ if bool(int(os.getenv("NVTE_RELEASE_BUILD", "0"))) or os.path.isdir(build_tools_
 from build_tools.build_ext import get_build_ext
-from build_tools.utils import copy_common_headers
+from build_tools.utils import copy_common_headers, cuda_toolkit_include_path
 from build_tools.te_version import te_version
 from build_tools.pytorch import setup_pytorch_extension

@@ -48,15 +48,10 @@ if __name__ == "__main__":
         )
     ]

-    # Configure package
-    setuptools.setup(
-        name="transformer_engine_torch",
-        version=te_version(),
-        description="Transformer acceleration library - Torch Lib",
-        ext_modules=ext_modules,
-        cmdclass={"build_ext": CMakeBuildExtension},
-        setup_requires=[
-            "torch>=2.1",
+    setup_requires = ["torch>=2.1"]
+    if cuda_toolkit_include_path() is None:
+        setup_requires.extend(
+            [
                 "nvidia-cuda-runtime-cu12",
                 "nvidia-cublas-cu12",
                 "nvidia-cudnn-cu12",
@@ -64,7 +59,17 @@
                 "nvidia-cuda-nvcc-cu12",
                 "nvidia-nvtx-cu12",
                 "nvidia-cuda-nvrtc-cu12",
-        ],
+            ]
+        )
+
+    # Configure package
+    setuptools.setup(
+        name="transformer_engine_torch",
+        version=te_version(),
+        description="Transformer acceleration library - Torch Lib",
+        ext_modules=ext_modules,
+        cmdclass={"build_ext": CMakeBuildExtension},
+        setup_requires=setup_requires,
         install_requires=["torch>=2.1"],
         tests_require=["numpy", "torchvision"],
     )
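
Both framework setup scripts now share the same fallback. To see which dependency path a given environment will take before installing, a check like this should work (assuming `build_tools` is importable from the repository root):

```python
from build_tools.utils import cuda_toolkit_include_path

inc = cuda_toolkit_include_path()
if inc is None:
    print("No local CUDA toolkit detected; nvidia-*-cu12 wheels will be "
          "added to setup_requires.")
else:
    print(f"Local CUDA toolkit headers found at: {inc}")
```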