Unverified Commit 65b34702 authored by Xin Yao's avatar Xin Yao Committed by GitHub
Browse files

[Makefile] Refactor CUDA makefile and add Hopper (SM90) to default build (#4830)



* Update CUDA.cmake to align with PyTorch's

* add Ada and Hopper

* add more comments

* resolve comments
Co-authored-by: Triston <triston.cao@gmail.com>
parent c8ea9fa4
......@@ -10,9 +10,16 @@ endif()
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-std=c++14" SUPPORT_CXX14)
set(dgl_known_gpu_archs "35 50 60 70")
set(dgl_known_gpu_archs "35" "50" "60" "70")
set(dgl_cuda_arch_ptx "70")
if (CUDA_VERSION_MAJOR GREATER_EQUAL "11")
set(dgl_known_gpu_archs "${dgl_known_gpu_archs} 80")
list(APPEND dgl_known_gpu_archs "80")
set(dgl_cuda_arch_ptx "80")
endif()
# VERSION_GREATER_EQUAL is only available in CMake 3.7+, so use
# "NOT ... VERSION_LESS" to stay compatible with CMake 3.5.
if (NOT CUDA_VERSION VERSION_LESS "11.8")
list(APPEND dgl_known_gpu_archs "90")
set(dgl_cuda_arch_ptx "90")
endif()
################################################################################################
......@@ -63,10 +70,14 @@ set(CUDA_gpu_detect_output "")
# nvcc outputs text containing line breaks when building with MSVC.
# The line below prevents CMake from inserting a variable with line
# breaks in the cache
message(STATUS "Found CUDA arch ${__nvcc_out}")
message(STATUS "Found GPU arch ${__nvcc_out}")
string(REGEX MATCH "([1-9].[0-9])" __nvcc_out "${__nvcc_out}")
string(REPLACE "2.1" "2.1(2.0)" __nvcc_out "${__nvcc_out}")
if(__nvcc_out VERSION_LESS "3.5")
# Support for compute capability < 3.5 has been dropped; build for all known archs instead.
message(WARNING "GPU arch less than 3.5 is not supported.")
else()
set(CUDA_gpu_detect_output ${__nvcc_out} CACHE INTERNAL "Returned GPU architetures from mshadow_detect_gpus tool" FORCE)
endif()
else()
message(WARNING "Running GPU detection script with nvcc failed: ${__nvcc_out}")
endif()
......@@ -86,8 +97,8 @@ endfunction()
# Usage:
# dgl_select_nvcc_arch_flags(out_variable)
function(dgl_select_nvcc_arch_flags out_variable)
# List of arch names
set(__archs_names "Fermi" "Kepler" "Maxwell" "Pascal" "Volta" "Ampere" "All" "Manual")
# List of selectable arch names. Turing (75) and Ada (89) share a major version with
# Volta (70) and Ampere (80) respectively, so they are not added to the default build.
set(__archs_names "Kepler" "Maxwell" "Pascal" "Volta" "Turing" "Ampere" "Ada" "Hopper" "All" "Manual")
set(__archs_name_default "All")
if(NOT CMAKE_CROSSCOMPILING)
list(APPEND __archs_names "Auto")
......@@ -107,36 +118,53 @@ function(dgl_select_nvcc_arch_flags out_variable)
if(${CUDA_ARCH_NAME} STREQUAL "Manual")
set(CUDA_ARCH_BIN ${dgl_known_gpu_archs} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
set(CUDA_ARCH_PTX "50" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
set(CUDA_ARCH_PTX ${dgl_cuda_arch_ptx} CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX)
else()
unset(CUDA_ARCH_BIN CACHE)
unset(CUDA_ARCH_PTX CACHE)
endif()
if(${CUDA_ARCH_NAME} STREQUAL "Fermi")
set(__cuda_arch_bin "20 21(20)")
elseif(${CUDA_ARCH_NAME} STREQUAL "Kepler")
set(__cuda_arch_bin "30 35")
if(${CUDA_ARCH_NAME} STREQUAL "Kepler")
set(__cuda_arch_bin "35")
set(__cuda_arch_ptx "35")
elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell")
set(__cuda_arch_bin "50")
set(__cuda_arch_ptx "50")
elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal")
set(__cuda_arch_bin "60 61")
set(__cuda_arch_bin "60")
set(__cuda_arch_ptx "60")
elseif(${CUDA_ARCH_NAME} STREQUAL "Volta")
set(__cuda_arch_bin "70")
set(__cuda_arch_ptx "70")
elseif(${CUDA_ARCH_NAME} STREQUAL "Turing")
set(__cuda_arch_bin "75")
set(__cuda_arch_ptx "75")
elseif(${CUDA_ARCH_NAME} STREQUAL "Ampere")
set(__cuda_arch_bin "80")
set(__cuda_arch_ptx "80")
elseif(${CUDA_ARCH_NAME} STREQUAL "Ada")
set(__cuda_arch_bin "89")
set(__cuda_arch_ptx "89")
elseif(${CUDA_ARCH_NAME} STREQUAL "Hopper")
set(__cuda_arch_bin "90")
set(__cuda_arch_ptx "90")
elseif(${CUDA_ARCH_NAME} STREQUAL "All")
set(__cuda_arch_bin ${dgl_known_gpu_archs})
set(__cuda_arch_ptx ${dgl_cuda_arch_ptx})
elseif(${CUDA_ARCH_NAME} STREQUAL "Auto")
dgl_detect_installed_gpus(__cuda_arch_bin)
# If detection succeeds, __cuda_arch_ptx is the same as __cuda_arch_bin.
# If detection fails, __cuda_arch_ptx is the last (latest) arch in __cuda_arch_bin.
list(GET __cuda_arch_bin -1 __cuda_arch_ptx)
else() # (${CUDA_ARCH_NAME} STREQUAL "Manual")
set(__cuda_arch_bin ${CUDA_ARCH_BIN})
set(__cuda_arch_ptx ${CUDA_ARCH_PTX})
endif()
# remove dots and convert to lists
string(REGEX REPLACE "\\." "" __cuda_arch_bin "${__cuda_arch_bin}")
string(REGEX REPLACE "\\." "" __cuda_arch_ptx "${CUDA_ARCH_PTX}")
string(REGEX REPLACE "\\." "" __cuda_arch_ptx "${__cuda_arch_ptx}")
string(REGEX MATCHALL "[0-9()]+" __cuda_arch_bin "${__cuda_arch_bin}")
string(REGEX MATCHALL "[0-9]+" __cuda_arch_ptx "${__cuda_arch_ptx}")
mshadow_list_unique(__cuda_arch_bin __cuda_arch_ptx)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment