Unverified Commit 65b34702 authored by Xin Yao, committed by GitHub
Browse files

[Makefile] Refactor CUDA makefile and add Hopper (SM90) to default build (#4830)



* Update CUDA.cmake to align with PyTorch's

* add Ada and Hopper

* add more comments

* resolve comments
Co-authored-by: Triston <triston.cao@gmail.com>
parent c8ea9fa4
...@@ -10,9 +10,16 @@ endif() ...@@ -10,9 +10,16 @@ endif()
include(CheckCXXCompilerFlag) include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-std=c++14" SUPPORT_CXX14) check_cxx_compiler_flag("-std=c++14" SUPPORT_CXX14)
set(dgl_known_gpu_archs "35 50 60 70") set(dgl_known_gpu_archs "35" "50" "60" "70")
set(dgl_cuda_arch_ptx "70")
if (CUDA_VERSION_MAJOR GREATER_EQUAL "11") if (CUDA_VERSION_MAJOR GREATER_EQUAL "11")
set(dgl_known_gpu_archs "${dgl_known_gpu_archs} 80") list(APPEND dgl_known_gpu_archs "80")
set(dgl_cuda_arch_ptx "80")
endif()
# CMake 3.5 doesn't support VERSION_GREATER_EQUAL
if (NOT CUDA_VERSION VERSION_LESS "11.8")
list(APPEND dgl_known_gpu_archs "90")
set(dgl_cuda_arch_ptx "90")
endif() endif()
################################################################################################ ################################################################################################
...@@ -63,10 +70,14 @@ set(CUDA_gpu_detect_output "") ...@@ -63,10 +70,14 @@ set(CUDA_gpu_detect_output "")
# nvcc outputs text containing line breaks when building with MSVC. # nvcc outputs text containing line breaks when building with MSVC.
# The line below prevents CMake from inserting a variable with line # The line below prevents CMake from inserting a variable with line
# breaks in the cache # breaks in the cache
message(STATUS "Found CUDA arch ${__nvcc_out}") message(STATUS "Found GPU arch ${__nvcc_out}")
string(REGEX MATCH "([1-9].[0-9])" __nvcc_out "${__nvcc_out}") string(REGEX MATCH "([1-9].[0-9])" __nvcc_out "${__nvcc_out}")
string(REPLACE "2.1" "2.1(2.0)" __nvcc_out "${__nvcc_out}") if(__nvcc_out VERSION_LESS "3.5")
set(CUDA_gpu_detect_output ${__nvcc_out} CACHE INTERNAL "Returned GPU architetures from mshadow_detect_gpus tool" FORCE) # drop support for cc < 3.5 and build for all known archs.
message(WARNING "GPU arch less than 3.5 is not supported.")
else()
set(CUDA_gpu_detect_output ${__nvcc_out} CACHE INTERNAL "Returned GPU architetures from mshadow_detect_gpus tool" FORCE)
endif()
else() else()
message(WARNING "Running GPU detection script with nvcc failed: ${__nvcc_out}") message(WARNING "Running GPU detection script with nvcc failed: ${__nvcc_out}")
endif() endif()
...@@ -86,8 +97,8 @@ endfunction() ...@@ -86,8 +97,8 @@ endfunction()
# Usage: # Usage:
# dgl_select_nvcc_arch_flags(out_variable) # dgl_select_nvcc_arch_flags(out_variable)
function(dgl_select_nvcc_arch_flags out_variable) function(dgl_select_nvcc_arch_flags out_variable)
# List of arch names # List of arch names. Turing and Ada don't have a new major version, so they are not added to default build.
set(__archs_names "Fermi" "Kepler" "Maxwell" "Pascal" "Volta" "Ampere" "All" "Manual") set(__archs_names "Kepler" "Maxwell" "Pascal" "Volta" "Turing" "Ampere" "Ada" "Hopper" "All" "Manual")
set(__archs_name_default "All") set(__archs_name_default "All")
if(NOT CMAKE_CROSSCOMPILING) if(NOT CMAKE_CROSSCOMPILING)
list(APPEND __archs_names "Auto") list(APPEND __archs_names "Auto")
...@@ -107,36 +118,53 @@ function(dgl_select_nvcc_arch_flags out_variable) ...@@ -107,36 +118,53 @@ function(dgl_select_nvcc_arch_flags out_variable)
if(${CUDA_ARCH_NAME} STREQUAL "Manual") if(${CUDA_ARCH_NAME} STREQUAL "Manual")
set(CUDA_ARCH_BIN ${dgl_known_gpu_archs} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported") set(CUDA_ARCH_BIN ${dgl_known_gpu_archs} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
set(CUDA_ARCH_PTX "50" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for") set(CUDA_ARCH_PTX ${dgl_cuda_arch_ptx} CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX) mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX)
else() else()
unset(CUDA_ARCH_BIN CACHE) unset(CUDA_ARCH_BIN CACHE)
unset(CUDA_ARCH_PTX CACHE) unset(CUDA_ARCH_PTX CACHE)
endif() endif()
if(${CUDA_ARCH_NAME} STREQUAL "Fermi") if(${CUDA_ARCH_NAME} STREQUAL "Kepler")
set(__cuda_arch_bin "20 21(20)") set(__cuda_arch_bin "35")
elseif(${CUDA_ARCH_NAME} STREQUAL "Kepler") set(__cuda_arch_ptx "35")
set(__cuda_arch_bin "30 35")
elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell") elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell")
set(__cuda_arch_bin "50") set(__cuda_arch_bin "50")
set(__cuda_arch_ptx "50")
elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal") elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal")
set(__cuda_arch_bin "60 61") set(__cuda_arch_bin "60")
set(__cuda_arch_ptx "60")
elseif(${CUDA_ARCH_NAME} STREQUAL "Volta") elseif(${CUDA_ARCH_NAME} STREQUAL "Volta")
set(__cuda_arch_bin "70") set(__cuda_arch_bin "70")
set(__cuda_arch_ptx "70")
elseif(${CUDA_ARCH_NAME} STREQUAL "Turing")
set(__cuda_arch_bin "75")
set(__cuda_arch_ptx "75")
elseif(${CUDA_ARCH_NAME} STREQUAL "Ampere") elseif(${CUDA_ARCH_NAME} STREQUAL "Ampere")
set(__cuda_arch_bin "80") set(__cuda_arch_bin "80")
set(__cuda_arch_ptx "80")
elseif(${CUDA_ARCH_NAME} STREQUAL "Ada")
set(__cuda_arch_bin "89")
set(__cuda_arch_ptx "89")
elseif(${CUDA_ARCH_NAME} STREQUAL "Hopper")
set(__cuda_arch_bin "90")
set(__cuda_arch_ptx "90")
elseif(${CUDA_ARCH_NAME} STREQUAL "All") elseif(${CUDA_ARCH_NAME} STREQUAL "All")
set(__cuda_arch_bin ${dgl_known_gpu_archs}) set(__cuda_arch_bin ${dgl_known_gpu_archs})
set(__cuda_arch_ptx ${dgl_cuda_arch_ptx})
elseif(${CUDA_ARCH_NAME} STREQUAL "Auto") elseif(${CUDA_ARCH_NAME} STREQUAL "Auto")
dgl_detect_installed_gpus(__cuda_arch_bin) dgl_detect_installed_gpus(__cuda_arch_bin)
# if detect successes, __cuda_arch_ptx = __cuda_arch_bin
# if detect fails, __cuda_arch_ptx is the latest arch in __cuda_arch_bin
list(GET __cuda_arch_bin -1 __cuda_arch_ptx)
else() # (${CUDA_ARCH_NAME} STREQUAL "Manual") else() # (${CUDA_ARCH_NAME} STREQUAL "Manual")
set(__cuda_arch_bin ${CUDA_ARCH_BIN}) set(__cuda_arch_bin ${CUDA_ARCH_BIN})
set(__cuda_arch_ptx ${CUDA_ARCH_PTX})
endif() endif()
# remove dots and convert to lists # remove dots and convert to lists
string(REGEX REPLACE "\\." "" __cuda_arch_bin "${__cuda_arch_bin}") string(REGEX REPLACE "\\." "" __cuda_arch_bin "${__cuda_arch_bin}")
string(REGEX REPLACE "\\." "" __cuda_arch_ptx "${CUDA_ARCH_PTX}") string(REGEX REPLACE "\\." "" __cuda_arch_ptx "${__cuda_arch_ptx}")
string(REGEX MATCHALL "[0-9()]+" __cuda_arch_bin "${__cuda_arch_bin}") string(REGEX MATCHALL "[0-9()]+" __cuda_arch_bin "${__cuda_arch_bin}")
string(REGEX MATCHALL "[0-9]+" __cuda_arch_ptx "${__cuda_arch_ptx}") string(REGEX MATCHALL "[0-9]+" __cuda_arch_ptx "${__cuda_arch_ptx}")
mshadow_list_unique(__cuda_arch_bin __cuda_arch_ptx) mshadow_list_unique(__cuda_arch_bin __cuda_arch_ptx)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment