Unverified Commit 3e090482 authored by pdr, committed by GitHub
Browse files

cuda arch flag for cublaslt (#701)

Add the GB200 CUDA architecture flag (100) to the cuBLASLt build when the CUDA Toolkit is 12.8.0 or newer.
parent 330c68aa
......@@ -9,6 +9,10 @@ find_package(CUDAToolkit QUIET)
# Configure the cuBLASLt targets only when a CUDA Toolkit was found and its
# version is at least 11.8.
if(CUDAToolkit_FOUND AND NOT CUDAToolkit_VERSION VERSION_LESS 11.8)
set(CMAKE_CUDA_STANDARD 17)
# NOTE(review): presumably common CUDA settings shared with sibling
# directories; the contents of this file are not visible here.
include(../cuda_common.cmake)
# Baseline list of CUDA architectures; applied to cublaslt_gemm through its
# CUDA_ARCHITECTURES property.
set(CUDA_ARCH_LIST "80;86;90")
# Architecture 100 is appended only for CUDA Toolkit 12.8.0 or newer.
# NOTE(review): the commit message ties this to GB200; presumably older
# toolkits cannot target it, confirm against the CUDA release notes.
if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.8.0")
list(APPEND CUDA_ARCH_LIST "100")
endif()
# Shared helper library, linked against the cuBLAS and cuBLASLt imported targets.
add_library(cublaslt_utils SHARED cublaslt_utils.cc)
target_link_libraries(cublaslt_utils CUDA::cublas CUDA::cublasLt)
......@@ -17,6 +21,7 @@ if(CUDAToolkit_FOUND AND NOT CUDAToolkit_VERSION VERSION_LESS 11.8)
# GEMM executable built from cublaslt_gemm.cu and linked against cublaslt_utils.
add_executable(cublaslt_gemm cublaslt_gemm.cu)
target_link_libraries(cublaslt_gemm cublaslt_utils)
# Compile for every architecture collected in CUDA_ARCH_LIST. This is the only
# CUDA_ARCHITECTURES assignment: a preceding hard-coded "80;86;90" assignment
# was overwritten immediately by this one and has been removed as dead code.
set_target_properties(cublaslt_gemm PROPERTIES CUDA_ARCHITECTURES "${CUDA_ARCH_LIST}")
# Install the executable into <prefix>/bin.
install(TARGETS cublaslt_gemm RUNTIME DESTINATION bin)
endif()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment