[Build] Disable sm_90a in cu11 (#5141)

45a1a69b · Simon Mo · GitHub · 87a658c8 · 45a1a69b
Unverified Commit 45a1a69b authored May 30, 2024 by Simon Mo Committed by GitHub May 30, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 8 additions and 6 deletions

CMakeLists.txt CMakeLists.txt +8 -6

No files found.
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -177,7 +177,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
  include(FetchContent)
  SET(CUTLASS_ENABLE_HEADERS_ONLY=ON)
  FetchContent_Declare(
-        cutlass 
+        cutlass
        GIT_REPOSITORY https://github.com/nvidia/cutlass.git
        # CUTLASS 3.5.0
        GIT_TAG 7d49e6c7e2f8896c47f586706e67e1fb215529dc
@@ -200,11 +200,13 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
  # The CUTLASS kernels for Hopper require sm90a to be enabled.
  # This is done via the below gencode option, BUT that creates kernels for both sm90 and sm90a.
  # That adds an extra 17MB to compiled binary, so instead we selectively enable it.
-  set_source_files_properties(
+  if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0)  # skip sm_90a on every CUDA 11.x toolkit (11.8 compares greater than 11)
-      "csrc/quantization/cutlass_w8a8/scaled_mm_dq_c3x.cu"
+    set_source_files_properties(
-      PROPERTIES
+          "csrc/quantization/cutlass_w8a8/scaled_mm_dq_c3x.cu"
-      COMPILE_FLAGS
+          PROPERTIES
-      "-gencode arch=compute_90a,code=sm_90a")
+          COMPILE_FLAGS
+          "-gencode arch=compute_90a,code=sm_90a")
+  endif()
 endif()