Revert "Use NVCC --compress-mode to reduce binary size by 30% #20694" (#20853)

Signed-off-by: mgoin <mgoin64@gmail.com>

Revert "Use NVCC --compress-mode to reduce binary size by 30% #20694" (#20853)
Signed-off-by: mgoin <mgoin64@gmail.com>
b639327a · Michael Goin · GitHub · 4afe687a · b639327a
Unverified commit b639327a, authored Jul 12, 2025 by Michael Goin; committed by GitHub Jul 11, 2025.
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 0 additions and 10 deletions

CMakeLists.txt CMakeLists.txt +0 -10

No files found.
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -171,16 +171,6 @@ if(NVCC_THREADS AND VLLM_GPU_LANG STREQUAL "CUDA")
  list(APPEND VLLM_GPU_FLAGS "--threads=${NVCC_THREADS}")
 endif()
-#
-# Set nvcc fatbin compression.
-#
-if(VLLM_GPU_LANG STREQUAL "CUDA")
-  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8)
-    list(APPEND VLLM_GPU_FLAGS "-Xfatbin" "-compress-all" "-compress-mode=size")
-  endif()
-endif()
 #
 # Use FetchContent for C++ dependencies that are compiled as part of vLLM's build process.
 # setup.py will override FETCHCONTENT_BASE_DIR to play nicely with sccache.