Unverified commit b639327a, authored by Michael Goin, committed by GitHub
Browse files

Revert "Use NVCC --compress-mode to reduce binary size by 30% #20694" (#20853)


Signed-off-by: mgoin <mgoin64@gmail.com>
parent 4afe687a
...@@ -171,16 +171,6 @@ if(NVCC_THREADS AND VLLM_GPU_LANG STREQUAL "CUDA") ...@@ -171,16 +171,6 @@ if(NVCC_THREADS AND VLLM_GPU_LANG STREQUAL "CUDA")
list(APPEND VLLM_GPU_FLAGS "--threads=${NVCC_THREADS}") list(APPEND VLLM_GPU_FLAGS "--threads=${NVCC_THREADS}")
endif() endif()
#
# Set nvcc fatbin compression.
#
if(VLLM_GPU_LANG STREQUAL "CUDA")
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8)
list(APPEND VLLM_GPU_FLAGS "-Xfatbin" "-compress-all" "-compress-mode=size")
endif()
endif()
# #
# Use FetchContent for C++ dependencies that are compiled as part of vLLM's build process. # Use FetchContent for C++ dependencies that are compiled as part of vLLM's build process.
# setup.py will override FETCHCONTENT_BASE_DIR to play nicely with sccache. # setup.py will override FETCHCONTENT_BASE_DIR to play nicely with sccache.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment