Unverified commit 838fa0f2 authored by Lianmin Zheng, committed by GitHub

[minor] cleanup cmakelists.txt (#5420)

parent f1b3b75f
@@ -187,8 +187,6 @@ jobs:
         timeout-minutes: 10
         run: |
           cd test/srt
-          USE_VLLM_CUSTOM_ALLREDUCE=1 python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
           python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
       - name: Benchmark single latency + torch.compile (TP=2)
...
@@ -4,6 +4,10 @@ project(sgl-kernel LANGUAGES CXX CUDA)
 # CMake
 cmake_policy(SET CMP0169 OLD)
 include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
+set(CMAKE_COLOR_DIAGNOSTICS ON)
+set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+set(CMAKE_SHARED_LIBRARY_PREFIX "")
 # Python
 find_package(Python COMPONENTS Interpreter Development.Module ${SKBUILD_SABI_COMPONENT} REQUIRED)
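A minimal, self-contained sketch of what the four settings added above do; it is not code from this repository, and the demo_op target and demo.cpp source are hypothetical placeholders. CMAKE_SHARED_LIBRARY_PREFIX "" drops the usual lib prefix so the shared object is emitted under the bare target name, CMAKE_POSITION_INDEPENDENT_CODE ON compiles everything with -fPIC, and the two diagnostics variables only affect how build output is printed.

# Sketch only: demo_op and demo.cpp are hypothetical placeholders.
cmake_minimum_required(VERSION 3.24)
project(prefix-demo LANGUAGES CXX)

set(CMAKE_COLOR_DIAGNOSTICS ON)                 # colorize compiler and linker diagnostics
set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")  # echo full compile and link command lines
set(CMAKE_POSITION_INDEPENDENT_CODE ON)         # build all targets with -fPIC
set(CMAKE_SHARED_LIBRARY_PREFIX "")             # output demo_op.so instead of libdemo_op.so

add_library(demo_op SHARED demo.cpp)

Clearing the library prefix matters for Python extension modules, which must be loadable under their bare module name.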
@@ -82,8 +86,6 @@ include_directories(
   ${PROJECT_SOURCE_DIR}/csrc
   ${repo-cutlass_SOURCE_DIR}/include
   ${repo-cutlass_SOURCE_DIR}/tools/util/include
-  ${repo-cutlass_SOURCE_DIR}/examples/77_blackwell_fmha
-  ${repo-cutlass_SOURCE_DIR}/examples/common
   ${repo-flashinfer_SOURCE_DIR}/include
   ${repo-flashinfer_SOURCE_DIR}/csrc
 )
@@ -109,6 +111,8 @@ set(SGL_KERNEL_CUDA_FLAGS
   "--expt-relaxed-constexpr"
   "--expt-extended-lambda"
   "--threads=32"
+  # Suppress warnings
   "-Xcompiler=-Wconversion"
   "-Xcompiler=-fno-strict-aliasing"
@@ -209,17 +213,19 @@ Python_add_library(common_ops MODULE USE_SABI ${SKBUILD_SABI_VERSION} WITH_SOABI
 target_compile_options(common_ops PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:${SGL_KERNEL_CUDA_FLAGS}>)
 target_include_directories(common_ops PRIVATE
-  ${TORCH_INCLUDE_DIRS}
-  ${repo-flash-attention_SOURCE_DIR}/csrc/flash_attn/src)
+  ${repo-cutlass_SOURCE_DIR}/examples/77_blackwell_fmha
+  ${repo-cutlass_SOURCE_DIR}/examples/common
+  ${repo-flash-attention_SOURCE_DIR}/csrc/flash_attn/src
+)
 target_link_libraries(common_ops PRIVATE ${TORCH_LIBRARIES} c10 cuda cublas cublasLt)
 target_compile_definitions(common_ops PRIVATE
   FLASHATTENTION_DISABLE_BACKWARD
   FLASHATTENTION_DISABLE_DROPOUT
   FLASHATTENTION_DISABLE_UNEVEN_K
 )
-install(TARGETS common_ops LIBRARY DESTINATION "sgl_kernel")
+install(TARGETS common_ops LIBRARY DESTINATION sgl_kernel)
 # ============================ Optional Install ============================= #
 # set flash-attention sources file
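The change above moves the cutlass example header paths from the directory-wide include_directories(...) block (see the removal earlier in this diff) into the one target that needs them. A minimal sketch of that scoping pattern, with hypothetical targets and paths:

# Sketch only: core_ops, extra_ops and the paths are hypothetical.
cmake_minimum_required(VERSION 3.18)
project(include-scope-demo LANGUAGES CXX)

# Directory-wide: every target declared below sees this path.
include_directories(${PROJECT_SOURCE_DIR}/common/include)

add_library(core_ops SHARED core.cpp)
add_library(extra_ops SHARED extra.cpp)

# Target-scoped: only core_ops compiles against the example headers,
# so they cannot leak into extra_ops' include path.
target_include_directories(core_ops PRIVATE
  ${PROJECT_SOURCE_DIR}/third_party/examples/include
)

Dropping ${TORCH_INCLUDE_DIRS} from the target fits the same idea, presumably because linking ${TORCH_LIBRARIES} already propagates the Torch include directories as a usage requirement of the imported torch target.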
@@ -280,8 +286,8 @@ if (SGL_KERNEL_ENABLE_FA3)
 target_compile_options(flash_ops PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:${SGL_FLASH_KERNEL_CUDA_FLAGS}>)
 target_include_directories(flash_ops PRIVATE
-  ${TORCH_INCLUDE_DIRS}
-  ${repo-flash-attention_SOURCE_DIR}/hopper)
+  ${repo-flash-attention_SOURCE_DIR}/hopper
+)
 target_link_libraries(flash_ops PRIVATE ${TORCH_LIBRARIES} c10 cuda)
 install(TARGETS flash_ops LIBRARY DESTINATION "sgl_kernel")
...
@@ -35,6 +35,8 @@ docker run --rm \
   ${PYTHON_ROOT_PATH}/bin/pip install --no-cache-dir ninja setuptools==75.0.0 wheel==0.41.0 numpy uv scikit-build-core && \
   export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0+PTX' && \
   export CUDA_VERSION=${CUDA_VERSION} && \
+  export CMAKE_BUILD_PARALLEL_LEVEL=96 && \
+  export MAX_JOBS=96 && \
   mkdir -p /usr/lib/x86_64-linux-gnu/ && \
   ln -s /usr/local/cuda-${CUDA_VERSION}/targets/x86_64-linux/lib/stubs/libcuda.so /usr/lib/x86_64-linux-gnu/libcuda.so && \
   cd /sgl-kernel && \
...