Unverified Commit 2c7f01bc authored by Lianmin Zheng's avatar Lianmin Zheng Committed by GitHub
Browse files

Reorganize CI and test files (#9027)

parent b58ae7a2
#!/bin/bash
# Pre-download the default test models into a shared local cache so CI jobs
# can run without hitting the Hugging Face hub.
set -euxo pipefail

# Ask the python test utilities for the default model list (a JSON array),
# then split it into a bash array, one model id per element.
mapfile -t model_list < <(python3 -c "from sglang.test.test_utils import _get_default_models; print(_get_default_models())" | jq -r '.[]')
if (( ${#model_list[@]} == 0 )); then
  echo "Failed to get default models."
  exit 1
fi

# The cache root must be supplied through the environment.
cache_root="${DEFAULT_MODEL_CACHE_DIR:-}"
if [[ -z "$cache_root" ]]; then
  echo "DEFAULT_MODEL_CACHE_DIR environment variable is not set."
  exit 1
fi

download_failures=()
for model_name in "${model_list[@]}"; do
  target_dir="$cache_root/$model_name"
  echo "Caching model: $model_name to $target_dir"
  mkdir -p "$target_dir"
  if huggingface-cli download "$model_name" \
      --local-dir "$target_dir" \
      --local-dir-use-symlinks False 2>/dev/null; then
    echo "Successfully cached model: $model_name"
  else
    # Remove the partial download so a later retry starts from a clean slate,
    # and keep going with the remaining models.
    echo "WARNING: Failed to cache model: $model_name"
    rm -rf "$target_dir"
    download_failures+=("$model_name")
  fi
done

# Report an end-of-run summary; failures are reported but do not fail the script.
if (( ${#download_failures[@]} > 0 )); then
  echo -e "\n[Summary] Failed to cache following models:"
  printf ' - %s\n' "${download_failures[@]}"
else
  echo -e "\n[Summary] All models cached successfully"
fi
......@@ -87,6 +87,7 @@ FetchContent_Declare(
GIT_SHALLOW OFF
)
FetchContent_Populate(repo-flashinfer)
# flash-attention
FetchContent_Declare(
repo-flash-attention
......@@ -95,6 +96,7 @@ FetchContent_Declare(
GIT_SHALLOW OFF
)
FetchContent_Populate(repo-flash-attention)
# mscclpp
FetchContent_Declare(
repo-mscclpp
......@@ -232,6 +234,7 @@ set(SOURCES
"csrc/elementwise/activation.cu"
"csrc/elementwise/fused_add_rms_norm_kernel.cu"
"csrc/elementwise/rope.cu"
"csrc/common_extension.cc"
"csrc/gemm/awq_kernel.cu"
"csrc/gemm/bmm_fp8.cu"
"csrc/gemm/dsv3_fused_a_gemm.cu"
......@@ -251,24 +254,10 @@ set(SOURCES
"csrc/gemm/per_token_quant_fp8.cu"
"csrc/gemm/qserve_w4a8_per_chn_gemm.cu"
"csrc/gemm/qserve_w4a8_per_group_gemm.cu"
"csrc/moe/moe_align_kernel.cu"
"csrc/moe/moe_fused_gate.cu"
"csrc/moe/moe_topk_softmax_kernels.cu"
"csrc/moe/nvfp4_blockwise_moe.cu"
"csrc/moe/fp8_blockwise_moe_kernel.cu"
"csrc/moe/prepare_moe_input.cu"
"csrc/moe/ep_moe_reorder_kernel.cu"
"csrc/moe/ep_moe_silu_and_mul_kernel.cu"
"csrc/speculative/eagle_utils.cu"
"csrc/speculative/packbit.cu"
"csrc/spatial/greenctx_stream.cu"
"csrc/speculative/speculative_sampling.cu"
"csrc/grammar/apply_token_bitmask_inplace_cuda.cu"
"csrc/kvcacheio/transfer.cu"
"csrc/moe/cutlass_moe/w4a8/scaled_mm_entry.cu"
"csrc/moe/cutlass_moe/w4a8/w4a8_moe_data.cu"
"csrc/moe/cutlass_moe/w4a8/w4a8_grouped_mm_c3x.cu"
"csrc/common_extension.cc"
"csrc/moe/marlin_moe_wna16/ops.cu"
"csrc/moe/marlin_moe_wna16/gptq_marlin_repack.cu"
"csrc/moe/marlin_moe_wna16/awq_marlin_repack.cu"
......@@ -278,6 +267,19 @@ set(SOURCES
"csrc/moe/marlin_moe_wna16/kernel_fp16_ku4.cu"
"csrc/moe/marlin_moe_wna16/kernel_fp16_ku4b8.cu"
"csrc/moe/marlin_moe_wna16/kernel_fp16_ku8b128.cu"
"csrc/moe/moe_align_kernel.cu"
"csrc/moe/moe_fused_gate.cu"
"csrc/moe/moe_topk_softmax_kernels.cu"
"csrc/moe/nvfp4_blockwise_moe.cu"
"csrc/moe/fp8_blockwise_moe_kernel.cu"
"csrc/moe/prepare_moe_input.cu"
"csrc/moe/ep_moe_reorder_kernel.cu"
"csrc/moe/ep_moe_silu_and_mul_kernel.cu"
"csrc/kvcacheio/transfer.cu"
"csrc/speculative/eagle_utils.cu"
"csrc/speculative/packbit.cu"
"csrc/spatial/greenctx_stream.cu"
"csrc/speculative/speculative_sampling.cu"
"${repo-flashinfer_SOURCE_DIR}/csrc/norm.cu"
"${repo-flashinfer_SOURCE_DIR}/csrc/renorm.cu"
"${repo-flashinfer_SOURCE_DIR}/csrc/sampling.cu"
......@@ -312,12 +314,15 @@ else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=1")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=1")
endif()
# mscclpp
# Build the vendored mscclpp (fetched above via FetchContent) as a subproject:
# enable its CUDA backend, bypass the GPU presence check (NOTE(review):
# presumably because build hosts may lack GPUs — confirm), and skip its tests.
set(MSCCLPP_USE_CUDA ON)
set(MSCCLPP_BYPASS_GPU_CHECK ON)
set(MSCCLPP_BUILD_TESTS OFF)
add_subdirectory(${repo-mscclpp_SOURCE_DIR})
# Link common_ops against torch, CUDA/cuBLAS libraries, and the static mscclpp build.
target_link_libraries(common_ops PRIVATE ${TORCH_LIBRARIES} c10 cuda cublas cublasLt mscclpp_static)
# flash attention
target_compile_definitions(common_ops PRIVATE
FLASHATTENTION_DISABLE_BACKWARD
FLASHATTENTION_DISABLE_DROPOUT
......
......@@ -5,18 +5,18 @@
[![PyPI](https://img.shields.io/pypi/v/sgl-kernel)](https://pypi.org/project/sgl-kernel)
## Installation
For CUDA 11.8:
For CUDA 12.1 and above:
```bash
pip3 install sgl-kernel -i https://docs.sglang.ai/whl/cu118
pip3 install sgl-kernel
```
For CUDA 12.1 or CUDA 12.4:
For CUDA 11.8:
```bash
pip3 install sgl-kernel
pip3 install sgl-kernel -i https://docs.sglang.ai/whl/cu118
```
## Build from source
Development build:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment