Run tests based on labels (#10456)

50dc0c1e · Lianmin Zheng · GitHub · 76becc1d · 50dc0c1e · 50dc0c1e
Unverified commit 50dc0c1e, authored Sep 15, 2025 by Lianmin Zheng; committed by GitHub on Sep 15, 2025
15 changed files
--- a/.github/workflows/execute-notebook.yml
+++ b/.github/workflows/execute-notebook.yml
@@ -6,6 +6,7 @@ on:
    paths:
      - "python/sglang/**"
      - "docs/**"
+    types: [synchronize, labeled]
  workflow_dispatch:
@@ -17,7 +18,7 @@ concurrency:
 jobs:
  run-all-notebooks:
    runs-on: 1-gpu-runner
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -18,5 +18,13 @@ jobs:
          python -m pip install pre-commit
          pre-commit install
-      - name: Linting
+      - name: Run pre-commit checks
        run: pre-commit run --all-files --show-diff-on-failure
+      - name: Run sgl-kernel clang-format checks
+        uses: DoozyX/clang-format-lint-action@v0.18.1
+        with:
+          source: sgl-kernel
+          extensions: h,c,cpp,hpp,cu,cuh,cc
+          clangFormatVersion: 18
+          style: file
--- a/.github/workflows/pr-benchmark-rust.yml
+++ b/.github/workflows/pr-benchmark-rust.yml
@@ -9,7 +9,7 @@ on:
    branches: [ main ]
    paths:
      - "sgl-router/**"
-    types: [opened, synchronize, reopened, labeled]
+    types: [synchronize, labeled]
  workflow_dispatch:
 concurrency:
@@ -24,11 +24,11 @@ permissions:
  contents: read
  pull-requests: write
  issues: write
 jobs:
  # Quick check job that always runs on PRs
  benchmark-compile-check:
    name: Benchmark Compilation Check
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code

--- a/.github/workflows/pr-test-amd.yml
+++ b/.github/workflows/pr-test-amd.yml
@@ -17,6 +17,7 @@ on:
      - "test/**"
      - "sgl-kernel/**"
      - ".github/workflows/pr-test-amd.yml"
+    types: [synchronize, labeled]
  workflow_dispatch:
 concurrency:
@@ -25,8 +26,7 @@ concurrency:
 jobs:
  accuracy-test-1-gpu-amd:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
-      github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
@@ -52,8 +52,7 @@ jobs:
          bash scripts/ci/amd_ci_exec.sh python3 models/test_qwen_models.py
  accuracy-test-2-gpu-amd:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
-        github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
@@ -77,8 +76,7 @@ jobs:
          bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_moe_eval_accuracy_large.py
  mla-test-1-gpu-amd:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
-      github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
@@ -102,8 +100,7 @@ jobs:
          bash scripts/ci/amd_ci_exec.sh python3 test_mla.py
  performance-test-1-gpu-part-1-amd:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
-        github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
@@ -143,8 +140,7 @@ jobs:
          bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_non_stream_small_batch_size
  performance-test-1-gpu-part-2-amd:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
-        github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
@@ -178,8 +174,7 @@ jobs:
          bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default_fp8
  bench-test-2-gpu-amd:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
-      github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
@@ -223,8 +218,7 @@ jobs:
          bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache
  unit-test-backend-1-gpu-amd:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
-      github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
@@ -249,8 +243,7 @@ jobs:
          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 8
  unit-test-backend-1-gpu-amd-mi35x:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
-      github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
@@ -274,8 +267,7 @@ jobs:
          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd-mi35x
  unit-test-backend-2-gpu-amd:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
-      github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
@@ -299,8 +291,7 @@ jobs:
          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd
  unit-test-backend-8-gpu-amd:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
-      github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
@@ -324,8 +315,7 @@ jobs:
          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --timeout-per-file 3600
  unit-test-sgl-kernel-amd:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
-      github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
@@ -353,25 +343,3 @@ jobs:
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_apply_token_bitmask_inplace.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_activation.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_kvcacheio.py
-  pr-test-amd-finish:
-    if: always()
-    needs: [
-      accuracy-test-1-gpu-amd, mla-test-1-gpu-amd, bench-test-2-gpu-amd,
-      accuracy-test-2-gpu-amd, performance-test-1-gpu-part-1-amd, performance-test-1-gpu-part-2-amd,
-      unit-test-backend-1-gpu-amd, unit-test-backend-1-gpu-amd-mi35x, unit-test-backend-2-gpu-amd,
-      unit-test-backend-8-gpu-amd, unit-test-sgl-kernel-amd
-    ]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check all dependent job statuses
-        run: |
-          results=(${{ join(needs.*.result, ' ') }})
-          for result in "${results[@]}"; do
-            if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
-              echo "Job failed with result: $result"
-              exit 1
-            fi
-          done
-          echo "All jobs completed successfully"
-          exit 0
--- a/.github/workflows/pr-test-h20.yml
+++ b/.github/workflows/pr-test-h20.yml
@@ -5,6 +5,7 @@ on:
    branches: [ main ]
  pull_request:
    branches: [ main ]
+    types: [synchronize, labeled]
  workflow_dispatch:
    inputs:
      version:
@@ -23,17 +24,29 @@ jobs:
  check-changes:
    runs-on: ubuntu-latest
    outputs:
-      src: ${{ steps.filter.outputs.src }}
+      h20_files: ${{ steps.filter.outputs.h20_files }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
+      - name: Fail if the PR does not have the 'run-ci' label
+        if: github.event_name == 'pull_request' && !contains(github.event.pull_request.labels.*.name, 'run-ci')
+        run: |
+          echo "This pull request does not have the 'run-ci' label. Failing the workflow."
+          exit 1
+      - name: Fail if the PR is a draft
+        if: github.event_name == 'pull_request' && github.event.pull_request.draft == true
+        run: |
+          echo "This pull request is a draft. Failing the workflow."
+          exit 1
      - name: Detect file changes
        id: filter
        uses: dorny/paths-filter@v3
        with:
          filters: |
-            src:
+            h20_files:
              - "python/sglang/srt/models/deepseek*"
              - "python/sglang/srt/layers/moe/**"
              - ".github/workflows/pr-test-h20.yml"
@@ -41,9 +54,7 @@ jobs:
  per-commit-8-gpu-h20:
    needs: [check-changes]
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+    if: needs.check-changes.outputs.h20_files == 'true'
-        github.event.pull_request.draft == false &&
-        needs.check-changes.outputs.src == 'true'
    runs-on: 8-gpu-h20
    steps:
      - name: Checkout code
@@ -65,17 +76,31 @@ jobs:
      check-changes,
      per-commit-8-gpu-h20,
    ]
-    if: needs.check-changes.outputs.src == 'true'
+    if: always()
    runs-on: ubuntu-latest
    steps:
      - name: Check all dependent job statuses
        run: |
-          results=(${{ join(needs.*.result, ' ') }})
+          # Convert the 'needs' context to a JSON string
-          for result in "${results[@]}"; do
+          json_needs='${{ toJson(needs) }}'
-            if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
-              echo "Job failed with result: $result"
+          # Get a list of all job names from the JSON keys
+          job_names=$(echo "$json_needs" | jq -r 'keys_unsorted[]')
+          for job in $job_names; do
+            # For each job, extract its result
+            result=$(echo "$json_needs" | jq -r --arg j "$job" '.[$j].result')
+            # Print the job name and its result
+            echo "$job: $result"
+            # Check for failure or cancellation and exit if found
+            if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
+              echo "The above jobs failed."
              exit 1
            fi
          done
+          # If the loop completes, all jobs were successful
          echo "All jobs completed successfully"
          exit 0
--- a/.github/workflows/pr-test-npu.yml
+++ b/.github/workflows/pr-test-npu.yml
@@ -15,6 +15,7 @@ on:
      - "scripts/ci/**"
      - "test/**"
      - ".github/workflows/pr-test-npu.yml"
+    types: [synchronize, labeled]
  workflow_dispatch:
 concurrency:
@@ -23,8 +24,7 @@ concurrency:
 jobs:
  per-commit-1-ascend-npu:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
-        github.event.pull_request.draft == false
    runs-on: linux-arm64-npu-1
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
@@ -58,8 +58,7 @@ jobs:
          python3 run_suite.py --suite per-commit-1-ascend-npu
  per-commit-2-ascend-npu:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
-        github.event.pull_request.draft == false
    runs-on: linux-arm64-npu-2
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
@@ -93,8 +92,7 @@ jobs:
          python3 run_suite.py --suite per-commit-2-ascend-npu
  per-commit-4-ascend-npu:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
-        github.event.pull_request.draft == false
    runs-on: linux-arm64-npu-4
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
@@ -128,8 +126,7 @@ jobs:
          python3 run_suite.py --suite per-commit-4-ascend-npu --timeout-per-file 3600
  per-commit-16-ascend-a3:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
-        github.event.pull_request.draft == false
    runs-on: linux-aarch64-a3-16
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-a3-ubuntu22.04-py3.11
@@ -161,24 +158,3 @@ jobs:
        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-16-ascend-a3 --timeout-per-file 5400
-  pr-test-npu-finish:
-    if: always()
-    needs:
-      - per-commit-1-ascend-npu
-      - per-commit-2-ascend-npu
-      - per-commit-4-ascend-npu
-      - per-commit-16-ascend-a3
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check all dependent job statuses
-        run: |
-          results=(${{ join(needs.*.result, ' ') }})
-          for result in "${results[@]}"; do
-            if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
-              echo "Job failed with result: $result"
-              exit 1
-            fi
-          done
-          echo "All jobs completed successfully"
-          exit 0
--- a/.github/workflows/pr-test-pd-router.yml
+++ b/.github/workflows/pr-test-pd-router.yml
@@ -13,6 +13,7 @@ on:
      - 'python/sglang/srt/disaggregation/**'
      - 'scripts/ci/ci_start_disaggregation_servers.sh'
      - 'sgl-router/**'
+    types: [synchronize, labeled]
  workflow_dispatch:
 concurrency:
@@ -26,8 +27,7 @@ permissions:
 jobs:
  test-disaggregation:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
-        github.event.pull_request.draft == false
    runs-on: [h200]
    timeout-minutes: 45

--- a/.github/workflows/pr-test-rust.yml
+++ b/.github/workflows/pr-test-rust.yml
@@ -9,6 +9,7 @@ on:
    branches: [ main ]
    paths:
      - "sgl-router/**"
+    types: [synchronize, labeled]
  workflow_dispatch:
 concurrency:
@@ -21,7 +22,7 @@ env:
 jobs:
  unit-test-rust:
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
@@ -81,7 +82,7 @@ jobs:
        run: sccache --show-stats
  pytest-rust:
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
    runs-on: BM.A10.4
    timeout-minutes: 25
    steps:

--- a/.github/workflows/pr-test-xeon.yml
+++ b/.github/workflows/pr-test-xeon.yml
@@ -17,6 +17,7 @@ on:
      - "test/**"
      - "sgl-kernel/**"
      - ".github/workflows/pr-test-xeon.yml"
+    types: [synchronize, labeled]
  workflow_dispatch:
 concurrency:
@@ -25,8 +26,7 @@ concurrency:
 jobs:
  build-test:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
-      github.event.pull_request.draft == false
    runs-on: xeon-gnr
    env:
      HF_HOME: /home/sdp/.cache/huggingface
@@ -87,20 +87,3 @@ jobs:
        if: always()
        run: |
          docker rm -f ci_sglang_xeon || true
-  pr-test-xeon-finish:
-    if: always()
-    needs: [build-test]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check all dependent job statuses
-        run: |
-          results=(${{ join(needs.*.result, ' ') }})
-          for result in "${results[@]}"; do
-            if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
-              echo "Job failed with result: $result"
-              exit 1
-            fi
-          done
-          echo "All jobs completed successfully"
-          exit 0
--- a/.github/workflows/pr-test.yml
+++ b/.github/workflows/pr-test.yml
--- a/.github/workflows/vllm-dependency-test.yml
+++ b/.github/workflows/vllm-dependency-test.yml
@@ -7,12 +7,16 @@ on:
      - "python/**"
      - "scripts/ci/**"
      - "test/**"
+      - ".github/workflows/vllm-dependency-test.yml"
  pull_request:
    branches: [ main ]
    paths:
      - "python/**"
      - "scripts/ci/**"
      - "test/**"
+      - ".github/workflows/vllm-dependency-test.yml"
+    types: [synchronize, labeled]
+  workflow_dispatch:
 concurrency:
  group: vllm-dependency-test-${{ github.ref }}
@@ -20,8 +24,7 @@ concurrency:
 jobs:
  vllm-dependency-test:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
-        github.event.pull_request.draft == false
    runs-on: 1-gpu-runner
    steps:
      - name: Checkout code
@@ -32,12 +35,8 @@ jobs:
          bash scripts/ci/ci_install_dependency.sh
          pip install "bitsandbytes>=0.44.0"
-          pip install "sgl-kernel==0.3.9.post2"
      - name: Run vLLM dependency tests
-        timeout-minutes: 60
+        timeout-minutes: 30
        run: |
-          export SGLANG_SKIP_SGL_KERNEL_VERSION_CHECK=1
          cd test/srt
-          python3 run_suite.py --suite vllm_dependency_test --timeout-per-file 3600
+          python3 run_suite.py --suite vllm_dependency_test --timeout-per-file 600
--- a/docs/developer_guide/contribution_guide.md
+++ b/docs/developer_guide/contribution_guide.md
@@ -68,6 +68,13 @@ You can identify potential reviewers for your code by checking the [code owners]
 Another effective strategy is to review the file modification history and contact individuals who have frequently edited the files.
 If you modify files protected by code owners, their approval is required to merge the code.
+## How to trigger CI
+To trigger CI, the pull request must have the "run-ci" label.
+- If you have write access to sgl-project/sglang, your pull request will be automatically tagged by @sglang-bot.
+- If you have triage access to sgl-project/sglang, you can manually add the label by clicking "Labels" on the right side of your pull request page.
+- If you have neither write nor triage access, please request a review and ask a maintainer to add the label for you.
 ## General code style
 - Avoid code duplication. If the same code snippet (more than five lines) appears multiple times, extract it into a shared function.
 - Minimize device synchronization. Reduce expensive CPU-GPU synchronization operations, such as `tensor.item()` or `tensor.cpu()`, whenever possible. Use vectorized code.

--- a/python/sglang/README.md
+++ b/python/sglang/README.md
-# Code Structures
+# Code Structure
 - `eval`: The evaluation utilities.
 - `lang`: The frontend language.
@@ -11,6 +11,6 @@
 - `bench_serving.py`: Benchmark online serving with dynamic requests.
 - `check_env.py`: Check the environment variables and dependencies.
 - `global_config.py`: The global configs and constants.
-- `launch_server.py`: The entry point for launching the local server.
+- `launch_server.py`: The entry point for launching a local server.
 - `utils.py`: Common utilities.
 - `version.py`: Version info.
--- a/sgl-kernel/README.md
+++ b/sgl-kernel/README.md
@@ -5,16 +5,15 @@
 [![PyPI](https://img.shields.io/pypi/v/sgl-kernel)](https://pypi.org/project/sgl-kernel)
 ## Installation
-For CUDA 12.1 and above:
 ```bash
 pip3 install sgl-kernel
 ```
-For CUDA 11.8:
+For CUDA 12.4:
 ```bash
-pip3 install sgl-kernel -i https://docs.sglang.ai/whl/cu118
+pip3 install sgl-kernel -i https://docs.sglang.ai/whl/cu124
 ```
 ## Build from source

--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -146,7 +146,6 @@ suites = {
        TestFile("test_full_deepseek_v3.py", 333),
    ],
    "per-commit-8-gpu-b200": [
-        # add more here
        TestFile("test_gpt_oss_4gpu.py", 600),
        TestFile("test_deepseek_v3_fp4_4gpu.py", 600),
    ],