Expand and update test coverage for AMD CI (#10044)

36942660 · Hubert Lu · GitHub · 9f5e7018 · 36942660 · 36942660
Unverified Commit 36942660 authored Nov 04, 2025 by Hubert Lu Committed by GitHub Nov 04, 2025
4 changed files
--- a/.github/workflows/pr-test-amd.yml
+++ b/.github/workflows/pr-test-amd.yml
@@ -25,13 +25,117 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  accuracy-test-1-gpu-amd:
-    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
+  check-changes:
+    runs-on: ubuntu-latest
+    outputs:
+      main_package: ${{ steps.filter.outputs.main_package }}
+      sgl_kernel: ${{ steps.filter.outputs.sgl_kernel }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Fail if the PR does not have the 'run-ci' label
+        if: github.event_name == 'pull_request' && !contains(github.event.pull_request.labels.*.name, 'run-ci')
+        run: |
+          echo "This pull request does not have the 'run-ci' label. Failing the workflow."
+          exit 1
+
+      - name: Fail if the PR is a draft
+        if: github.event_name == 'pull_request' && github.event.pull_request.draft == true
+        run: |
+          echo "This pull request is a draft. Failing the workflow."
+          exit 1
+
+      - name: Detect file changes
+        id: filter
+        uses: dorny/paths-filter@v3
+        with:
+          filters: |
+            main_package:
+              - "python/**"
+              - "scripts/ci/**"
+              - "test/**"
+              - ".github/workflows/pr-test-amd.yml"
+            sgl_kernel:
+              - "sgl-kernel/**"
+
+  # =============================================== sgl-kernel ====================================================
+  sgl-kernel-unit-test-amd:
+    needs: [check-changes]
+    if: needs.check-changes.outputs.sgl_kernel == 'true'
    strategy:
      fail-fast: false
      matrix:
        runner: [linux-mi300-gpu-1]
    runs-on: ${{matrix.runner}}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Ensure VRAM is clear
+        run: bash scripts/ensure_vram_clear.sh rocm
+
+      - name: Start CI container
+        run: bash scripts/ci/amd_ci_start_container.sh
+        env:
+          GITHUB_WORKSPACE: ${{ github.workspace }}
+
+      - name: Install dependencies
+        run: |
+          bash scripts/ci/amd_ci_install_dependency.sh
+
+      - name: Run test
+        timeout-minutes: 14
+        run: |
+          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_align.py
+          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_topk_softmax.py
+          docker exec -w /sglang-checkout/sgl-kernel/tests/speculative ci_sglang python3 -m pytest test_eagle_utils.py
+          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_apply_token_bitmask_inplace.py
+          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_activation.py
+          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_kvcacheio.py
+
+  # =============================================== primary ====================================================
+
+  unit-test-frontend-amd:
+    needs: [check-changes]
+    if: always() && !failure() && !cancelled() &&
+      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
+    strategy:
+      fail-fast: false
+      matrix:
+        runner: [linux-mi300-gpu-1]
+    runs-on: ${{matrix.runner}}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Ensure VRAM is clear
+        run: bash scripts/ensure_vram_clear.sh rocm
+
+      - name: Start CI container
+        run: bash scripts/ci/amd_ci_start_container.sh
+        env:
+          GITHUB_WORKSPACE: ${{ github.workspace }}
+
+      - name: Install dependencies
+        run: |
+          bash scripts/ci/amd_ci_install_dependency.sh
+
+      - name: Run test
+        timeout-minutes: 10
+        run: |
+          docker exec -w /sglang-checkout/test/lang ci_sglang python3 run_suite.py --suite per-commit
+
+  unit-test-backend-1-gpu-amd:
+    needs: [check-changes, unit-test-frontend-amd]
+    if: always() && !failure() && !cancelled() &&
+      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
+    strategy:
+      fail-fast: false
+      matrix:
+        runner: [linux-mi300-gpu-1]
+        part: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
@@ -47,19 +151,20 @@ jobs:
      - name: Install dependencies
        run: bash scripts/ci/amd_ci_install_dependency.sh

-      - name: Evaluate Accuracy
+      - name: Run test
        timeout-minutes: 30
        run: |
-          bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_eval_accuracy_large.py
-          bash scripts/ci/amd_ci_exec.sh python3 test_eval_fp8_accuracy.py
-          bash scripts/ci/amd_ci_exec.sh python3 models/test_qwen_models.py
+          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 12

-  accuracy-test-2-gpu-amd:
-    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
+  unit-test-backend-2-gpu-amd:
+    needs: [check-changes]
+    if: always() && !failure() && !cancelled() &&
+      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    strategy:
      fail-fast: false
      matrix:
        runner: [linux-mi300-gpu-2]
+        part: [0, 1]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout code
@@ -76,17 +181,20 @@ jobs:
      - name: Install dependencies
        run: bash scripts/ci/amd_ci_install_dependency.sh

-      - name: Evaluate accuracy (TP=2)
-        timeout-minutes: 60
+      - name: Run test
+        timeout-minutes: 30
        run: |
-          bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_moe_eval_accuracy_large.py
+          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

-  mla-test-1-gpu-amd:
-    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
+  unit-test-backend-8-gpu-amd:
+    needs: [check-changes, unit-test-backend-2-gpu-amd]
+    if: always() && !failure() && !cancelled() &&
+      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    strategy:
      fail-fast: false
      matrix:
-        runner: [linux-mi300-gpu-1]
+        runner: [linux-mi300-gpu-8]
+        part: [0, 1, 2]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout code
@@ -103,13 +211,15 @@ jobs:
      - name: Install dependencies
        run: bash scripts/ci/amd_ci_install_dependency.sh

-      - name: MLA TEST
-        timeout-minutes: 30
+      - name: Run test
+        timeout-minutes: 60
        run: |
-          bash scripts/ci/amd_ci_exec.sh python3 test_mla.py
+          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 --timeout-per-file 3600

  performance-test-1-gpu-part-1-amd:
-    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
+    needs: [check-changes]
+    if: always() && !failure() && !cancelled() &&
+      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    strategy:
      fail-fast: false
      matrix:
@@ -152,7 +262,9 @@ jobs:
          bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_non_stream_small_batch_size

  performance-test-1-gpu-part-2-amd:
-    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
+    needs: [check-changes]
+    if: always() && !failure() && !cancelled() &&
+      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    strategy:
      fail-fast: false
      matrix:
@@ -188,8 +300,10 @@ jobs:
        run: |
          bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default_fp8

-  bench-test-2-gpu-amd:
-    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
+  performance-test-2-gpu-amd:
+    needs: [check-changes, unit-test-backend-2-gpu-amd]
+    if: always() && !failure() && !cancelled() &&
+      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    strategy:
      fail-fast: false
      matrix:
@@ -235,13 +349,14 @@ jobs:
        run: |
          bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache

-  unit-test-backend-1-gpu-amd:
-    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
+  accuracy-test-1-gpu-amd:
+    needs: [check-changes]
+    if: always() && !failure() && !cancelled() &&
+      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    strategy:
      fail-fast: false
      matrix:
        runner: [linux-mi300-gpu-1]
-        part: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout code
@@ -258,13 +373,17 @@ jobs:
      - name: Install dependencies
        run: bash scripts/ci/amd_ci_install_dependency.sh

-      - name: Run test
+      - name: Evaluate Accuracy
        timeout-minutes: 30
        run: |
-          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 12
+          bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_eval_accuracy_large.py
+          bash scripts/ci/amd_ci_exec.sh python3 test_eval_fp8_accuracy.py
+          bash scripts/ci/amd_ci_exec.sh python3 models/test_qwen_models.py

-  unit-test-backend-2-gpu-amd:
-    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
+  accuracy-test-2-gpu-amd:
+    needs: [check-changes, accuracy-test-1-gpu-amd]
+    if: always() && !failure() && !cancelled() &&
+      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
    strategy:
      fail-fast: false
      matrix:
@@ -285,68 +404,53 @@ jobs:
      - name: Install dependencies
        run: bash scripts/ci/amd_ci_install_dependency.sh

-      - name: Run test
-        timeout-minutes: 40
+      - name: Evaluate accuracy (TP=2)
+        timeout-minutes: 30
        run: |
-          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd
+          bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_moe_eval_accuracy_large.py

-  unit-test-backend-8-gpu-amd:
-    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
-    strategy:
-      fail-fast: false
-      matrix:
-        runner: [linux-mi300-gpu-8]
-        part: [0, 1]
-    runs-on: ${{matrix.runner}}
+  pr-test-amd-finish:
+    needs:
+      [
+        check-changes,
+
+        sgl-kernel-unit-test-amd,
+
+        unit-test-frontend-amd,
+        unit-test-backend-1-gpu-amd,
+        unit-test-backend-2-gpu-amd,
+        unit-test-backend-8-gpu-amd,
+        performance-test-1-gpu-part-1-amd,
+        performance-test-1-gpu-part-2-amd,
+        performance-test-2-gpu-amd,
+        accuracy-test-1-gpu-amd,
+        accuracy-test-2-gpu-amd,
+      ]
+    if: always()
+    runs-on: ubuntu-latest
    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Ensure VRAM is clear
-        run: bash scripts/ensure_vram_clear.sh rocm
-
-      - name: Start CI container
-        run: bash scripts/ci/amd_ci_start_container.sh
-        env:
-          GITHUB_WORKSPACE: ${{ github.workspace }}
-
-      - name: Install dependencies
-        run: bash scripts/ci/amd_ci_install_dependency.sh
-
-      - name: Run test
-        timeout-minutes: 60
+      - name: Check all dependent job statuses
        run: |
-          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 --timeout-per-file 3600
+          # Convert the 'needs' context to a JSON string
+          json_needs='${{ toJson(needs) }}'

-  unit-test-sgl-kernel-amd:
-    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
-    strategy:
-      fail-fast: false
-      matrix:
-        runner: [linux-mi300-gpu-1]
-    runs-on: ${{matrix.runner}}
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
+          # Get a list of all job names from the JSON keys
+          job_names=$(echo "$json_needs" | jq -r 'keys_unsorted[]')

-      - name: Ensure VRAM is clear
-        run: bash scripts/ensure_vram_clear.sh rocm
+          for job in $job_names; do
+            # For each job, extract its result
+            result=$(echo "$json_needs" | jq -r --arg j "$job" '.[$j].result')

-      - name: Start CI container
-        run: bash scripts/ci/amd_ci_start_container.sh
-        env:
-          GITHUB_WORKSPACE: ${{ github.workspace }}
+            # Print the job name and its result
+            echo "$job: $result"

-      - name: Install dependencies
-        run: |
-          bash scripts/ci/amd_ci_install_dependency.sh
+            # Check for failure or cancellation and exit if found
+            if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
+              echo "The above jobs failed."
+              exit 1
+            fi
+          done

-      - name: Run test
-        timeout-minutes: 14
-        run: |
-          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_align.py
-          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_topk_softmax.py
-          docker exec -w /sglang-checkout/sgl-kernel/tests/speculative ci_sglang python3 -m pytest test_eagle_utils.py
-          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_apply_token_bitmask_inplace.py
-          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_activation.py
-          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_kvcacheio.py
+          # If the loop completes, all jobs were successful
+          echo "All jobs completed successfully"
+          exit 0
--- a/test/srt/hicache/test_hicache_storage.py
+++ b/test/srt/hicache/test_hicache_storage.py
+import time
 import unittest
 from types import SimpleNamespace

@@ -39,6 +40,7 @@ class TestHiCache(CustomTestCase):
    @classmethod
    def tearDownClass(cls):
        kill_process_tree(cls.process.pid)
+        time.sleep(5)

    def test_mmlu(self):
        args = SimpleNamespace(

--- a/test/srt/models/test_vlm_models.py
+++ b/test/srt/models/test_vlm_models.py
@@ -8,7 +8,7 @@ import sys
 import unittest
 from types import SimpleNamespace

-from sglang.srt.utils import kill_process_tree
+from sglang.srt.utils import is_hip, kill_process_tree
 from sglang.test.test_utils import (
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
@@ -17,15 +17,16 @@ from sglang.test.test_utils import (
    popen_launch_server,
 )

+_is_hip = is_hip()
 # VLM models for testing
-MODELS = [
-    SimpleNamespace(model="google/gemma-3-27b-it", mmmu_accuracy=0.45),
-    SimpleNamespace(
-        model="Qwen/Qwen2.5-VL-3B-Instruct",
-        mmmu_accuracy=0.4,
-    ),
-    SimpleNamespace(model="openbmb/MiniCPM-V-2_6", mmmu_accuracy=0.4),
-]
+if _is_hip:
+    MODELS = [SimpleNamespace(model="openbmb/MiniCPM-V-2_6", mmmu_accuracy=0.4)]
+else:
+    MODELS = [
+        SimpleNamespace(model="google/gemma-3-27b-it", mmmu_accuracy=0.45),
+        SimpleNamespace(model="Qwen/Qwen2.5-VL-3B-Instruct", mmmu_accuracy=0.4),
+        SimpleNamespace(model="openbmb/MiniCPM-V-2_6", mmmu_accuracy=0.4),
+    ]

 # Set default mem_fraction_static to 0.8
 DEFAULT_MEM_FRACTION_STATIC = 0.8

--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -125,6 +125,7 @@ suites = {
        TestFile("test_torch_compile_moe.py", 210),
        TestFile("test_torch_native_attention_backend.py", 123),
        TestFile("test_torchao.py", 70),
+        TestFile("test_triton_attention_kernels.py", 4),
        TestFile("test_triton_attention_backend.py", 150),
        TestFile("test_triton_attention_kernels.py", 4),
        TestFile("test_triton_moe_channel_fp8_kernel.py", 25),
@@ -372,9 +373,9 @@ suites = {
 suite_amd = {
    "per-commit-amd": [
        TestFile("function_call/test_json_schema_constraint.py", 1),
-        TestFile("hicache/test_hicache.py", 116),
-        TestFile("hicache/test_hicache_mla.py", 127),
-        TestFile("hicache/test_hicache_storage.py", 127),
+        # TestFile("hicache/test_hicache.py", 116), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/12575
+        # TestFile("hicache/test_hicache_mla.py", 127), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/12574
+        # TestFile("hicache/test_hicache_storage.py", 127), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/12575
        TestFile("lora/test_lora.py", 150),
        TestFile("lora/test_lora_backend.py", 99),
        TestFile("lora/test_lora_cuda_graph.py", 250),
@@ -385,8 +386,9 @@ suite_amd = {
        TestFile("models/test_qwen_models.py", 82),
        TestFile("models/test_reward_models.py", 132),
        TestFile("models/test_transformers_models.py", 320),
-        TestFile("openai_server/basic/test_openai_embedding.py", 79),
-        TestFile("openai_server/basic/test_openai_server.py", 270),
+        TestFile("models/test_vlm_models.py", 437),
+        TestFile("openai_server/basic/test_openai_embedding.py", 141),
+        TestFile("openai_server/basic/test_openai_server.py", 149),
        TestFile("openai_server/basic/test_protocol.py", 10),
        TestFile("openai_server/basic/test_serving_chat.py", 10),
        TestFile("openai_server/basic/test_serving_completions.py", 10),
@@ -409,14 +411,15 @@ suite_amd = {
        TestFile("test_create_kvindices.py", 2),
        TestFile("test_eval_fp8_accuracy.py", 303),
        TestFile("test_function_call_parser.py", 10),
-        TestFile("test_fused_moe.py", 80),
+        TestFile("test_fused_moe.py", 30),
+        TestFile("test_harmony_parser.py", 20),
        TestFile("test_input_embeddings.py", 38),
        TestFile("test_io_struct.py", 8),
        TestFile("test_jinja_template_utils.py", 1),
        TestFile("test_metrics.py", 32),
        TestFile("test_metrics_utils.py", 1),
        TestFile("test_mla.py", 242),
-        TestFile("test_mla_deepseek_v3.py", 221),
+        # TestFile("test_mla_deepseek_v3.py", 221), # Temporarily disabled, see https://github.com/sgl-project/sglang/issues/12574
        TestFile("test_no_chunked_prefill.py", 108),
        TestFile("test_page_size.py", 60),
        TestFile("test_penalty.py", 180),
@@ -433,9 +436,12 @@ suite_amd = {
        TestFile("test_torch_compile.py", 169),
        TestFile("test_torch_compile_moe.py", 210),
        TestFile("test_torch_native_attention_backend.py", 123),
-        TestFile("test_triton_attention_backend.py", 250),
+        # TestFile("test_triton_attention_kernels.py", 4),
+        TestFile("test_triton_attention_backend.py", 150),
+        TestFile("test_triton_sliding_window.py", 250),
        TestFile("test_wave_attention_kernels.py", 2),
        # Disabled temporarily
+        # TestFile("test_vlm_input_format.py", 300),
        # TestFile("models/test_embedding_models.py", 73), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
        # TestFile("openai_server/features/test_openai_server_hidden_states.py", 240),
        # TestFile("rl/test_update_weights_from_tensor.py", 48),