Unverified commit 36942660, authored by Hubert Lu, committed by GitHub
Browse files

Expand and update test coverage for AMD CI (#10044)

parent 9f5e7018
......@@ -25,13 +25,117 @@ concurrency:
cancel-in-progress: true
jobs:
accuracy-test-1-gpu-amd:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
# Gate job that the jobs below list under `needs`. It fails pull requests that
# lack the 'run-ci' label or are still drafts, then publishes which path groups
# changed (`main_package`, `sgl_kernel`) as job outputs.
check-changes:
runs-on: ubuntu-latest
outputs:
main_package: ${{ steps.filter.outputs.main_package }}
sgl_kernel: ${{ steps.filter.outputs.sgl_kernel }}
steps:
- name: Checkout code
uses: actions/checkout@v4
# The two guards below apply to pull_request events only; other event types
# skip them and go straight to change detection.
- name: Fail if the PR does not have the 'run-ci' label
if: github.event_name == 'pull_request' && !contains(github.event.pull_request.labels.*.name, 'run-ci')
run: |
echo "This pull request does not have the 'run-ci' label. Failing the workflow."
exit 1
- name: Fail if the PR is a draft
if: github.event_name == 'pull_request' && github.event.pull_request.draft == true
run: |
echo "This pull request is a draft. Failing the workflow."
exit 1
# The step id `filter` is what the job-level `outputs` mapping reads from.
# NOTE(review): the workflow path in the `main_package` filter is presumably
# this file itself; confirm if the workflow is ever renamed.
- name: Detect file changes
id: filter
uses: dorny/paths-filter@v3
with:
filters: |
main_package:
- "python/**"
- "scripts/ci/**"
- "test/**"
- ".github/workflows/pr-test-amd.yml"
sgl_kernel:
- "sgl-kernel/**"
# =============================================== sgl-kernel ====================================================
# Runs the sgl-kernel pytest files on a `linux-mi300-gpu-1` runner. The job is
# gated on `check-changes` and only runs when its `sgl_kernel` output is 'true'.
sgl-kernel-unit-test-amd:
needs: [check-changes]
if: needs.check-changes.outputs.sgl_kernel == 'true'
strategy:
fail-fast: false
matrix:
runner: [linux-mi300-gpu-1]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Ensure VRAM is clear
run: bash scripts/ensure_vram_clear.sh rocm
- name: Start CI container
run: bash scripts/ci/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd_ci_install_dependency.sh
# Each test file is a separate pytest invocation executed inside the
# `ci_sglang` container; the whole step is capped at 14 minutes.
- name: Run test
timeout-minutes: 14
run: |
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_align.py
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_topk_softmax.py
docker exec -w /sglang-checkout/sgl-kernel/tests/speculative ci_sglang python3 -m pytest test_eagle_utils.py
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_apply_token_bitmask_inplace.py
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_activation.py
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_kvcacheio.py
# =============================================== primary ====================================================
# Runs the frontend language test suite (`test/lang`, suite `per-commit`) on a
# `linux-mi300-gpu-1` runner.
unit-test-frontend-amd:
needs: [check-changes]
# `always()` replaces the implicit `success()` status check; the job still
# requires that nothing has failed or been cancelled, and that at least one of
# the `main_package` / `sgl_kernel` path groups changed.
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
strategy:
fail-fast: false
matrix:
runner: [linux-mi300-gpu-1]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Ensure VRAM is clear
run: bash scripts/ensure_vram_clear.sh rocm
- name: Start CI container
run: bash scripts/ci/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd_ci_install_dependency.sh
# The suite runner is executed inside the `ci_sglang` container with a
# 10-minute cap on the step.
- name: Run test
timeout-minutes: 10
run: |
docker exec -w /sglang-checkout/test/lang ci_sglang python3 run_suite.py --suite per-commit
unit-test-backend-1-gpu-amd:
needs: [check-changes, unit-test-frontend-amd]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
strategy:
fail-fast: false
matrix:
runner: [linux-mi300-gpu-1]
part: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
uses: actions/checkout@v4
......@@ -47,19 +151,20 @@ jobs:
- name: Install dependencies
run: bash scripts/ci/amd_ci_install_dependency.sh
- name: Evaluate Accuracy
- name: Run test
timeout-minutes: 30
run: |
bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_eval_accuracy_large.py
bash scripts/ci/amd_ci_exec.sh python3 test_eval_fp8_accuracy.py
bash scripts/ci/amd_ci_exec.sh python3 models/test_qwen_models.py
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 12
accuracy-test-2-gpu-amd:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
unit-test-backend-2-gpu-amd:
needs: [check-changes]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
strategy:
fail-fast: false
matrix:
runner: [linux-mi300-gpu-2]
part: [0, 1]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
......@@ -76,17 +181,20 @@ jobs:
- name: Install dependencies
run: bash scripts/ci/amd_ci_install_dependency.sh
- name: Evaluate accuracy (TP=2)
timeout-minutes: 60
- name: Run test
timeout-minutes: 30
run: |
bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_moe_eval_accuracy_large.py
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
mla-test-1-gpu-amd:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
unit-test-backend-8-gpu-amd:
needs: [check-changes, unit-test-backend-2-gpu-amd]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
strategy:
fail-fast: false
matrix:
runner: [linux-mi300-gpu-1]
runner: [linux-mi300-gpu-8]
part: [0, 1, 2]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
......@@ -103,13 +211,15 @@ jobs:
- name: Install dependencies
run: bash scripts/ci/amd_ci_install_dependency.sh
- name: MLA TEST
timeout-minutes: 30
- name: Run test
timeout-minutes: 60
run: |
bash scripts/ci/amd_ci_exec.sh python3 test_mla.py
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 --timeout-per-file 3600
performance-test-1-gpu-part-1-amd:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
needs: [check-changes]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
strategy:
fail-fast: false
matrix:
......@@ -152,7 +262,9 @@ jobs:
bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_non_stream_small_batch_size
performance-test-1-gpu-part-2-amd:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
needs: [check-changes]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
strategy:
fail-fast: false
matrix:
......@@ -188,8 +300,10 @@ jobs:
run: |
bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default_fp8
bench-test-2-gpu-amd:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
performance-test-2-gpu-amd:
needs: [check-changes, unit-test-backend-2-gpu-amd]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
strategy:
fail-fast: false
matrix:
......@@ -235,13 +349,14 @@ jobs:
run: |
bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache
unit-test-backend-1-gpu-amd:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
accuracy-test-1-gpu-amd:
needs: [check-changes]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
strategy:
fail-fast: false
matrix:
runner: [linux-mi300-gpu-1]
part: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
......@@ -258,13 +373,17 @@ jobs:
- name: Install dependencies
run: bash scripts/ci/amd_ci_install_dependency.sh
- name: Run test
- name: Evaluate Accuracy
timeout-minutes: 30
run: |
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 12
bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_eval_accuracy_large.py
bash scripts/ci/amd_ci_exec.sh python3 test_eval_fp8_accuracy.py
bash scripts/ci/amd_ci_exec.sh python3 models/test_qwen_models.py
unit-test-backend-2-gpu-amd:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
accuracy-test-2-gpu-amd:
needs: [check-changes, accuracy-test-1-gpu-amd]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
strategy:
fail-fast: false
matrix:
......@@ -285,68 +404,53 @@ jobs:
- name: Install dependencies
run: bash scripts/ci/amd_ci_install_dependency.sh
- name: Run test
timeout-minutes: 40
- name: Evaluate accuracy (TP=2)
timeout-minutes: 30
run: |
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd
bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_moe_eval_accuracy_large.py
unit-test-backend-8-gpu-amd:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
strategy:
fail-fast: false
matrix:
runner: [linux-mi300-gpu-8]
part: [0, 1]
runs-on: ${{matrix.runner}}
pr-test-amd-finish:
needs:
[
check-changes,
sgl-kernel-unit-test-amd,
unit-test-frontend-amd,
unit-test-backend-1-gpu-amd,
unit-test-backend-2-gpu-amd,
unit-test-backend-8-gpu-amd,
performance-test-1-gpu-part-1-amd,
performance-test-1-gpu-part-2-amd,
performance-test-2-gpu-amd,
accuracy-test-1-gpu-amd,
accuracy-test-2-gpu-amd,
]
if: always()
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Ensure VRAM is clear
run: bash scripts/ensure_vram_clear.sh rocm
- name: Start CI container
run: bash scripts/ci/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd_ci_install_dependency.sh
- name: Run test
timeout-minutes: 60
- name: Check all dependent job statuses
run: |
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 --timeout-per-file 3600
# Convert the 'needs' context to a JSON string
json_needs='${{ toJson(needs) }}'
unit-test-sgl-kernel-amd:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
strategy:
fail-fast: false
matrix:
runner: [linux-mi300-gpu-1]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
uses: actions/checkout@v4
# Get a list of all job names from the JSON keys
job_names=$(echo "$json_needs" | jq -r 'keys_unsorted[]')
- name: Ensure VRAM is clear
run: bash scripts/ensure_vram_clear.sh rocm
for job in $job_names; do
# For each job, extract its result
result=$(echo "$json_needs" | jq -r --arg j "$job" '.[$j].result')
- name: Start CI container
run: bash scripts/ci/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
# Print the job name and its result
echo "$job: $result"
- name: Install dependencies
run: |
bash scripts/ci/amd_ci_install_dependency.sh
# Check for failure or cancellation and exit if found
if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
echo "The above jobs failed."
exit 1
fi
done
- name: Run test
timeout-minutes: 14
run: |
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_align.py
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_topk_softmax.py
docker exec -w /sglang-checkout/sgl-kernel/tests/speculative ci_sglang python3 -m pytest test_eagle_utils.py
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_apply_token_bitmask_inplace.py
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_activation.py
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_kvcacheio.py
# If the loop completes, all jobs were successful
echo "All jobs completed successfully"
exit 0
import time
import unittest
from types import SimpleNamespace
......@@ -39,6 +40,7 @@ class TestHiCache(CustomTestCase):
@classmethod
def tearDownClass(cls):
kill_process_tree(cls.process.pid)
time.sleep(5)
def test_mmlu(self):
args = SimpleNamespace(
......
......@@ -8,7 +8,7 @@ import sys
import unittest
from types import SimpleNamespace
from sglang.srt.utils import kill_process_tree
from sglang.srt.utils import is_hip, kill_process_tree
from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
......@@ -17,15 +17,16 @@ from sglang.test.test_utils import (
popen_launch_server,
)
_is_hip = is_hip()
# VLM models for testing
MODELS = [
SimpleNamespace(model="google/gemma-3-27b-it", mmmu_accuracy=0.45),
SimpleNamespace(
model="Qwen/Qwen2.5-VL-3B-Instruct",
mmmu_accuracy=0.4,
),
SimpleNamespace(model="openbmb/MiniCPM-V-2_6", mmmu_accuracy=0.4),
]
if _is_hip:
MODELS = [SimpleNamespace(model="openbmb/MiniCPM-V-2_6", mmmu_accuracy=0.4)]
else:
MODELS = [
SimpleNamespace(model="google/gemma-3-27b-it", mmmu_accuracy=0.45),
SimpleNamespace(model="Qwen/Qwen2.5-VL-3B-Instruct", mmmu_accuracy=0.4),
SimpleNamespace(model="openbmb/MiniCPM-V-2_6", mmmu_accuracy=0.4),
]
# Set default mem_fraction_static to 0.8
DEFAULT_MEM_FRACTION_STATIC = 0.8
......
......@@ -125,6 +125,7 @@ suites = {
TestFile("test_torch_compile_moe.py", 210),
TestFile("test_torch_native_attention_backend.py", 123),
TestFile("test_torchao.py", 70),
TestFile("test_triton_attention_kernels.py", 4),
TestFile("test_triton_attention_backend.py", 150),
TestFile("test_triton_attention_kernels.py", 4),
TestFile("test_triton_moe_channel_fp8_kernel.py", 25),
......@@ -372,9 +373,9 @@ suites = {
suite_amd = {
"per-commit-amd": [
TestFile("function_call/test_json_schema_constraint.py", 1),
TestFile("hicache/test_hicache.py", 116),
TestFile("hicache/test_hicache_mla.py", 127),
TestFile("hicache/test_hicache_storage.py", 127),
# TestFile("hicache/test_hicache.py", 116), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/12575
# TestFile("hicache/test_hicache_mla.py", 127), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/12574
# TestFile("hicache/test_hicache_storage.py", 127), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/12575
TestFile("lora/test_lora.py", 150),
TestFile("lora/test_lora_backend.py", 99),
TestFile("lora/test_lora_cuda_graph.py", 250),
......@@ -385,8 +386,9 @@ suite_amd = {
TestFile("models/test_qwen_models.py", 82),
TestFile("models/test_reward_models.py", 132),
TestFile("models/test_transformers_models.py", 320),
TestFile("openai_server/basic/test_openai_embedding.py", 79),
TestFile("openai_server/basic/test_openai_server.py", 270),
TestFile("models/test_vlm_models.py", 437),
TestFile("openai_server/basic/test_openai_embedding.py", 141),
TestFile("openai_server/basic/test_openai_server.py", 149),
TestFile("openai_server/basic/test_protocol.py", 10),
TestFile("openai_server/basic/test_serving_chat.py", 10),
TestFile("openai_server/basic/test_serving_completions.py", 10),
......@@ -409,14 +411,15 @@ suite_amd = {
TestFile("test_create_kvindices.py", 2),
TestFile("test_eval_fp8_accuracy.py", 303),
TestFile("test_function_call_parser.py", 10),
TestFile("test_fused_moe.py", 80),
TestFile("test_fused_moe.py", 30),
TestFile("test_harmony_parser.py", 20),
TestFile("test_input_embeddings.py", 38),
TestFile("test_io_struct.py", 8),
TestFile("test_jinja_template_utils.py", 1),
TestFile("test_metrics.py", 32),
TestFile("test_metrics_utils.py", 1),
TestFile("test_mla.py", 242),
TestFile("test_mla_deepseek_v3.py", 221),
# TestFile("test_mla_deepseek_v3.py", 221), # Temporarily disabled, see https://github.com/sgl-project/sglang/issues/12574
TestFile("test_no_chunked_prefill.py", 108),
TestFile("test_page_size.py", 60),
TestFile("test_penalty.py", 180),
......@@ -433,9 +436,12 @@ suite_amd = {
TestFile("test_torch_compile.py", 169),
TestFile("test_torch_compile_moe.py", 210),
TestFile("test_torch_native_attention_backend.py", 123),
TestFile("test_triton_attention_backend.py", 250),
# TestFile("test_triton_attention_kernels.py", 4),
TestFile("test_triton_attention_backend.py", 150),
TestFile("test_triton_sliding_window.py", 250),
TestFile("test_wave_attention_kernels.py", 2),
# Disabled temporarily
# TestFile("test_vlm_input_format.py", 300),
# TestFile("models/test_embedding_models.py", 73), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
# TestFile("openai_server/features/test_openai_server_hidden_states.py", 240),
# TestFile("rl/test_update_weights_from_tensor.py", 48),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment