Unverified Commit 195a59fe authored by Sai Enduri's avatar Sai Enduri Committed by GitHub
Browse files

Refactor AMD CI. (#11128)

parent 47488cc3
......@@ -30,7 +30,7 @@ jobs:
strategy:
fail-fast: false
matrix:
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
runner: [linux-mi325-gpu-1]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
......@@ -56,7 +56,7 @@ jobs:
strategy:
fail-fast: false
matrix:
runner: [linux-mi300-gpu-2, linux-mi325-gpu-2, linux-mi35x-gpu-2]
runner: [linux-mi325-gpu-2]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
......@@ -80,7 +80,7 @@ jobs:
strategy:
fail-fast: false
matrix:
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
runner: [linux-mi325-gpu-1]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
......@@ -104,7 +104,7 @@ jobs:
strategy:
fail-fast: false
matrix:
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
runner: [linux-mi325-gpu-1]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
......@@ -144,7 +144,7 @@ jobs:
strategy:
fail-fast: false
matrix:
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
runner: [linux-mi325-gpu-1]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
......@@ -178,7 +178,7 @@ jobs:
strategy:
fail-fast: false
matrix:
runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
runner: [linux-mi325-gpu-2]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
......@@ -222,8 +222,8 @@ jobs:
strategy:
fail-fast: false
matrix:
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
part: [0, 1, 2, 3, 4, 5, 6, 7]
runner: [linux-mi325-gpu-1]
part: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
......@@ -238,40 +238,16 @@ jobs:
run: bash scripts/ci/amd_ci_install_dependency.sh
- name: Run test
timeout-minutes: 50
run: |
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 8
unit-test-backend-1-gpu-amd-mi35x:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
strategy:
fail-fast: false
matrix:
runner: [linux-mi35x-gpu-1]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Start CI container
run: bash scripts/ci/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd_ci_install_dependency.sh
- name: Run test
timeout-minutes: 50
timeout-minutes: 30
run: |
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd-mi35x
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 12
unit-test-backend-2-gpu-amd:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
strategy:
fail-fast: false
matrix:
runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
runner: [linux-mi325-gpu-2]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
......@@ -319,7 +295,7 @@ jobs:
strategy:
fail-fast: false
matrix:
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
runner: [linux-mi325-gpu-1]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
......
......@@ -184,7 +184,7 @@ suite_amd = {
TestFile("lora/test_multi_lora_backend.py", 60),
TestFile("lora/test_lora_cuda_graph.py", 250),
TestFile("lora/test_lora_qwen3.py", 97),
TestFile("models/test_embedding_models.py", 73),
# TestFile("models/test_embedding_models.py", 73), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
TestFile("models/test_compressed_tensors_models.py", 42),
TestFile("models/test_qwen_models.py", 82),
TestFile("models/test_reward_models.py", 132),
......@@ -246,7 +246,7 @@ suite_amd = {
TestFile("test_triton_attention_backend.py", 150),
# TestFile("test_vision_chunked_prefill.py", 175), # Disabled temporarily and track in #7701
TestFile("test_wave_attention_kernels.py", 2),
TestFile("test_wave_attention_backend.py", 150),
# TestFile("test_wave_attention_backend.py", 150), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
],
"per-commit-amd-mi35x": [
TestFile("test_mla.py", 242),
......@@ -257,7 +257,7 @@ suite_amd = {
TestFile("rl/test_update_weights_from_distributed.py", 103),
TestFile("test_data_parallelism.py", 73),
TestFile("test_load_weights_from_remote_instance.py", 72),
TestFile("test_patch_torch.py", 19),
# TestFile("test_patch_torch.py", 19), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
],
"per-commit-4-gpu-amd": [
TestFile("test_pp_single_node.py", 150),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment