Unverified commit 70fbb3ad, authored by Shangming Cai, committed by GitHub
Browse files

[CI] Refactor PD disaggregation test suite (#11363)


Signed-off-by: Shangming Cai <csmthu@gmail.com>
parent 9a7e7a65
......@@ -693,6 +693,87 @@ jobs:
cd test/srt
python3 run_suite.py --suite per-commit-4-gpu-b200 --auto-partition-id 0 --auto-partition-size 1 --timeout-per-file 3600
unit-test-disaggregation-2-gpu:
  # Runs the `per-commit-2-gpu-disaggregation` suite on a `2-gpu-runner` machine.
  needs:
    - check-changes
    - sgl-kernel-build-wheels
  # `always()` keeps this condition evaluated even when an upstream job was
  # skipped; the job is still suppressed on upstream failure or cancellation.
  # It only runs when check-changes reports a main-package or sgl-kernel change.
  if: >-
    always() && !failure() && !cancelled() &&
    ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
  runs-on: 2-gpu-runner
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # Wheel artifacts are only fetched when check-changes reports
    # sgl_kernel == 'true'.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9

    # CUSTOM_BUILD_SGL_KERNEL forwards the sgl_kernel change flag to the
    # install script.
    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 20
      run: |
        cd test/srt
        python3 run_suite.py --suite per-commit-2-gpu-disaggregation
unit-test-disaggregation-4-gpu:
  # Runs the `per-commit-4-gpu-disaggregation` suite on a `4-gpu-runner` machine.
  # Listed after the 2-GPU disaggregation job in `needs`, so it waits for it.
  needs:
    - check-changes
    - unit-test-disaggregation-2-gpu
    - sgl-kernel-build-wheels
  # `always()` keeps this condition evaluated even when an upstream job was
  # skipped; the job is still suppressed on upstream failure or cancellation.
  # It only runs when check-changes reports a main-package or sgl-kernel change.
  if: >-
    always() && !failure() && !cancelled() &&
    ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
  runs-on: 4-gpu-runner
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # Wheel artifacts are only fetched when check-changes reports
    # sgl_kernel == 'true'.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9

    # CUSTOM_BUILD_SGL_KERNEL forwards the sgl_kernel change flag to the
    # install script.
    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 20
      run: |
        cd test/srt
        python3 run_suite.py --suite per-commit-4-gpu-disaggregation
unit-test-disaggregation-8-gpu:
  # Runs the `per-commit-8-gpu-disaggregation` suite on an `8-gpu-h200` machine.
  # Waits for the 2-GPU disaggregation job; the 4-GPU job is not in `needs`.
  needs:
    - check-changes
    - unit-test-disaggregation-2-gpu
    - sgl-kernel-build-wheels
  # `always()` keeps this condition evaluated even when an upstream job was
  # skipped; the job is still suppressed on upstream failure or cancellation.
  # It only runs when check-changes reports a main-package or sgl-kernel change.
  if: >-
    always() && !failure() && !cancelled() &&
    ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
  runs-on: 8-gpu-h200
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # Wheel artifacts are only fetched when check-changes reports
    # sgl_kernel == 'true'.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9

    # CUSTOM_BUILD_SGL_KERNEL forwards the sgl_kernel change flag to the
    # install script.
    - name: Install dependencies
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 20
      run: |
        cd test/srt
        python3 run_suite.py --suite per-commit-8-gpu-disaggregation
pr-test-finish:
needs: [
check-changes,
......@@ -707,6 +788,7 @@ jobs:
accuracy-test-1-gpu, accuracy-test-2-gpu,
unit-test-deepep-4-gpu, unit-test-deepep-8-gpu,
unit-test-backend-4-gpu-b200,
unit-test-disaggregation-2-gpu, unit-test-disaggregation-4-gpu, unit-test-disaggregation-8-gpu,
]
if: always()
runs-on: ubuntu-latest
......
......@@ -138,7 +138,6 @@ suites = {
TestFile("lora/test_lora_tp.py", 116),
TestFile("rl/test_update_weights_from_distributed.py", 103),
TestFile("test_data_parallelism.py", 73),
TestFile("test_disaggregation.py", 499),
TestFile("test_dp_attention.py", 594),
TestFile("test_load_weights_from_remote_instance.py", 72),
TestFile("test_patch_torch.py", 19),
......@@ -157,9 +156,6 @@ suites = {
],
"per-commit-8-gpu": [
TestFile("lora/test_lora_llama4.py", 400),
TestFile("test_disaggregation_dp_attention.py", 155),
TestFile("test_disaggregation_different_tp.py", 600),
TestFile("test_disaggregation_pp.py", 140),
TestFile("test_deepseek_v3_basic.py", 275),
TestFile("test_deepseek_v3_mtp.py", 275),
],
......@@ -173,6 +169,16 @@ suites = {
"per-commit-8-gpu-deepep": [
TestFile("ep/test_deepep_large.py", 338),
],
"per-commit-2-gpu-disaggregation": [
TestFile("test_disaggregation_basic.py", 400),
],
"per-commit-4-gpu-disaggregation": [
TestFile("test_disaggregation_dp_attention.py", 155),
],
"per-commit-8-gpu-disaggregation": [
TestFile("test_disaggregation_different_tp.py", 600),
TestFile("test_disaggregation_pp.py", 140),
],
"per-commit-8-gpu-h20": [
TestFile("quant/test_w4a8_deepseek_v3.py", 371),
],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment