Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
0dd6cf16
Unverified
Commit
0dd6cf16
authored
Oct 17, 2025
by
Hank Han
Committed by
GitHub
Oct 16, 2025
Browse files
[ci]use H20 to run disaggregation test (#11543)
parent
0975ba99
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
75 additions
and
128 deletions
+75
-128
.github/workflows/pr-test-h20.yml
.github/workflows/pr-test-h20.yml
+0
-106
.github/workflows/pr-test.yml
.github/workflows/pr-test.yml
+33
-0
python/sglang/test/test_disaggregation_utils.py
python/sglang/test/test_disaggregation_utils.py
+33
-15
test/srt/run_suite.py
test/srt/run_suite.py
+2
-2
test/srt/test_disaggregation_different_tp.py
test/srt/test_disaggregation_different_tp.py
+5
-4
test/srt/test_disaggregation_pp.py
test/srt/test_disaggregation_pp.py
+2
-1
No files found.
.github/workflows/pr-test-h20.yml
deleted
100644 → 0
View file @
0975ba99
name
:
PR Test (H20)
on
:
push
:
branches
:
[
main
]
pull_request
:
branches
:
[
main
]
types
:
[
synchronize
,
labeled
]
workflow_dispatch
:
inputs
:
version
:
required
:
true
type
:
choice
default
:
'
release'
options
:
-
'
release'
-
'
nightly'
concurrency
:
group
:
pr-test-h20-${{ github.ref }}
cancel-in-progress
:
true
jobs
:
check-changes
:
runs-on
:
ubuntu-latest
outputs
:
h20_files
:
${{ steps.filter.outputs.h20_files }}
steps
:
-
name
:
Checkout code
uses
:
actions/checkout@v4
-
name
:
Fail if the PR does not have the 'run-ci' label
if
:
github.event_name == 'pull_request' && !contains(github.event.pull_request.labels.*.name, 'run-ci')
run
:
|
echo "This pull request does not have the 'run-ci' label. Failing the workflow."
exit 1
-
name
:
Fail if the PR is a draft
if
:
github.event_name == 'pull_request' && github.event.pull_request.draft ==
true
run
:
|
echo "This pull request is a draft. Failing the workflow."
exit 1
-
name
:
Detect file changes
id
:
filter
uses
:
dorny/paths-filter@v3
with
:
filters
:
|
h20_files:
- "python/sglang/srt/models/deepseek*"
- "python/sglang/srt/layers/moe/**"
- ".github/workflows/pr-test-h20.yml"
- "python/pyproject.toml"
per-commit-8-gpu-h20
:
needs
:
[
check-changes
]
if
:
needs.check-changes.outputs.h20_files == 'true'
runs-on
:
8-gpu-h20
steps
:
-
name
:
Checkout code
uses
:
actions/checkout@v4
-
name
:
Install dependencies
run
:
|
bash scripts/ci/ci_install_dependency.sh
-
name
:
Run test
timeout-minutes
:
20
run
:
|
cd test/srt
python3 run_suite.py --suite per-commit-8-gpu-h20
pr-test-h20-finish
:
needs
:
[
check-changes
,
per-commit-8-gpu-h20
,
]
if
:
always()
runs-on
:
ubuntu-latest
steps
:
-
name
:
Check all dependent job statuses
run
:
|
# Convert the 'needs' context to a JSON string
json_needs='${{ toJson(needs) }}'
# Get a list of all job names from the JSON keys
job_names=$(echo "$json_needs" | jq -r 'keys_unsorted[]')
for job in $job_names; do
# For each job, extract its result
result=$(echo "$json_needs" | jq -r --arg j "$job" '.[$j].result')
# Print the job name and its result
echo "$job: $result"
# Check for failure or cancellation and exit if found
if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
echo "The above jobs failed."
exit 1
fi
done
# If the loop completes, all jobs were successful
echo "All jobs completed successfully"
exit 0
.github/workflows/pr-test.yml
View file @
0dd6cf16
...
...
@@ -350,6 +350,39 @@ jobs:
cd test/srt
python3 run_suite.py --suite per-commit-8-gpu --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
unit-test-backend-8-gpu-h20
:
needs
:
[
check-changes
,
unit-test-backend-2-gpu
,
sgl-kernel-build-wheels
]
if
:
always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
runs-on
:
8-gpu-h20
env
:
SGLANG_CI_RDMA_ALL_DEVICES
:
"
mlx5_1,mlx5_2,mlx5_3,mlx5_4"
strategy
:
fail-fast
:
false
matrix
:
part
:
[
0
,
1
]
steps
:
-
name
:
Checkout code
uses
:
actions/checkout@v4
-
name
:
Download artifacts
if
:
needs.check-changes.outputs.sgl_kernel == 'true'
uses
:
actions/download-artifact@v4
with
:
path
:
sgl-kernel/dist/
merge-multiple
:
true
pattern
:
wheel-python3.10-cuda12.9
-
name
:
Install dependencies
run
:
|
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
-
name
:
Run test
timeout-minutes
:
20
run
:
|
cd test/srt
python3 run_suite.py --suite per-commit-8-gpu-h20 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
performance-test-1-gpu-part-1
:
needs
:
[
check-changes
,
sgl-kernel-build-wheels
]
if
:
always() && !failure() && !cancelled() &&
...
...
python/sglang/test/test_disaggregation_utils.py
View file @
0dd6cf16
import
logging
import
os
import
time
import
warnings
...
...
@@ -15,6 +16,8 @@ from sglang.test.test_utils import (
popen_with_error_check
,
)
logger
=
logging
.
getLogger
(
__name__
)
class
TestDisaggregationBase
(
CustomTestCase
):
@
classmethod
...
...
@@ -100,11 +103,28 @@ class TestDisaggregationBase(CustomTestCase):
def
get_rdma_devices_args
():
def
_parse_list_env
(
var_name
:
str
):
val
=
os
.
getenv
(
var_name
)
if
not
val
:
return
None
items
=
[
x
.
strip
()
for
x
in
val
.
split
(
","
)
if
x
.
strip
()]
return
items
or
None
def
_pick_default_pair
(
rdma_all_devices
):
return
[
rdma_all_devices
[
0
],
rdma_all_devices
[
len
(
rdma_all_devices
)
//
2
]]
rdma_all_devices
=
_parse_list_env
(
"SGLANG_CI_RDMA_ALL_DEVICES"
)
or
[
f
"mlx5_roce
{
i
}
"
for
i
in
range
(
8
)
]
logger
.
info
(
"Resolved rdma_all_devices=%s"
,
rdma_all_devices
)
n_rdma
=
len
(
rdma_all_devices
)
# 1. Get visible GPU indices
cuda_visible_devices
=
os
.
getenv
(
"CUDA_VISIBLE_DEVICES"
)
if
not
cuda_visible_devices
:
warnings
.
warn
(
"CUDA_VISIBLE_DEVICES is not set. Using default RDMA devices."
)
return
"
mlx5_roce0,mlx5_roce4"
return
"
,"
.
join
(
_pick_default_pair
(
rdma_all_devices
))
try
:
# Convert to list of integers (handling possible spaces and empty strings)
...
...
@@ -112,29 +132,27 @@ def get_rdma_devices_args():
int
(
idx
.
strip
())
for
idx
in
cuda_visible_devices
.
split
(
","
)
if
idx
.
strip
()
]
if
not
gpu_indices
or
len
(
gpu_indices
)
>
4
:
return
"
mlx5_roce0,mlx5_roce4"
return
"
,"
.
join
(
_pick_default_pair
(
rdma_all_devices
))
except
ValueError
:
warnings
.
warn
(
f
"Invalid CUDA_VISIBLE_DEVICES format:
{
cuda_visible_devices
}
"
)
return
"
mlx5_roce0,mlx5_roce4"
return
"
,"
.
join
(
_pick_default_pair
(
rdma_all_devices
))
# 2. Calculate base RDMA index group (each group of 4 GPUs uses consecutive devices)
base_rdma_group
=
min
(
gpu_indices
)
//
4
*
4
# 3. Generate RDMA device names
rdma_devices
=
[]
base_rdma_group
=
(
min
(
gpu_indices
)
//
4
)
*
4
for
gpu_idx
in
gpu_indices
:
# Validate GPU index within expected range
if
gpu_idx
<
base_rdma_group
or
gpu_idx
>=
base_rdma_group
+
4
:
if
not
(
base_rdma_group
<=
gpu_idx
<
base_rdma_group
+
4
):
warnings
.
warn
(
f
"GPU index
{
gpu_idx
}
is outside expected group
{
base_rdma_group
}
-
{
base_rdma_group
+
3
}
"
f
"GPU index
{
gpu_idx
}
is outside expected group "
f
"
{
base_rdma_group
}
-
{
base_rdma_group
+
3
}
"
)
continue
# Map GPU index to RDMA device index
rdma_index
=
base_rdma_group
//
4
*
4
+
(
gpu_idx
%
4
)
rdma_devices
.
append
(
f
"mlx5_roce
{
rdma_index
}
"
)
# 3. Generate RDMA device names
rdma_devices
=
[]
for
gpu_idx
in
gpu_indices
:
nic_index
=
gpu_idx
//
(
8
//
n_rdma
)
rdma_devices
.
append
(
rdma_all_devices
[
nic_index
])
if
not
rdma_devices
:
return
"
mlx5_roce0,mlx5_roce4"
return
"
,"
.
join
(
_pick_default_pair
(
rdma_all_devices
))
return
","
.
join
(
rdma_devices
)
test/srt/run_suite.py
View file @
0dd6cf16
...
...
@@ -163,9 +163,7 @@ suites = {
TestFile
(
"lora/test_lora_llama4.py"
,
400
),
TestFile
(
"test_deepseek_v3_basic.py"
,
275
),
TestFile
(
"test_deepseek_v3_mtp.py"
,
275
),
TestFile
(
"test_disaggregation_different_tp.py"
,
600
),
TestFile
(
"test_disaggregation_hybrid_attention.py"
,
200
),
TestFile
(
"test_disaggregation_pp.py"
,
140
),
],
"per-commit-4-gpu-b200"
:
[
# TestFile("test_gpt_oss_4gpu.py", 600),
...
...
@@ -182,6 +180,8 @@ suites = {
TestFile
(
"test_deepseek_v32_basic.py"
,
275
),
],
"per-commit-8-gpu-h20"
:
[
TestFile
(
"test_disaggregation_different_tp.py"
,
600
),
TestFile
(
"test_disaggregation_pp.py"
,
140
),
TestFile
(
"quant/test_w4a8_deepseek_v3.py"
,
371
),
],
"vllm_dependency_test"
:
[
...
...
test/srt/test_disaggregation_different_tp.py
View file @
0dd6cf16
...
...
@@ -9,6 +9,7 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST_MLA
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
popen_launch_pd_server
,
try_cached_model
,
)
...
...
@@ -19,7 +20,7 @@ class TestDisaggregationMooncakePrefillLargerTP(TestDisaggregationBase):
# Temporarily disable JIT DeepGEMM
envs
.
SGLANG_ENABLE_JIT_DEEPGEMM
.
set
(
False
)
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST_MLA
cls
.
model
=
try_cached_model
(
DEFAULT_MODEL_NAME_FOR_TEST_MLA
)
# Non blocking start servers
cls
.
start_prefill
()
...
...
@@ -90,7 +91,7 @@ class TestDisaggregationMooncakeDecodeLargerTP(TestDisaggregationBase):
# Temporarily disable JIT DeepGEMM
envs
.
SGLANG_ENABLE_JIT_DEEPGEMM
.
set
(
False
)
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST_MLA
cls
.
model
=
try_cached_model
(
DEFAULT_MODEL_NAME_FOR_TEST_MLA
)
# Non blocking start servers
cls
.
start_prefill
()
...
...
@@ -161,7 +162,7 @@ class TestDisaggregationMooncakeMHAPrefillLargerTP(TestDisaggregationBase):
# Temporarily disable JIT DeepGEMM
envs
.
SGLANG_ENABLE_JIT_DEEPGEMM
.
set
(
False
)
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
model
=
try_cached_model
(
DEFAULT_MODEL_NAME_FOR_TEST
)
# Non blocking start servers
cls
.
start_prefill
()
...
...
@@ -232,7 +233,7 @@ class TestDisaggregationMooncakeMHADecodeLargerTP(TestDisaggregationBase):
# Temporarily disable JIT DeepGEMM
envs
.
SGLANG_ENABLE_JIT_DEEPGEMM
.
set
(
False
)
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
model
=
try_cached_model
(
DEFAULT_MODEL_NAME_FOR_TEST
)
# Non blocking start servers
cls
.
start_prefill
()
...
...
test/srt/test_disaggregation_pp.py
View file @
0dd6cf16
...
...
@@ -8,6 +8,7 @@ from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
popen_launch_pd_server
,
try_cached_model
,
)
...
...
@@ -15,7 +16,7 @@ class TestDisaggregationPPAccuracy(TestDisaggregationBase):
@
classmethod
def
setUpClass
(
cls
):
super
().
setUpClass
()
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
model
=
try_cached_model
(
DEFAULT_MODEL_NAME_FOR_TEST
)
# Non blocking start servers
cls
.
start_prefill
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment