Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4bc400f4
Unverified
Commit
4bc400f4
authored
Nov 04, 2025
by
Lucas Wilkinson
Committed by
GitHub
Nov 03, 2025
Browse files
[CI/Testing] Add basic single node dual batch overlap test (#27235)
Signed-off-by:
Lucas Wilkinson
<
lwilkins@redhat.com
>
parent
cac4c10e
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
91 additions
and
0 deletions
+91
-0
.buildkite/test-pipeline.yaml
.buildkite/test-pipeline.yaml
+2
-0
tests/v1/distributed/test_dbo.py
tests/v1/distributed/test_dbo.py
+89
-0
No files found.
.buildkite/test-pipeline.yaml
View file @
4bc400f4
...
...
@@ -1223,6 +1223,7 @@ steps:
-
pytest -v -s tests/compile/test_fusions_e2e.py::test_tp2_attn_quant_allreduce_rmsnorm
-
pytest -v -s tests/distributed/test_context_parallel.py
-
CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len
2048
-
pytest -v -s tests/v1/distributed/test_dbo.py
##### B200 test #####
-
label
:
Distributed Tests (B200)
# optional
...
...
@@ -1233,6 +1234,7 @@ steps:
commands
:
-
pytest -v -s tests/distributed/test_context_parallel.py
-
pytest -v -s tests/distributed/test_nccl_symm_mem_allreduce.py
-
pytest -v -s tests/v1/distributed/test_dbo.py
##### RL Integration Tests #####
-
label
:
Prime-RL Integration Test
# 15min
...
...
tests/v1/distributed/test_dbo.py
0 → 100644
View file @
4bc400f4
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Test Dual Batch Overlap (DBO) with Data Parallelism + Expert Parallelism.
DBO is specifically designed for DP+EP scenarios to hide communication latency
by overlapping computation of two batches. This test validates that DBO works
correctly with the DeepSeek-V2-Lite model using GSM8K evaluation.
"""
import
pytest
from
tests.evals.gsm8k.gsm8k_eval
import
evaluate_gsm8k
from
tests.utils
import
RemoteOpenAIServer
# Model served for the evaluation and the data-parallel world size.
MODEL_NAME = "deepseek-ai/DeepSeek-V2-Lite-Chat"
DP_SIZE = 2

# GSM8K evaluation settings.
# The question count is kept small so CI stays fast, but it must remain large
# enough to hit the DBO thresholds.
NUM_QUESTIONS = 256
NUM_SHOTS = 5  # number of few-shot examples
# Accuracy floor: 0.64 expected (based on vLLM test data) minus a 2% buffer.
MIN_ACCURACY = 0.62

# Raised from 16 so decode batches are big enough to trigger DBO: with 64
# sequences, decode batches should exceed the 32 token threshold.
MAX_NUM_SEQS = 64

# DeepEP all2all backends exercised by the test.
DEEPEP_BACKENDS = ["deepep_low_latency", "deepep_high_throughput"]
@pytest.mark.parametrize("all2all_backend", DEEPEP_BACKENDS)
def test_dbo_dp_ep_gsm8k(all2all_backend: str, num_gpus_available):
    """
    Run a GSM8K evaluation against a server started with DBO + DP + EP.

    The test is skipped when fewer than ``DP_SIZE`` GPUs are available.
    Otherwise a ``RemoteOpenAIServer`` is launched for ``MODEL_NAME`` with the
    given all2all backend, GSM8K is evaluated against it, and the reported
    accuracy must be at least ``MIN_ACCURACY``.
    """
    # One GPU per data-parallel rank is required.
    if num_gpus_available < DP_SIZE:
        pytest.skip(f"Need at least {DP_SIZE} GPUs (DP={DP_SIZE})")

    # Sequence/batch sizing; the larger max-num-seqs is what lets decode
    # batches reach the DBO threshold.
    sizing_flags = [
        "--max-model-len",
        "4096",
        "--max-num-seqs",
        str(MAX_NUM_SEQS),
    ]
    # --enforce-eager is deliberately omitted so that DBO's alternate CUDA
    # graph dispatching is exercised.
    parallel_flags = [
        "--trust-remote-code",
        "--data-parallel-size",
        str(DP_SIZE),
        "--enable-expert-parallel",
    ]
    # Thresholds are pinned so that DBO is known to trigger.
    dbo_flags = [
        "--enable-dbo",
        "--dbo-decode-token-threshold",
        "16",
        "--dbo-prefill-token-threshold",
        "256",
        "--all2all-backend",
        all2all_backend,
    ]
    server_args = [*sizing_flags, *parallel_flags, *dbo_flags]

    # The generous startup timeout allows for model loading with DP+EP.
    with RemoteOpenAIServer(
        MODEL_NAME, server_args, max_wait_seconds=600
    ) as server:
        # Host and port are taken directly from the running server object.
        eval_results = evaluate_gsm8k(
            num_questions=NUM_QUESTIONS,
            num_shots=NUM_SHOTS,
            host=f"http://{server.host}",
            port=server.port,
        )

        # The accuracy must reach the configured floor.
        accuracy = eval_results["accuracy"]
        assert accuracy >= MIN_ACCURACY, (
            f"DBO+DP+EP accuracy too low ({all2all_backend}): "
            f"{accuracy:.3f} < {MIN_ACCURACY:.3f} "
            f"(correct: {eval_results['num_correct']}/"
            f"{eval_results['num_questions']})"
        )
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment