Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
def8f522
Unverified
Commit
def8f522
authored
Apr 20, 2026
by
Sage Moore
Committed by
GitHub
Apr 20, 2026
Browse files
[CI][EPLB] Add Async EPLB end-to-end integration test to CI (#40168)
Signed-off-by:
Sage Moore
<
sage@neuralmagic.com
>
parent
38fa87ca
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
64 additions
and
0 deletions
+64
-0
.buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_dp4_async_eplb.sh
...eduled_integration_test/qwen30b_a3b_fp8_dp4_async_eplb.sh
+55
-0
.buildkite/test_areas/e2e_integration.yaml
.buildkite/test_areas/e2e_integration.yaml
+9
-0
No files found.
.buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_dp4_async_eplb.sh
0 → 100755
View file @
def8f522
#!/usr/bin/env bash
#
# Usage: qwen30b_a3b_fp8_dp4_async_eplb.sh [THRESHOLD] [NUM_QUESTIONS] [START_PORT]
#
#   THRESHOLD      minimum accepted accuracy            (default: 0.8)
#   NUM_QUESTIONS  number of eval questions to run      (default: 1319)
#   START_PORT     port the server listens on           (default: 8050)
#
# Environment:
#   OUT_DIR        directory for result files           (default: /tmp/vllm-scheduled)

# Abort on errors, unset variables and failed pipeline stages; trace commands.
set -o errexit -o nounset -o xtrace -o pipefail

THRESHOLD="${1:-0.8}"
NUM_Q="${2:-1319}"
PORT="${3:-8050}"

# Keep a caller-provided OUT_DIR, otherwise fall back to the default location.
: "${OUT_DIR:=/tmp/vllm-scheduled}"
mkdir -p "$OUT_DIR"
#######################################
# Poll http://127.0.0.1:<port>/health once per second until it answers
# successfully, giving up after 600 seconds.
# Globals:
#   SERVER_PID (read, optional) - used as the process to watch when no
#                                 PID argument is given
# Arguments:
#   $1 - port to poll on 127.0.0.1
#   $2 - optional PID of the server process; when known, the wait stops
#        early if that process no longer exists
# Returns:
#   0 once the health check succeeds; non-zero on timeout or when the
#   watched process has exited.
#######################################
wait_for_server() {
  local port=$1
  local pid=${2:-${SERVER_PID:-}}
  # The port and PID are handed to the inner shell as positional parameters
  # rather than spliced into its source text, so their values are never
  # parsed as shell code.
  timeout 600 bash -c '
    until curl -sf "http://127.0.0.1:$1/health" > /dev/null; do
      # Best effort: if the server process is already gone there is no
      # point in polling for the remaining timeout.
      if [[ -n "$2" ]] && ! kill -0 "$2" 2> /dev/null; then
        echo "server process $2 exited before becoming healthy" >&2
        exit 1
      fi
      sleep 1
    done' wait_for_server "$port" "$pid"
}
# Model handed to `vllm serve`, and the value given to --all2all-backend.
# Both also appear in the results file name and in the accuracy report.
MODEL='Qwen/Qwen3-30B-A3B-FP8'
BACK='allgather_reducescatter'
#######################################
# Stop the background server, if one is running.
# Sends the default signal first, polls up to 20 times at 0.5 s intervals
# for the process to disappear, then sends SIGKILL unconditionally.
# Globals:
#   SERVER_PID (read) - PID of the server; may be unset or empty
# Returns:
#   0 always; failures of the individual kill calls are ignored.
#######################################
cleanup() {
  # No server was ever started.
  [[ -n "${SERVER_PID:-}" ]] || return 0
  # The server is already gone.
  kill -0 "${SERVER_PID}" 2>/dev/null || return 0

  # Ask politely first; the process may vanish between the check and here.
  kill "${SERVER_PID}" 2>/dev/null || true

  local attempt=0
  while (( attempt < 20 )); do
    kill -0 "${SERVER_PID}" 2>/dev/null || break
    sleep 0.5
    attempt=$(( attempt + 1 ))
  done

  # Force-kill whatever is left; an already-dead PID is not an error.
  kill -9 "${SERVER_PID}" 2>/dev/null || true
}
# Run cleanup on every exit path so the background server is not left behind.
trap cleanup EXIT

# Server command line: data-parallel size 4 with expert parallelism and EPLB
# enabled; the EPLB config sets "use_async" to true.
serve_args=(
  "$MODEL"
  --enforce-eager
  --data-parallel-size 4
  --enable-expert-parallel
  --enable-eplb
  --all2all-backend "$BACK"
  --eplb-config '{"window_size":20, "step_interval":100, "use_async":true}'
  --trust-remote-code
  --max-model-len 2048
  --port "$PORT"
)

# Start the server in the background and remember its PID for cleanup.
VLLM_DEEP_GEMM_WARMUP=skip vllm serve "${serve_args[@]}" &
SERVER_PID=$!

# Block until the server reports healthy before running the evaluation.
wait_for_server "$PORT"
#######################################
# Turn an arbitrary string into a tag that is safe inside a file name.
# Arguments:
#   $1 - string to convert (here: the model identifier)
# Outputs:
#   The string with every '/', ':', space and newline replaced by '_'.
#######################################
sanitize_tag() {
  # tr processes backslash escapes itself, so the set must contain '\n'.
  # Writing '\\n' instead gives tr a backslash plus the letter "n", which
  # replaces every "n" in the name and leaves newlines alone.
  # printf '%s' adds no trailing newline that tr would turn into '_'.
  printf '%s' "$1" | tr '/: \n' '____'
}

TAG=$(sanitize_tag "$MODEL")
# Results file: <OUT_DIR>/<sanitized model>_<backend>.json
OUT="${OUT_DIR}/${TAG}_${BACK}.json"
# Run the gsm8k eval script against the local server. The path given to
# --save-results is ${OUT}, which the accuracy check below loads.
eval_cmd=(
  python3 tests/evals/gsm8k/gsm8k_eval.py
  --host http://127.0.0.1
  --port "$PORT"
  --num-questions "${NUM_Q}"
  --save-results "${OUT}"
)
"${eval_cmd[@]}"
#######################################
# Report the accuracy stored in a results file and fail if it is too low.
# Arguments:
#   $1 - path to a JSON file with a top-level "accuracy" number
#   $2 - minimum accepted accuracy
#   $3 - label used in the report line and in the failure message
# Outputs:
#   "<label>: accuracy <acc to 3 decimals>" on stdout
# Returns:
#   0 when accuracy >= threshold, non-zero otherwise (including when the
#   file cannot be read or parsed).
#######################################
check_accuracy() {
  local results_file=$1
  local threshold=$2
  local label=$3
  # Values travel through argv and the heredoc delimiter is quoted, so the
  # shell never rewrites the Python source and quotes or backslashes in a
  # path or label cannot break it.
  python3 - "$results_file" "$threshold" "$label" <<'PY'
import json
import sys

results_path, threshold, label = sys.argv[1], float(sys.argv[2]), sys.argv[3]
with open(results_path) as fh:
    acc = json.load(fh)["accuracy"]
print(f"{label}: accuracy {acc:.3f}")
# Explicit check rather than `assert`, which is removed when Python runs
# with optimizations enabled. `not (>=)` also rejects NaN.
if not (acc >= threshold):
    sys.exit(f"{label} accuracy {acc}")
PY
}

check_accuracy "${OUT}" "${THRESHOLD}" "${MODEL} ${BACK}"
.buildkite/test_areas/e2e_integration.yaml
View file @
def8f522
...
@@ -29,6 +29,15 @@ steps:
...
@@ -29,6 +29,15 @@ steps:
commands
:
commands
:
-
bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020 2
1
-
bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020 2
1
-
label
:
Qwen3-30B-A3B-FP8 DP4 Async EPLB Accuracy
timeout_in_minutes
:
60
device
:
h100
optional
:
true
num_devices
:
4
working_dir
:
"
/vllm-workspace"
commands
:
-
bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_dp4_async_eplb.sh 0.8 200
8050
-
label
:
DeepSeek V2-Lite Prefetch Offload Accuracy (H100)
-
label
:
DeepSeek V2-Lite Prefetch Offload Accuracy (H100)
timeout_in_minutes
:
60
timeout_in_minutes
:
60
device
:
h100
device
:
h100
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment