Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
1f400c58
Unverified
Commit
1f400c58
authored
Nov 21, 2025
by
Wentao Ye
Committed by
GitHub
Nov 21, 2025
Browse files
[CI] Add batch invariant test to ci (#27842)
Signed-off-by:
yewentao256
<
zhyanwentao@126.com
>
parent
711241c1
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
16 additions
and
1 deletion
+16
-1
.buildkite/test-pipeline.yaml
.buildkite/test-pipeline.yaml
+12
-0
tests/v1/determinism/test_batch_invariance.py
tests/v1/determinism/test_batch_invariance.py
+2
-0
tests/v1/determinism/utils.py
tests/v1/determinism/utils.py
+2
-1
No files found.
.buildkite/test-pipeline.yaml
View file @
1f400c58
...
...
@@ -346,6 +346,18 @@ steps:
commands
:
-
pytest -v -s v1/attention
-
label
:
Batch Invariance Tests (H100)
# 10min
timeout_in_minutes
:
25
gpu
:
h100
source_file_dependencies
:
-
vllm/
-
tests/v1/determinism/
commands
:
-
export VLLM_WORKER_MULTIPROC_METHOD=spawn
-
pip install pytest-timeout pytest-forked
-
pytest -v -s v1/determinism/test_batch_invariance.py
-
pytest -v -s v1/determinism/test_rms_norm_batch_invariant.py
-
label
:
V1 Test attention (B200)
# 10min
timeout_in_minutes
:
30
gpu
:
b200
...
...
tests/v1/determinism/test_batch_invariance.py
View file @
1f400c58
...
...
@@ -190,6 +190,7 @@ def test_logprobs_bitwise_batch_invariance_bs1_vs_bsN(
max_num_seqs
=
32
,
max_model_len
=
8192
,
dtype
=
"bfloat16"
,
# not everything is supported
gpu_memory_utilization
=
0.9
,
)
# Use more realistic prompts for better token generation
...
...
@@ -444,6 +445,7 @@ def test_logprobs_without_batch_invariance_should_fail(
monkeypatch
.
setenv
(
"VLLM_ATTENTION_BACKEND"
,
backend
)
# CRITICAL: Disable batch invariance for this test
monkeypatch
.
setenv
(
"VLLM_BATCH_INVARIANT"
,
"0"
)
monkeypatch
.
setattr
(
batch_invariant
,
"VLLM_BATCH_INVARIANT"
,
False
)
seed
=
int
(
os
.
getenv
(
"VLLM_TEST_SEED"
,
"12345"
))
random
.
seed
(
seed
)
...
...
tests/v1/determinism/utils.py
View file @
1f400c58
...
...
@@ -6,6 +6,7 @@ import random
import
pytest
import
torch
from
vllm.attention.utils.fa_utils
import
flash_attn_supports_mla
from
vllm.platforms
import
current_platform
skip_unsupported
=
pytest
.
mark
.
skipif
(
...
...
@@ -18,7 +19,7 @@ BACKENDS: list[str] = [
"FLASHINFER"
,
]
if
current_platform
.
is_cuda
()
and
current_platform
.
is_device_capability
(
90
):
if
flash_attn_supports_mla
(
):
BACKENDS
.
append
(
"FLASH_ATTN_MLA"
)
DEFAULT_MODEL
=
"Qwen/Qwen3-1.7B"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment