Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
39ac6404
Unverified
Commit
39ac6404
authored
Apr 15, 2026
by
Wentao Ye
Committed by
GitHub
Apr 15, 2026
Browse files
[Bug] Fix batch invariant test issue, bs=1 with `max_seq_num = 1` (#39320)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
parent
0b790a25
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing 1 changed file with 11 additions and 25 deletions
+11
-25
tests/v1/determinism/test_batch_invariance.py
tests/v1/determinism/test_batch_invariance.py
+11
-25
No files found.
tests/v1/determinism/test_batch_invariance.py
View file @
39ac6404
...
...
@@ -36,10 +36,10 @@ def test_v1_generation_is_deterministic_across_batch_sizes_with_needle(
using the high-level v1 LLM() API only (no manual batching).
Strategy:
- Create two LLM engines with identical config except max_num_seqs: 1 vs N.
- Compute a baseline output for the needle prompt with the bs=1 engine.
- For many trials, generate a batch (size N) where the needle appears at a
random position among random filler prompts using the bs=N engine.
- Create a single LLM engine configured for the larger batch limit (N).
- Compute a baseline output for the needle prompt when it is run alone.
- For many trials, generate a mixed batch (size N) where the needle appears
at a random position among random filler prompts using the same engine.
- Track how many trials match vs mismatch, and report totals at the end.
The test fails if any mismatches occur, but we still dump pass/fail
counts.
...
...
@@ -83,11 +83,9 @@ def test_v1_generation_is_deterministic_across_batch_sizes_with_needle(
needle_prompt
=
"There once was a "
llm_bs1
=
None
llm_bsN
=
None
llm
=
None
try
:
# Engine with bs=1 behavior
llm_bs1
=
LLM_with_max_seqs
(
llm
=
LLM_with_max_seqs
(
model
=
model
,
max_num_seqs
=
max_batch_size
,
gpu_memory_utilization
=
gpu_mem_util
,
...
...
@@ -96,20 +94,11 @@ def test_v1_generation_is_deterministic_across_batch_sizes_with_needle(
)
# Baseline generation for the needle prompt alone.
baseline_out
=
llm
_bs1
.
generate
([
needle_prompt
],
sampling
)
baseline_out
=
llm
.
generate
([
needle_prompt
],
sampling
)
assert
len
(
baseline_out
)
==
1
assert
len
(
baseline_out
[
0
].
outputs
)
>=
1
baseline_text
=
baseline_out
[
0
].
outputs
[
0
].
text
# Engine with larger batch limit (e.g., 64)
llm_bsN
=
LLM_with_max_seqs
(
model
=
model
,
max_num_seqs
=
max_batch_size
,
gpu_memory_utilization
=
gpu_mem_util
,
max_model_len
=
max_model_len
,
attention_config
=
attention_config
,
)
mismatches
=
0
for
trial
in
range
(
num_trials
):
...
...
@@ -124,8 +113,8 @@ def test_v1_generation_is_deterministic_across_batch_sizes_with_needle(
else
:
prompts
.
append
(
_random_prompt
(
min_random_prompt
,
max_random_prompt
))
# Generate with the larger-batch engine
outputs
=
llm
_bsN
.
generate
(
prompts
,
sampling
)
# Generate with the same engine but in a larger batch.
outputs
=
llm
.
generate
(
prompts
,
sampling
)
# Find the needle output by position
needle_output
=
outputs
[
needle_pos
]
assert
needle_output
.
prompt
==
needle_prompt
...
...
@@ -151,12 +140,9 @@ def test_v1_generation_is_deterministic_across_batch_sizes_with_needle(
finally
:
# Ensure engines are shutdown to free GPU/VRAM across test sessions
if
llm
_bs1
is
not
None
:
if
llm
is
not
None
:
with
contextlib
.
suppress
(
Exception
):
llm_bs1
.
shutdown
()
if
llm_bsN
is
not
None
:
with
contextlib
.
suppress
(
Exception
):
llm_bsN
.
shutdown
()
llm
.
shutdown
()
@
skip_unsupported
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment