Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
81786c87
Unverified
Commit
81786c87
authored
Dec 25, 2025
by
Nick Hill
Committed by
GitHub
Dec 25, 2025
Browse files
[BugFix] Fix async scheduling + reasoning with struct output (#31332)
Signed-off-by:
Nick Hill
<
nickhill123@gmail.com
>
parent
f1531d9f
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
9 additions
and
3 deletions
+9
-3
tests/v1/entrypoints/llm/test_struct_output_generate.py
tests/v1/entrypoints/llm/test_struct_output_generate.py
+6
-2
tests/v1/structured_output/test_reasoning_structured_output.py
.../v1/structured_output/test_reasoning_structured_output.py
+1
-0
vllm/v1/structured_output/__init__.py
vllm/v1/structured_output/__init__.py
+2
-1
No files found.
tests/v1/entrypoints/llm/test_struct_output_generate.py
View file @
81786c87
...
...
@@ -608,7 +608,7 @@ Make the response as short as possible.
@
pytest
.
mark
.
parametrize
(
"model_name, backend, tokenizer_mode, reasoning_parser, speculative_config"
,
# noqa: E501
"model_name, backend, tokenizer_mode, reasoning_parser, speculative_config
, async_scheduling
"
,
# noqa: E501
[
(
"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
,
...
...
@@ -616,8 +616,10 @@ Make the response as short as possible.
"auto"
,
"deepseek_r1"
,
NGRAM_SPEC_CONFIG
,
False
,
),
(
"Qwen/Qwen3-1.7B"
,
"xgrammar"
,
"auto"
,
"deepseek_r1"
,
None
),
(
"Qwen/Qwen3-1.7B"
,
"xgrammar"
,
"auto"
,
"deepseek_r1"
,
None
,
False
),
(
"Qwen/Qwen3-1.7B"
,
"xgrammar"
,
"auto"
,
"deepseek_r1"
,
None
,
True
),
],
)
def
test_structured_output_with_reasoning_matrices
(
...
...
@@ -626,6 +628,7 @@ def test_structured_output_with_reasoning_matrices(
reasoning_parser
:
str
,
model_name
:
str
,
speculative_config
:
dict
[
str
,
Any
]
|
None
,
async_scheduling
:
bool
,
):
if
current_platform
.
is_tpu
()
and
speculative_config
:
pytest
.
skip
(
"TPU does not support speculative decoding"
)
...
...
@@ -646,6 +649,7 @@ def test_structured_output_with_reasoning_matrices(
),
tokenizer_mode
=
tokenizer_mode
,
speculative_config
=
speculative_config
,
async_scheduling
=
async_scheduling
,
)
tokenizer
=
llm
.
get_tokenizer
()
reasoner
=
ReasoningParserManager
.
get_reasoning_parser
(
reasoning_parser
)(
...
...
tests/v1/structured_output/test_reasoning_structured_output.py
View file @
81786c87
...
...
@@ -71,6 +71,7 @@ class TestReasoningStructuredOutput:
request
.
prompt_token_ids
=
[
1
,
2
,
3
,
4
,
5
]
request
.
all_token_ids
=
[
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
]
request
.
num_computed_tokens
=
5
request
.
num_output_placeholders
=
0
return
request
def
test_should_fill_bitmask_with_enable_in_reasoning
(
...
...
vllm/v1/structured_output/__init__.py
View file @
81786c87
...
...
@@ -339,8 +339,9 @@ class StructuredOutputManager:
return
True
# Check if reasoning ends in *this* step
delta_from
=
request
.
num_computed_tokens
-
request
.
num_output_placeholders
if
self
.
reasoner
.
is_reasoning_end_streaming
(
request
.
all_token_ids
,
request
.
all_token_ids
[
request
.
num_computed_tokens
:]
request
.
all_token_ids
,
request
.
all_token_ids
[
delta_from
:]
):
# Reasoning just ended, so we shouldn't advance til
# next pass
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment