Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
52ee2102
Unverified
Commit
52ee2102
authored
Feb 03, 2026
by
Nick Hill
Committed by
GitHub
Feb 03, 2026
Browse files
[BugFix][Spec Decoding] Fix negative accepted tokens metric crash (#33729)
Signed-off-by:
Nick Hill
<
nickhill123@gmail.com
>
parent
655efb3e
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
61 additions
and
1 deletion
+61
-1
tests/v1/core/test_scheduler.py
tests/v1/core/test_scheduler.py
+60
-0
vllm/v1/core/sched/scheduler.py
vllm/v1/core/sched/scheduler.py
+1
-1
No files found.
tests/v1/core/test_scheduler.py
View file @
52ee2102
...
...
@@ -885,6 +885,66 @@ def test_schedule_spec_decoding_stats(spec_tokens, output_tokens, expected):
assert
stats
.
num_accepted_tokens_per_pos
==
expected
[
3
]
def test_spec_decoding_stats_empty_output():
    """Check that an empty sampled-token list does not break spec decoding stats.

    Regression test: with draft tokens scheduled and an empty
    ``sampled_token_ids`` entry, the accepted-token count used to be computed
    as ``len([]) - 1 == -1``, which led to a ValueError when a Prometheus
    counter was incremented with that negative value.
    """
    spec_len = 3
    scheduler = create_scheduler(num_speculative_tokens=spec_len)
    request = create_requests(num_requests=1, num_tokens=1)[0]
    rid = request.request_id
    scheduler.add_request(request)

    def _runner_output(sampled):
        # Build a single-request runner output carrying the given samples.
        return ModelRunnerOutput(
            req_ids=[rid],
            req_id_to_index={rid: 0},
            sampled_token_ids=sampled,
            logprobs=None,
            prompt_logprobs_dict={},
            pooler_output=[],
        )

    # First scheduling step: the request is picked up as a new request.
    sched_out = scheduler.schedule()
    assert len(sched_out.scheduled_new_reqs) == 1

    # Finish that step with exactly one sampled token.
    scheduler.update_from_output(sched_out, _runner_output([[0]]))

    # Hand the scheduler three draft tokens for this request.
    scheduler.update_draft_token_ids(DraftTokenIds([rid], [[1, 2, 3]]))

    # Second scheduling step: the drafts must be scheduled for verification.
    sched_out = scheduler.schedule()
    assert rid in sched_out.scheduled_spec_decode_tokens
    assert len(sched_out.scheduled_spec_decode_tokens[rid]) == 3

    # Report no sampled tokens at all for the request; this call is the one
    # that previously produced num_accepted == -1 and must now succeed.
    core_outputs = scheduler.update_from_output(sched_out, _runner_output([[]]))

    # With nothing generated, no spec decoding stats are expected.
    if core_outputs:
        stats = core_outputs[0].scheduler_stats
    else:
        stats = None
    if stats is not None:
        assert stats.spec_decoding_stats is None
def
_assert_right_scheduler_output
(
output
:
SchedulerOutput
,
num_requests
:
int
,
...
...
vllm/v1/core/sched/scheduler.py
View file @
52ee2102
...
...
@@ -1276,7 +1276,7 @@ class Scheduler(SchedulerInterface):
scheduled_spec_token_ids
=
(
scheduler_output
.
scheduled_spec_decode_tokens
.
get
(
req_id
)
)
if
scheduled_spec_token_ids
:
if
scheduled_spec_token_ids
and
generated_token_ids
:
num_draft_tokens
=
len
(
scheduled_spec_token_ids
)
num_accepted
=
len
(
generated_token_ids
)
-
1
num_rejected
=
num_draft_tokens
-
num_accepted
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment