Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
67fc16cd
Unverified
Commit
67fc16cd
authored
Nov 25, 2025
by
wang.yuqi
Committed by
GitHub
Nov 25, 2025
Browse files
[Bugfix] If chunked_prefill is disabled, end the scheduling early. (#28911)
Signed-off-by:
wang.yuqi
<
yuqi.wang@daocloud.io
>
parent
6330f947
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
33 additions
and
4 deletions
+33
-4
tests/v1/core/test_scheduler.py
tests/v1/core/test_scheduler.py
+28
-0
tests/v1/core/utils.py
tests/v1/core/utils.py
+2
-1
vllm/v1/core/sched/scheduler.py
vllm/v1/core/sched/scheduler.py
+3
-3
No files found.
tests/v1/core/test_scheduler.py
View file @
67fc16cd
...
...
@@ -641,6 +641,34 @@ def test_schedule_concurrent_batches(
scheduler
.
update_from_output
(
scheduler_output1
,
model_runner_output
)
@pytest.mark.parametrize("enable_chunked_prefill", [True, False])
def test_schedule_order(enable_chunked_prefill: bool):
    """Check how many new requests the first scheduling step admits.

    The scheduler is created with a 1024-token batch budget and at most
    three sequences. Two 800-token requests are queued first, followed by
    two 10-token requests, and a single ``schedule()`` call is inspected.
    """
    scheduler = create_scheduler(
        max_num_batched_tokens=1024,
        max_num_seqs=3,
        enable_chunked_prefill=enable_chunked_prefill,
    )

    # Long requests are queued ahead of the short ones.
    long_requests = create_requests(num_requests=2, num_tokens=800)
    short_requests = create_requests(num_requests=2, num_tokens=10)
    for request in [*long_requests, *short_requests]:
        scheduler.add_request(request)

    first_output = scheduler.schedule()

    if enable_chunked_prefill:
        # With chunked prefill enabled, the long requests are chunked.
        expected_new_reqs = 2
    else:
        # With chunked prefill disabled, the scheduler must not skip a
        # long request and schedule the subsequent short requests ahead
        # of it, even though some token budget is still remaining.
        expected_new_reqs = 1
    assert len(first_output.scheduled_new_reqs) == expected_new_reqs
def
test_preempt_during_execution
():
# NOTE(woosuk): The actual number of available blocks is 10 instead of 11
# because block 0 is reserved as the null block.
...
...
tests/v1/core/utils.py
View file @
67fc16cd
...
...
@@ -42,6 +42,7 @@ def create_scheduler(
model
:
str
=
"facebook/opt-125m"
,
max_num_seqs
:
int
=
16
,
max_num_batched_tokens
:
int
=
8192
,
enable_chunked_prefill
:
bool
=
True
,
enable_prefix_caching
:
bool
=
False
,
long_prefill_token_threshold
:
int
=
0
,
disable_chunked_mm_input
:
bool
=
False
,
...
...
@@ -76,7 +77,7 @@ def create_scheduler(
max_model_len
=
max_model_len
,
long_prefill_token_threshold
=
long_prefill_token_threshold
,
disable_chunked_mm_input
=
disable_chunked_mm_input
,
enable_chunked_prefill
=
True
,
enable_chunked_prefill
=
enable_chunked_prefill
,
async_scheduling
=
async_scheduling
,
)
model_config
=
ModelConfig
(
...
...
vllm/v1/core/sched/scheduler.py
View file @
67fc16cd
...
...
@@ -508,9 +508,9 @@ class Scheduler(SchedulerInterface):
not
self
.
scheduler_config
.
enable_chunked_prefill
and
num_new_tokens
>
token_budget
):
self
.
waiting
.
pop_request
()
skipped_waiting_requests
.
prepend_request
(
request
)
continue
# If chunked_prefill is disabled,
# we can stop the scheduling here.
break
num_new_tokens
=
min
(
num_new_tokens
,
token_budget
)
assert
num_new_tokens
>
0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment