Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
1cec5b7e
Unverified
Commit
1cec5b7e
authored
Dec 13, 2025
by
Nick Hill
Committed by
GitHub
Dec 13, 2025
Browse files
[Scheduler] Simplify stop checking for pooling models (#30591)
Signed-off-by:
Nick Hill
<
nhill@redhat.com
>
parent
b09806e2
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
16 deletions
+7
-16
vllm/v1/core/sched/scheduler.py
vllm/v1/core/sched/scheduler.py
+5
-6
vllm/v1/core/sched/utils.py
vllm/v1/core/sched/utils.py
+2
-10
No files found.
vllm/v1/core/sched/scheduler.py
View file @
1cec5b7e
...
...
@@ -1117,6 +1117,7 @@ class Scheduler(SchedulerInterface):
stopped
=
False
new_logprobs
=
None
new_token_ids
=
generated_token_ids
pooler_output
=
pooler_outputs
[
req_index
]
if
pooler_outputs
else
None
kv_transfer_params
=
None
status_before_stop
=
request
.
status
...
...
@@ -1125,12 +1126,10 @@ class Scheduler(SchedulerInterface):
new_token_ids
,
stopped
=
self
.
_update_request_with_output
(
request
,
new_token_ids
)
# Stop checking for pooler models.
pooler_output
=
None
if
pooler_outputs
:
pooler_output
=
pooler_outputs
[
req_index
]
stopped
=
check_stop
(
request
,
self
.
max_model_len
,
pooler_output
)
elif
request
.
pooling_params
and
pooler_output
is
not
None
:
# Pooling stops as soon as there is output.
request
.
status
=
RequestStatus
.
FINISHED_STOPPED
stopped
=
True
if
stopped
:
kv_transfer_params
=
self
.
_free_request
(
request
)
...
...
vllm/v1/core/sched/utils.py
View file @
1cec5b7e
...
...
@@ -2,8 +2,6 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
contextlib
import
torch
from
vllm.v1.request
import
Request
,
RequestStatus
...
...
@@ -39,14 +37,8 @@ def remove_all(lst: list, items_to_remove: set) -> list:
return
[
item
for
item
in
lst
if
item
not
in
items_to_remove
]
def
check_stop
(
request
:
Request
,
max_model_len
:
int
,
pooler_output
:
torch
.
Tensor
|
None
=
None
)
->
bool
:
if
request
.
pooling_params
:
if
pooler_output
is
not
None
:
request
.
status
=
RequestStatus
.
FINISHED_STOPPED
return
True
return
False
def
check_stop
(
request
:
Request
,
max_model_len
:
int
)
->
bool
:
assert
not
request
.
pooling_params
sampling_params
=
request
.
sampling_params
assert
sampling_params
is
not
None
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment