Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
80b6080d
Unverified
Commit
80b6080d
authored
Nov 16, 2025
by
Nick Hill
Committed by
GitHub
Nov 17, 2025
Browse files
[BugFix] Fix async scheduling + chunked prefill + preemption (#28787)
Signed-off-by:
Nick Hill
<
nhill@redhat.com
>
parent
03ee4811
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
8 additions
and
9 deletions
+8
-9
tests/v1/e2e/test_async_scheduling.py
tests/v1/e2e/test_async_scheduling.py
+4
-6
vllm/v1/core/sched/scheduler.py
vllm/v1/core/sched/scheduler.py
+1
-3
vllm/v1/utils.py
vllm/v1/utils.py
+3
-0
No files found.
tests/v1/e2e/test_async_scheduling.py
View file @
80b6080d
...
...
@@ -65,9 +65,8 @@ def test_without_spec_decoding(
(
True
,
"mp"
,
True
,
None
,
False
),
(
True
,
"uni"
,
True
,
None
,
False
),
(
False
,
"mp"
,
True
,
None
,
True
),
# Async scheduling + preemption + chunked prefill needs to be fixed (WIP)
# (True, "mp", True, None, True),
# (True, "uni", True, None, True),
(
True
,
"mp"
,
True
,
None
,
True
),
(
True
,
"uni"
,
True
,
None
,
True
),
]
run_tests
(
...
...
@@ -103,9 +102,8 @@ def test_with_spec_decoding(monkeypatch: pytest.MonkeyPatch):
(
False
,
"mp"
,
True
,
spec_config_short
,
True
),
(
True
,
"uni"
,
True
,
spec_config
,
False
),
(
True
,
"uni"
,
True
,
spec_config_short
,
False
),
# Async scheduling + preemption + chunked prefill needs to be fixed (WIP)
# (True, "mp", True, spec_config, True),
# (True, "uni", True, spec_config_short, True),
(
True
,
"mp"
,
True
,
spec_config
,
True
),
(
True
,
"uni"
,
True
,
spec_config_short
,
True
),
]
run_tests
(
...
...
vllm/v1/core/sched/scheduler.py
View file @
80b6080d
...
...
@@ -778,9 +778,7 @@ class Scheduler(SchedulerInterface):
assert
not
scheduled_in_prev_step
resumed_req_ids
.
add
(
req_id
)
if
not
scheduled_in_prev_step
:
all_token_ids
[
req_id
]
=
req
.
all_token_ids
[
:
req
.
num_computed_tokens
+
num_tokens
]
all_token_ids
[
req_id
]
=
req
.
all_token_ids
.
copy
()
new_block_ids
.
append
(
req_to_new_blocks
[
req_id
].
get_block_ids
(
allow_none
=
True
)
)
...
...
vllm/v1/utils.py
View file @
80b6080d
...
...
@@ -97,6 +97,9 @@ class ConstantList(Generic[T], Sequence):
def
__repr__
(
self
):
return
f
"ConstantList(
{
self
.
_x
}
)"
def
copy
(
self
)
->
list
[
T
]:
return
self
.
_x
.
copy
()
class
CpuGpuBuffer
:
"""Buffer to easily copy tensors between CPU and GPU."""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment