Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
7ad7adb6
Unverified
Commit
7ad7adb6
authored
Aug 09, 2025
by
Or Ozeri
Committed by
GitHub
Aug 08, 2025
Browse files
v1: Pass KVConnectorOutput to scheduler-side (#22157)
Signed-off-by:
Or Ozeri
<
oro@il.ibm.com
>
parent
6ade99ea
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
22 additions
and
0 deletions
+22
-0
vllm/distributed/kv_transfer/kv_connector/v1/base.py
vllm/distributed/kv_transfer/kv_connector/v1/base.py
+13
-0
vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
...istributed/kv_transfer/kv_connector/v1/multi_connector.py
+5
-0
vllm/v1/core/sched/scheduler.py
vllm/v1/core/sched/scheduler.py
+4
-0
No files found.
vllm/distributed/kv_transfer/kv_connector/v1/base.py
View file @
7ad7adb6
...
...
@@ -12,6 +12,8 @@ The class provides the following primitives:
times for a given request and should be side-effect free.
update_state_after_alloc() - update KVConnector state after
temporary buffer alloc by the CacheManager.
update_connector_output() - update KVConnector state after
output is received from worker-side connectors.
request_finished() - called when a request is finished, with
the computed kv cache blocks for the request.
Returns whether KV cache should be freed now or will be
...
...
@@ -38,6 +40,7 @@ import torch
from
vllm.logger
import
init_logger
from
vllm.v1.core.sched.output
import
SchedulerOutput
from
vllm.v1.outputs
import
KVConnectorOutput
if
TYPE_CHECKING
:
from
vllm.attention.backends.abstract
import
AttentionMetadata
...
...
@@ -283,6 +286,16 @@ class KVConnectorBase_V1(ABC):
"""
pass
def
update_connector_output
(
self
,
connector_output
:
KVConnectorOutput
):
"""
Update KVConnector state from the worker-side connectors' output.
Args:
connector_output (KVConnectorOutput): the worker-side
connectors output.
"""
return
def
request_finished
(
self
,
request
:
"Request"
,
...
...
vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
View file @
7ad7adb6
...
...
@@ -14,6 +14,7 @@ from vllm.distributed.kv_transfer.kv_connector.v1.base import (
from
vllm.logger
import
init_logger
from
vllm.v1.core.kv_cache_manager
import
KVCacheBlocks
from
vllm.v1.core.sched.output
import
SchedulerOutput
from
vllm.v1.outputs
import
KVConnectorOutput
if
TYPE_CHECKING
:
from
vllm.attention.backends.abstract
import
AttentionMetadata
...
...
@@ -177,6 +178,10 @@ class MultiConnector(KVConnectorBase_V1):
self
.
_extra_async_saves
=
{}
return
metadata
def
update_connector_output
(
self
,
connector_output
:
KVConnectorOutput
):
for
c
in
self
.
_connectors
:
c
.
update_connector_output
(
connector_output
)
def
request_finished
(
self
,
request
:
"Request"
,
...
...
vllm/v1/core/sched/scheduler.py
View file @
7ad7adb6
...
...
@@ -1150,6 +1150,10 @@ class Scheduler(SchedulerInterface):
# if finished_recving: add to state so we can
schedule the request during the next step.
"""
assert
self
.
connector
is
not
None
self
.
connector
.
update_connector_output
(
kv_connector_output
)
# KV Connector: update recv and send status from last step.
for
req_id
in
(
kv_connector_output
.
finished_recving
or
()):
logger
.
debug
(
"Finished recving KV transfer for request %s"
,
req_id
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment