Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
7ad7adb6
Unverified
Commit
7ad7adb6
authored
Aug 09, 2025
by
Or Ozeri
Committed by
GitHub
Aug 08, 2025
Browse files
v1: Pass KVConnectorOutput to scheduler-side (#22157)
Signed-off-by:
Or Ozeri
<
oro@il.ibm.com
>
parent
6ade99ea
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
22 additions
and
0 deletions
+22
-0
vllm/distributed/kv_transfer/kv_connector/v1/base.py
vllm/distributed/kv_transfer/kv_connector/v1/base.py
+13
-0
vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
...istributed/kv_transfer/kv_connector/v1/multi_connector.py
+5
-0
vllm/v1/core/sched/scheduler.py
vllm/v1/core/sched/scheduler.py
+4
-0
No files found.
vllm/distributed/kv_transfer/kv_connector/v1/base.py
View file @
7ad7adb6
...
@@ -12,6 +12,8 @@ The class provides the following primitives:
...
@@ -12,6 +12,8 @@ The class provides the following primitives:
times for a given request and should be side-effect free.
times for a given request and should be side-effect free.
update_state_after_alloc() - update KVConnector state after
update_state_after_alloc() - update KVConnector state after
temporary buffer alloc by the CacheManager.
temporary buffer alloc by the CacheManager.
update_connector_output() - update KVConnector state after
output is received from worker-side connectors.
request_finished() - called when a request is finished, with
request_finished() - called when a request is finished, with
the computed kv cache blocks for the request.
the computed kv cache blocks for the request.
Returns whether KV cache should be freed now or will be
Returns whether KV cache should be freed now or will be
...
@@ -38,6 +40,7 @@ import torch
...
@@ -38,6 +40,7 @@ import torch
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.v1.core.sched.output
import
SchedulerOutput
from
vllm.v1.core.sched.output
import
SchedulerOutput
from
vllm.v1.outputs
import
KVConnectorOutput
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
from
vllm.attention.backends.abstract
import
AttentionMetadata
from
vllm.attention.backends.abstract
import
AttentionMetadata
...
@@ -283,6 +286,16 @@ class KVConnectorBase_V1(ABC):
...
@@ -283,6 +286,16 @@ class KVConnectorBase_V1(ABC):
"""
"""
pass
pass
def update_connector_output(self, connector_output: KVConnectorOutput):
    """
    Update KVConnector state using the output reported by the
    worker-side connectors.

    This implementation performs no work and returns None.

    Args:
        connector_output (KVConnectorOutput): the worker-side
            connectors output.
    """
    return None
def
request_finished
(
def
request_finished
(
self
,
self
,
request
:
"Request"
,
request
:
"Request"
,
...
...
vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
View file @
7ad7adb6
...
@@ -14,6 +14,7 @@ from vllm.distributed.kv_transfer.kv_connector.v1.base import (
...
@@ -14,6 +14,7 @@ from vllm.distributed.kv_transfer.kv_connector.v1.base import (
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.v1.core.kv_cache_manager
import
KVCacheBlocks
from
vllm.v1.core.kv_cache_manager
import
KVCacheBlocks
from
vllm.v1.core.sched.output
import
SchedulerOutput
from
vllm.v1.core.sched.output
import
SchedulerOutput
from
vllm.v1.outputs
import
KVConnectorOutput
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
from
vllm.attention.backends.abstract
import
AttentionMetadata
from
vllm.attention.backends.abstract
import
AttentionMetadata
...
@@ -177,6 +178,10 @@ class MultiConnector(KVConnectorBase_V1):
...
@@ -177,6 +178,10 @@ class MultiConnector(KVConnectorBase_V1):
self
.
_extra_async_saves
=
{}
self
.
_extra_async_saves
=
{}
return
metadata
return
metadata
def update_connector_output(self, connector_output: KVConnectorOutput):
    """
    Forward the worker-side connectors output to every connector
    held in ``self._connectors``, in iteration order.

    Args:
        connector_output (KVConnectorOutput): the worker-side
            connectors output, passed unchanged to each connector.
    """
    for connector in self._connectors:
        connector.update_connector_output(connector_output)
def
request_finished
(
def
request_finished
(
self
,
self
,
request
:
"Request"
,
request
:
"Request"
,
...
...
vllm/v1/core/sched/scheduler.py
View file @
7ad7adb6
...
@@ -1150,6 +1150,10 @@ class Scheduler(SchedulerInterface):
...
@@ -1150,6 +1150,10 @@ class Scheduler(SchedulerInterface):
# if finished_recving: add to state so we can
# if finished_recving: add to state so we can
schedule the request during the next step.
schedule the request during the next step.
"""
"""
assert
self
.
connector
is
not
None
self
.
connector
.
update_connector_output
(
kv_connector_output
)
# KV Connector: update recv and send status from last step.
# KV Connector: update recv and send status from last step.
for
req_id
in
(
kv_connector_output
.
finished_recving
or
()):
for
req_id
in
(
kv_connector_output
.
finished_recving
or
()):
logger
.
debug
(
"Finished recving KV transfer for request %s"
,
req_id
)
logger
.
debug
(
"Finished recving KV transfer for request %s"
,
req_id
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment