Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
cda92307
Unverified
Commit
cda92307
authored
Jun 17, 2025
by
Jiayi Yao
Committed by
GitHub
Jun 17, 2025
Browse files
[Misc] Update lmcache connector with the latest connector apis (#19441)
Signed-off-by:
YaoJiayi
<
120040070@link.cuhk.edu.cn
>
parent
bf57ccc5
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
34 additions
and
1 deletion
+34
-1
vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py
...tributed/kv_transfer/kv_connector/v1/lmcache_connector.py
+34
-1
No files found.
vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py
View file @
cda92307
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
typing
import
TYPE_CHECKING
from
typing
import
TYPE_CHECKING
,
Any
,
Optional
import
torch
import
torch
from
lmcache.integration.vllm.vllm_v1_adapter
import
LMCacheConnectorV1Impl
from
lmcache.integration.vllm.vllm_v1_adapter
import
LMCacheConnectorV1Impl
...
@@ -87,6 +87,22 @@ class LMCacheConnectorV1(KVConnectorBase_V1):
...
@@ -87,6 +87,22 @@ class LMCacheConnectorV1(KVConnectorBase_V1):
"""
"""
self
.
_lmcache_engine
.
wait_for_save
()
self
.
_lmcache_engine
.
wait_for_save
()
def get_finished(
    self, finished_req_ids: set[str]
) -> tuple[Optional[set[str]], Optional[set[str]]]:
    """
    Report to the worker-side connector which requests have finished
    generating tokens.

    The call is forwarded unchanged to the underlying LMCache engine and
    its result is handed back to the caller as-is.

    Args:
        finished_req_ids: ids of requests that have finished generating
            tokens.

    Returns:
        A tuple of (sending/saving ids, recving/loading ids) holding the
        ids of requests whose asynchronous transfer has finished
        (requests that previously returned True from request_finished()).
        Either element may be None.
        The finished saves/sends req ids must belong to a set provided
        in a call to this method (this call or a prior one).
    """
    engine = self._lmcache_engine
    finished = engine.get_finished(finished_req_ids)
    return finished
# ==============================
# ==============================
# Scheduler-side methods
# Scheduler-side methods
# ==============================
# ==============================
...
@@ -132,3 +148,20 @@ class LMCacheConnectorV1(KVConnectorBase_V1):
...
@@ -132,3 +148,20 @@ class LMCacheConnectorV1(KVConnectorBase_V1):
scheduler_output (SchedulerOutput): the scheduler output object.
scheduler_output (SchedulerOutput): the scheduler output object.
"""
"""
return
self
.
_lmcache_engine
.
build_connector_meta
(
scheduler_output
)
return
self
.
_lmcache_engine
.
build_connector_meta
(
scheduler_output
)
def request_finished(
    self,
    request: "Request",
    block_ids: list[int],
) -> tuple[bool, Optional[dict[str, Any]]]:
    """
    Hook invoked once a request has finished, before its blocks are freed.

    The call is forwarded unchanged to the underlying LMCache engine and
    its result is handed back to the caller as-is.

    Args:
        request: the request that has finished.
        block_ids: ids of the blocks belonging to the request.

    Returns:
        A tuple of:
        - True if the request is being saved/sent asynchronously and its
          blocks should not be freed until the request_id is returned
          from get_finished().
        - Optional KVTransferParams to be included in the request
          outputs returned by the engine.
    """
    engine = self._lmcache_engine
    outcome = engine.request_finished(request, block_ids)
    return outcome
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment