Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b12cb383
Unverified
Commit
b12cb383
authored
Dec 30, 2025
by
chunxiaozheng
Committed by
GitHub
Dec 29, 2025
Browse files
implements register kv caches in lmcache connector (#31397)
Signed-off-by:
idellzheng
<
idellzheng@tencent.com
>
parent
5bc66411
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
26 additions
and
0 deletions
+26
-0
vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py
...tributed/kv_transfer/kv_connector/v1/lmcache_connector.py
+16
-0
vllm/distributed/kv_transfer/kv_connector/v1/lmcache_integration/vllm_v1_adapter.py
...er/kv_connector/v1/lmcache_integration/vllm_v1_adapter.py
+10
-0
No files found.
vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py
View file @
b12cb383
...
@@ -107,6 +107,22 @@ class LMCacheConnectorV1(KVConnectorBase_V1):
...
@@ -107,6 +107,22 @@ class LMCacheConnectorV1(KVConnectorBase_V1):
# ==============================
# ==============================
# Worker-side methods
# Worker-side methods
# ==============================
# ==============================
def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
    """
    Hand the KV cache tensors over to the underlying LMCache engine.

    The call is forwarded only when the engine object exposes a
    ``register_kv_caches`` attribute; otherwise a warning is logged and
    nothing else happens.

    Args:
        kv_caches: dictionary of layer names, kv cache
    """
    # Guard clause: engines without the hook get a warning and we bail out.
    if not hasattr(self._lmcache_engine, "register_kv_caches"):
        logger.warning(
            "LMCache engine does not support register_kv_caches, "
            "please check and use the latest version"
        )
        return

    self._lmcache_engine.register_kv_caches(kv_caches)
def
start_load_kv
(
self
,
forward_context
:
"ForwardContext"
,
**
kwargs
:
Any
)
->
None
:
def
start_load_kv
(
self
,
forward_context
:
"ForwardContext"
,
**
kwargs
:
Any
)
->
None
:
"""
"""
Start loading the KV cache from the connector to vLLM's paged
Start loading the KV cache from the connector to vLLM's paged
...
...
vllm/distributed/kv_transfer/kv_connector/v1/lmcache_integration/vllm_v1_adapter.py
View file @
b12cb383
...
@@ -782,6 +782,16 @@ class LMCacheConnectorV1Impl:
...
@@ -782,6 +782,16 @@ class LMCacheConnectorV1Impl:
####################
####################
# Worker side APIs
# Worker side APIs
####################
####################
@_lmcache_nvtx_annotate
def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
    """
    Store the given KV caches on this object and pass them to the engine.

    Asserts that no caches have been stored yet and that ``kv_caches`` is
    non-empty, then keeps a reference in ``self.kv_caches``. When
    ``self.lmcache_engine`` is set, its ``post_init`` is invoked with the
    cache values as a list.

    Args:
        kv_caches: dictionary of layer names, kv cache
    """
    logger.info("Registering KV caches")
    # TODO(chunxiaozheng): `_init_kv_caches_from_forward_context` is
    #  not called, we should consider removing it.
    # Preconditions, checked in the same order as before: nothing stored
    # yet, and the incoming mapping actually contains caches.
    assert len(self.kv_caches) == 0
    assert len(kv_caches) > 0
    self.kv_caches = kv_caches

    if self.lmcache_engine is None:
        return

    cache_values = list(self.kv_caches.values())
    self.lmcache_engine.post_init(kvcaches=cache_values)
@
_lmcache_nvtx_annotate
@
_lmcache_nvtx_annotate
def
start_load_kv
(
self
,
forward_context
:
"ForwardContext"
,
**
kwargs
)
->
None
:
def
start_load_kv
(
self
,
forward_context
:
"ForwardContext"
,
**
kwargs
)
->
None
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment