Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
6d7f0377
Unverified
Commit
6d7f0377
authored
Mar 08, 2025
by
Jiayi Yao
Committed by
GitHub
Mar 08, 2025
Browse files
[Feat] Support chunked prefill for LMCache connector (#14505)
Signed-off-by:
YaoJiayi
<
120040070@link.cuhk.edu.cn
>
parent
10f75527
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
9 additions
and
19 deletions
+9
-19
vllm/distributed/kv_transfer/kv_connector/lmcache_connector.py
...distributed/kv_transfer/kv_connector/lmcache_connector.py
+9
-19
No files found.
vllm/distributed/kv_transfer/kv_connector/lmcache_connector.py
View file @
6d7f0377
...
@@ -38,7 +38,8 @@ class LMCacheConnector(KVConnectorBase):
...
@@ -38,7 +38,8 @@ class LMCacheConnector(KVConnectorBase):
from
lmcache.integration.vllm.utils
import
ENGINE_NAME
from
lmcache.integration.vllm.utils
import
ENGINE_NAME
from
lmcache.integration.vllm.vllm_adapter
import
(
from
lmcache.integration.vllm.vllm_adapter
import
(
RetrieveStatus
,
StoreStatus
,
init_lmcache_engine
,
RetrieveStatus
,
StoreStatus
,
init_lmcache_engine
,
lmcache_retrieve_kv
,
lmcache_should_store
,
lmcache_store_kv
)
lmcache_retrieve_kv
,
lmcache_should_retrieve
,
lmcache_should_store
,
lmcache_store_kv
)
logger
.
info
(
"Initializing LMCacheConfig under kv_transfer_config %s"
,
logger
.
info
(
"Initializing LMCacheConfig under kv_transfer_config %s"
,
self
.
transfer_config
)
self
.
transfer_config
)
...
@@ -54,6 +55,7 @@ class LMCacheConnector(KVConnectorBase):
...
@@ -54,6 +55,7 @@ class LMCacheConnector(KVConnectorBase):
self
.
cache_config
=
config
.
cache_config
self
.
cache_config
=
config
.
cache_config
self
.
lmcache_retrieve_kv
=
lmcache_retrieve_kv
self
.
lmcache_retrieve_kv
=
lmcache_retrieve_kv
self
.
lmcache_store_kv
=
lmcache_store_kv
self
.
lmcache_store_kv
=
lmcache_store_kv
self
.
lmcache_should_retrieve
=
lmcache_should_retrieve
self
.
lmcache_should_store
=
lmcache_should_store
self
.
lmcache_should_store
=
lmcache_should_store
self
.
store_status
=
StoreStatus
self
.
store_status
=
StoreStatus
self
.
retrieve_status
=
RetrieveStatus
self
.
retrieve_status
=
RetrieveStatus
...
@@ -65,15 +67,11 @@ class LMCacheConnector(KVConnectorBase):
...
@@ -65,15 +67,11 @@ class LMCacheConnector(KVConnectorBase):
)
->
Tuple
[
Union
[
torch
.
Tensor
,
IntermediateTensors
],
bool
,
)
->
Tuple
[
Union
[
torch
.
Tensor
,
IntermediateTensors
],
bool
,
"ModelInputForGPUWithSamplingMetadata"
]:
"ModelInputForGPUWithSamplingMetadata"
]:
hidden_or_intermediate_states
=
None
retrieve_status
=
self
.
lmcache_should_retrieve
(
model_input
)
model_input
,
bypass_model_exec
,
hidden_or_intermediate_states
=
\
# TODO (Jiayi): Need to support chunked prefill
self
.
lmcache_retrieve_kv
(
retrieve_status
=
self
.
retrieve_status
.
PREFILL
model_executable
,
model_input
,
self
.
cache_config
,
kv_caches
,
retrieve_status
)
model_input
,
bypass_model_exec
=
self
.
lmcache_retrieve_kv
(
model_executable
,
model_input
,
self
.
cache_config
,
kv_caches
,
retrieve_status
)
return
hidden_or_intermediate_states
,
bypass_model_exec
,
model_input
return
hidden_or_intermediate_states
,
bypass_model_exec
,
model_input
def
send_kv_caches_and_hidden_states
(
def
send_kv_caches_and_hidden_states
(
...
@@ -84,15 +82,7 @@ class LMCacheConnector(KVConnectorBase):
...
@@ -84,15 +82,7 @@ class LMCacheConnector(KVConnectorBase):
hidden_or_intermediate_states
:
Union
[
torch
.
Tensor
,
hidden_or_intermediate_states
:
Union
[
torch
.
Tensor
,
IntermediateTensors
],
IntermediateTensors
],
)
->
None
:
)
->
None
:
num_reqs
=
0
seq_group_list
=
model_input
.
sampling_metadata
.
seq_groups
assert
seq_group_list
is
not
None
for
seq_group
in
seq_group_list
:
seq_ids
=
seq_group
.
seq_ids
for
seq_id
in
seq_ids
:
num_reqs
+=
1
# TODO (Jiayi): Only normal prefill is supported for now
store_status
=
self
.
lmcache_should_store
(
model_input
)
store_status
=
self
.
lmcache_should_store
(
model_input
)
self
.
lmcache_store_kv
(
self
.
lmcache_store_kv
(
self
.
model_config
,
self
.
model_config
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment