Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e94ec597
Unverified
Commit
e94ec597
authored
Feb 09, 2026
by
Yuwei An
Committed by
GitHub
Feb 10, 2026
Browse files
[LMCache] Token Base IPC API (#34175)
Signed-off-by:
Oasis-Git
<
ayw.sirius19@gmail.com
>
parent
13397841
Changes
2
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
376 additions
and
90 deletions
+376
-90
vllm/distributed/kv_transfer/kv_connector/v1/lmcache_integration/multi_process_adapter.py
...connector/v1/lmcache_integration/multi_process_adapter.py
+344
-73
vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py
...buted/kv_transfer/kv_connector/v1/lmcache_mp_connector.py
+32
-17
No files found.
vllm/distributed/kv_transfer/kv_connector/v1/lmcache_integration/multi_process_adapter.py
View file @
e94ec597
This diff is collapsed.
Click to expand it.
vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py
View file @
e94ec597
...
...
@@ -3,7 +3,7 @@
import
enum
from
collections.abc
import
Iterable
from
dataclasses
import
dataclass
,
field
from
typing
import
TYPE_CHECKING
,
Any
,
Literal
,
cast
from
typing
import
TYPE_CHECKING
,
Any
,
Literal
import
torch
import
zmq
...
...
@@ -130,12 +130,6 @@ def create_worker_adapter(
)
def
convert_block_hashes_to_bytes
(
block_hashes
:
list
[
"BlockHash"
],
)
->
list
[
bytes
]:
return
cast
(
list
[
bytes
],
block_hashes
)
class
LMCacheMPRequestState
(
enum
.
Enum
):
"""
State machine:
...
...
@@ -266,6 +260,7 @@ class LMCacheMPRequestMetadata:
Args:
tracker: The request tracker to generate the metadata from.
blocks_in_chunk: the number of blocks in a LMCache data chunk
vllm_block_size: the block size used in vLLM
"""
# Store the blocks that has block hashes
# NOTE: the invariant here is that `num_stored_blocks` should
...
...
@@ -282,15 +277,21 @@ class LMCacheMPRequestMetadata:
if
num_chunks
>=
1
:
start
=
tracker
.
num_stored_blocks
end
=
start
+
num_chunks
*
blocks_in_chunk
block_hashes
=
convert_block_hashes_to_bytes
(
tracker
.
block_hashes
[
start
:
end
]
)
block_ids
=
tracker
.
allocated_block_ids
[
start
:
end
]
start_token_idx
=
start
*
vllm_block_size
end_token_idx
=
end
*
vllm_block_size
token_ids
=
list
(
tracker
.
all_token_ids
)
op
=
LoadStoreOp
(
token_ids
=
token_ids
,
block_ids
=
block_ids
,
start
=
start_token_idx
,
end
=
end_token_idx
,
)
ret
=
LMCacheMPRequestMetadata
(
request_id
=
tracker
.
request_id
,
direction
=
"STORE"
,
op
=
LoadStoreOp
(
block_hashes
=
block_hashes
,
block_ids
=
block_ids
)
,
op
=
op
,
)
# Update the request tracker
...
...
@@ -303,6 +304,7 @@ class LMCacheMPRequestMetadata:
def
GetRetrieveMetadata
(
tracker
:
LMCacheMPRequestTracker
,
blocks_in_chunk
:
int
,
vllm_block_size
:
int
,
)
->
"LMCacheMPRequestMetadata | None"
:
"""
Generate the retrieve metadata for the current request tracker.
...
...
@@ -310,6 +312,7 @@ class LMCacheMPRequestMetadata:
Args:
tracker: The request tracker to generate the metadata from.
blocks_in_chunk: the number of blocks in a LMCache data chunk
vllm_block_size: the block size used in vLLM
"""
if
not
tracker
.
is_ready_for_retrieving
():
return
None
...
...
@@ -330,15 +333,21 @@ class LMCacheMPRequestMetadata:
"number of LMCache hit blocks. "
)
if
end
>
start
:
block_hashes
=
convert_block_hashes_to_bytes
(
tracker
.
block_hashes
[
start
:
end
]
)
block_ids
=
tracker
.
allocated_block_ids
[
start
:
end
]
start_token_idx
=
start
*
vllm_block_size
end_token_idx
=
end
*
vllm_block_size
token_ids
=
list
(
tracker
.
all_token_ids
)
op
=
LoadStoreOp
(
token_ids
=
token_ids
,
block_ids
=
block_ids
,
start
=
start_token_idx
,
end
=
end_token_idx
,
)
ret
=
LMCacheMPRequestMetadata
(
request_id
=
tracker
.
request_id
,
direction
=
"RETRIEVE"
,
op
=
LoadStoreOp
(
block_hashes
=
block_hashes
,
block_ids
=
block_ids
)
,
op
=
op
,
)
return
ret
...
...
@@ -643,7 +652,8 @@ class LMCacheMPConnector(KVConnectorBase_V1):
return
0
,
False
self
.
scheduler_adapter
.
maybe_submit_lookup_request
(
request
.
request_id
,
convert_block_hashes_to_bytes
(
request
.
block_hashes
)
request
.
request_id
,
token_ids
=
list
(
request
.
all_token_ids
),
)
ret
=
self
.
scheduler_adapter
.
check_lookup_result
(
request
.
request_id
)
...
...
@@ -766,6 +776,9 @@ class LMCacheMPConnector(KVConnectorBase_V1):
"""
# Clean up request tracker to prevent memory leak
self
.
_cleanup_request_tracker
(
request
.
request_id
)
# Notify LMCache to end the session for this request
self
.
scheduler_adapter
.
end_session
(
request
.
request_id
)
return
True
,
None
def
take_events
(
self
)
->
Iterable
[
"KVCacheEvent"
]:
...
...
@@ -846,7 +859,9 @@ class LMCacheMPConnector(KVConnectorBase_V1):
if
request_tracker
.
state
!=
LMCacheMPRequestState
.
WAITING_FOR_LOAD
:
continue
r_metadata
=
LMCacheMPRequestMetadata
.
GetRetrieveMetadata
(
request_tracker
,
blocks_per_chunk
request_tracker
,
blocks_per_chunk
,
vllm_block_size
=
self
.
vllm_block_size
,
)
if
r_metadata
is
not
None
:
metadata
.
add_request_metadata
(
r_metadata
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment