Unverified commit 07cfc3a1, authored by Ryan Olson, committed by GitHub
Browse files

feat: kvbm + connector (#2258)


Signed-off-by: Ryan Olson <rolson@nvidia.com>
Co-authored-by: Olga Andreeva <oandreeva@nvidia.com>
Co-authored-by: Ziqi Fan <ziqif@nvidia.com>
Co-authored-by: John Thompson <jothomson@nvidia.com>
Co-authored-by: Richard Huo <rihuo@nvidia.com>
Co-authored-by: Zicheng Ma <zichengm@nvidia.com>
parent bf5862a1
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -1130,6 +1130,23 @@ class BlockManager:
"""
...
class KvbmCacheManager:
    """KV cache manager for use with vLLM.

    Only the constructor signature is declared here; no implementation
    is visible in this declaration.
    """

    def __init__(self, block_manager: BlockManager) -> None:
        """Construct the cache manager from ``block_manager``.

        Args:
            block_manager: The ``BlockManager`` instance supplied at
                construction. NOTE(review): how the manager uses it is
                not shown here -- confirm against the implementation.
        """
class KvbmRequest:
    """Request object for the KV cache.

    Only the constructor signature is declared here; no implementation
    is visible in this declaration.
    """

    def __init__(self, request_id: int, tokens: List[int], block_size: int) -> None:
        """Construct a request.

        Args:
            request_id: Integer identifier of the request.
            tokens: List of integers; presumably the request's token
                ids -- TODO confirm against the implementation.
            block_size: Integer block size; presumably the number of
                tokens per KV block -- TODO confirm.
        """
class ZmqKvEventListener:
"""
A ZMQ-based key-value cache event listener that operates independently
......
......@@ -9,6 +9,8 @@ from dynamo._core import AggregatedMetrics as AggregatedMetrics
try:
from dynamo._core import BlockManager as BlockManager
from dynamo._core import KvbmLeader as KvbmLeader
from dynamo._core import KvbmWorker as KvbmWorker
except ImportError:
pass # BlockManager is not enabled by default
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment