Unverified Commit 5a5bc51e authored by Ryan Olson, committed by GitHub
Browse files

refactor: migrate kvbm python bindings to the python directory (#4318)


Signed-off-by: Ryan Olson <rolson@nvidia.com>
parent 334ce551
......@@ -39,6 +39,7 @@ jobs:
runs-on:
group: Fastchecker
strategy:
# removing kvbm from here - it will fail to test with nixl dep enabled
matrix: { dir: ['.', 'lib/bindings/python', 'lib/runtime/examples', 'launch/dynamo-run'] }
permissions:
contents: read
......
......@@ -14,7 +14,6 @@ members = [
"lib/bindings/c",
"lib/bindings/python/codegen",
"lib/engines/*",
"lib/kvbm",
"lib/config",
]
# Exclude certain packages that are slow to build and we don't ship as flagship
......@@ -107,7 +106,9 @@ tokio = { version = "1", features = ["full"] }
tokio-stream = { version = "0.1" }
tokio-util = { version = "0.7", features = ["codec", "net", "rt"] }
tower-http = { version = "0.6", features = ["trace"] }
axum = { version = "0.8", features = ["macros"] }
axum = { version = "=0.8.4", features = ["macros"] }
hyper = { version = "=1.7.0" }
hyper-util = { version = "=0.1.17" }
tracing = { version = "0.1" }
tracing-subscriber = { version = "0.3", features = [
"env-filter",
......@@ -134,3 +135,4 @@ insta.opt-level = 3
# These make the build much slower but shrink the binary, and could help performance
codegen-units = 1
lto = "thin"
......@@ -317,7 +317,7 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
cd /opt/dynamo/lib/bindings/python && \
maturin build --release --out /opt/dynamo/dist && \
if [ "$ENABLE_KVBM" = "true" ]; then \
cd /opt/dynamo/lib/kvbm && \
cd /opt/dynamo/lib/bindings/kvbm && \
maturin build --release --out /opt/dynamo/dist; \
fi && \
/tmp/use-sccache.sh show-stats "Dynamo"
......
This source diff could not be displayed because it is too large. You can view the blob instead.
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
[workspace]
[package]
name = "kvbm-py3"
......@@ -23,8 +24,8 @@ default = ["block-manager"]
block-manager = ["dynamo-llm/block-manager", "dep:dlpark", "dep:cudarc"]
[dependencies]
dynamo-llm = { path = "../llm" }
dynamo-runtime = { path = "../runtime" }
dynamo-llm = { path = "../../llm" }
dynamo-runtime = { path = "../../runtime" }
anyhow = { version = "1" }
async-stream = { version = "0.3" }
......
......@@ -19,7 +19,7 @@ limitations under the License.
The Dynamo KVBM is a distributed KV-cache block management system designed for scalable LLM inference. It cleanly separates memory management from inference runtimes (vLLM, TensorRT-LLM, and SGLang), enabling GPU↔CPU↔Disk/Remote tiering, asynchronous block offload/onboard, and efficient block reuse.
![A block diagram showing a layered architecture view of Dynamo KV Block manager.](../../docs/images/kvbm-architecture.png)
![A block diagram showing a layered architecture view of Dynamo KV Block manager.](../../../docs/images/kvbm-architecture.png)
## Feature Highlights
......@@ -84,7 +84,7 @@ DYN_KVBM_CPU_CACHE_GB=100 vllm serve \
Qwen/Qwen3-8B
```
For more detailed integration with dynamo, disaggregated serving support and benchmarking, please check [vllm-setup](../../docs/kvbm/vllm-setup.md)
For more detailed integration with dynamo, disaggregated serving support and benchmarking, please check [vllm-setup](../../../docs/kvbm/vllm-setup.md)
### TensorRT-LLM
......@@ -106,12 +106,12 @@ DYN_KVBM_CPU_CACHE_GB=100 trtllm-serve Qwen/Qwen3-8B \
--extra_llm_api_options /tmp/kvbm_llm_api_config.yaml
```
For more detailed integration with dynamo and benchmarking, please check [trtllm-setup](../../docs/kvbm/trtllm-setup.md)
For more detailed integration with dynamo and benchmarking, please check [trtllm-setup](../../../docs/kvbm/trtllm-setup.md)
## 📚 Docs
- [Architecture](../../docs/kvbm/kvbm_architecture.md)
- [Motivation](../../docs/kvbm/kvbm_motivation.md)
- [Design Deepdive](../../docs/kvbm/kvbm_design_deepdive.md)
- [Architecture](../../../docs/kvbm/kvbm_architecture.md)
- [Motivation](../../../docs/kvbm/kvbm_motivation.md)
- [Design Deepdive](../../../docs/kvbm/kvbm_design_deepdive.md)
- [NIXL Overview](https://github.com/ai-dynamo/nixl/blob/main/docs/nixl.md)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment