Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
ffae72b7
Unverified
Commit
ffae72b7
authored
Aug 15, 2025
by
Harrison Saturley-Hall
Committed by
GitHub
Aug 15, 2025
Browse files
fix: remove kvmanager feature from python 3.12 ai-dynamo-runtime wheel (#2456)
parent
5066dd48
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing 10 changed files with 46 additions and 18 deletions
+46
-18
Cargo.lock
Cargo.lock
+9
-9
Cargo.toml
Cargo.toml
+1
-1
container/Dockerfile
container/Dockerfile
+8
-1
container/Dockerfile.vllm
container/Dockerfile.vllm
+8
-1
container/build.sh
container/build.sh
+8
-0
lib/bindings/python/Cargo.toml
lib/bindings/python/Cargo.toml
+1
-1
lib/bindings/python/pyproject.toml
lib/bindings/python/pyproject.toml
+1
-1
lib/bindings/python/tests/test_block_manager.py
lib/bindings/python/tests/test_block_manager.py
+7
-1
lib/runtime/examples/Cargo.toml
lib/runtime/examples/Cargo.toml
+1
-1
pyproject.toml
pyproject.toml
+2
-2
No files found.
Cargo.lock
View file @
ffae72b7
...
@@ -1814,7 +1814,7 @@ dependencies = [
...
@@ -1814,7 +1814,7 @@ dependencies = [
[[package]]
[[package]]
name = "dynamo-engine-llamacpp"
name = "dynamo-engine-llamacpp"
version = "0.4.0"
version = "0.4.0+post0"
dependencies = [
dependencies = [
"async-stream",
"async-stream",
"dynamo-llm",
"dynamo-llm",
...
@@ -1826,7 +1826,7 @@ dependencies = [
...
@@ -1826,7 +1826,7 @@ dependencies = [
[[package]]
[[package]]
name = "dynamo-engine-mistralrs"
name = "dynamo-engine-mistralrs"
version = "0.4.0"
version = "0.4.0+post0"
dependencies = [
dependencies = [
"anyhow",
"anyhow",
"async-openai",
"async-openai",
...
@@ -1844,7 +1844,7 @@ dependencies = [
...
@@ -1844,7 +1844,7 @@ dependencies = [
[[package]]
[[package]]
name = "dynamo-llm"
name = "dynamo-llm"
version = "0.4.0"
version = "0.4.0+post0"
dependencies = [
dependencies = [
"ahash",
"ahash",
"akin",
"akin",
...
@@ -1923,7 +1923,7 @@ dependencies = [
...
@@ -1923,7 +1923,7 @@ dependencies = [
[[package]]
[[package]]
name = "dynamo-run"
name = "dynamo-run"
version = "0.4.0"
version = "0.4.0+post0"
dependencies = [
dependencies = [
"anyhow",
"anyhow",
"async-openai",
"async-openai",
...
@@ -1952,7 +1952,7 @@ dependencies = [
...
@@ -1952,7 +1952,7 @@ dependencies = [
[[package]]
[[package]]
name = "dynamo-runtime"
name = "dynamo-runtime"
version = "0.4.0"
version = "0.4.0+post0"
dependencies = [
dependencies = [
"anyhow",
"anyhow",
"arc-swap",
"arc-swap",
...
@@ -2009,7 +2009,7 @@ dependencies = [
...
@@ -2009,7 +2009,7 @@ dependencies = [
[[package]]
[[package]]
name = "dynamo-tokens"
name = "dynamo-tokens"
version = "0.4.0"
version = "0.4.0+post0"
dependencies = [
dependencies = [
"bytemuck",
"bytemuck",
"derive-getters",
"derive-getters",
...
@@ -3776,7 +3776,7 @@ checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa"
...
@@ -3776,7 +3776,7 @@ checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa"
[[package]]
[[package]]
name = "libdynamo_llm"
name = "libdynamo_llm"
version = "0.4.0"
version = "0.4.0+post0"
dependencies = [
dependencies = [
"anyhow",
"anyhow",
"async-once-cell",
"async-once-cell",
...
@@ -4088,7 +4088,7 @@ dependencies = [
...
@@ -4088,7 +4088,7 @@ dependencies = [
[[package]]
[[package]]
name = "metrics"
name = "metrics"
version = "0.4.0"
version = "0.4.0+post0"
dependencies = [
dependencies = [
"axum 0.8.3",
"axum 0.8.3",
"clap 4.5.40",
"clap 4.5.40",
...
@@ -5899,7 +5899,7 @@ dependencies = [
...
@@ -5899,7 +5899,7 @@ dependencies = [
[[package]]
[[package]]
name = "router"
name = "router"
version = "0.4.0"
version = "0.4.0+post0"
dependencies = [
dependencies = [
"clap 4.5.40",
"clap 4.5.40",
"dynamo-llm",
"dynamo-llm",
...
...
Cargo.toml
View file @
ffae72b7
...
@@ -15,7 +15,7 @@ members = [
...
@@ -15,7 +15,7 @@ members = [
resolver
=
"3"
resolver
=
"3"
[workspace.package]
[workspace.package]
version = "0.4.0"
version = "0.4.0+post0"
edition
=
"2021"
edition
=
"2021"
description
=
"Dynamo Inference Framework"
description
=
"Dynamo Inference Framework"
authors
=
[
"NVIDIA Inc. <sw-dl-dynamo@nvidia.com>"
]
authors
=
[
"NVIDIA Inc. <sw-dl-dynamo@nvidia.com>"
]
...
...
container/Dockerfile
View file @
ffae72b7
...
@@ -8,6 +8,7 @@ ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
...
@@ -8,6 +8,7 @@ ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# can be updated to later versions.
# can be updated to later versions.
ARG
BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
ARG
BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
ARG
RELEASE_BUILD=false
ARG
RELEASE_BUILD=false
ARG
ENABLE_KVBM=false
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64)
# ARCH: Used for package suffixes (e.g., amd64, arm64)
...
@@ -197,6 +198,8 @@ ARG CARGO_BUILD_JOBS
...
@@ -197,6 +198,8 @@ ARG CARGO_BUILD_JOBS
ENV
CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
ENV
CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG
RELEASE_BUILD
ARG
RELEASE_BUILD
# Use arg ENABLE_KVBM = true to turn on the block-manager feature
ARG
ENABLE_KVBM
WORKDIR
/opt/dynamo
WORKDIR
/opt/dynamo
...
@@ -228,7 +231,11 @@ COPY components/ /opt/dynamo/components/
...
@@ -228,7 +231,11 @@ COPY components/ /opt/dynamo/components/
RUN
uv build
--wheel
--out-dir
/opt/dynamo/dist
&&
\
RUN
uv build
--wheel
--out-dir
/opt/dynamo/dist
&&
\
cd
/opt/dynamo/lib/bindings/python
&&
\
cd
/opt/dynamo/lib/bindings/python
&&
\
uv pip
install
maturin[patchelf]
&&
\
uv pip
install
maturin[patchelf]
&&
\
maturin build
--release
--features
block-manager
--out
/opt/dynamo/dist
&&
\
if
[
"
$ENABLE_KVBM
"
=
"true"
]
;
then
\
maturin build
--release
--features
block-manager
--out
/opt/dynamo/dist
;
\
else
\
maturin build
--release
--out
/opt/dynamo/dist
;
\
fi
&&
\
if
[
"
$RELEASE_BUILD
"
=
"true"
]
;
then
\
if
[
"
$RELEASE_BUILD
"
=
"true"
]
;
then
\
# do not enable KVBM feature, ensure compatibility with lower glibc
# do not enable KVBM feature, ensure compatibility with lower glibc
uv run --python 3.11 maturin build --release --out /opt/dynamo/dist && \
uv run --python 3.11 maturin build --release --out /opt/dynamo/dist && \
...
...
container/Dockerfile.vllm
View file @
ffae72b7
...
@@ -8,6 +8,7 @@ ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
...
@@ -8,6 +8,7 @@ ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# can be updated to later versions.
# can be updated to later versions.
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
ARG RELEASE_BUILD
ARG RELEASE_BUILD
ARG ENABLE_KVBM=false
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
...
@@ -320,6 +321,8 @@ ARG CARGO_BUILD_JOBS
...
@@ -320,6 +321,8 @@ ARG CARGO_BUILD_JOBS
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG RELEASE_BUILD
ARG RELEASE_BUILD
# Use arg ENABLE_KVBM = true to turn on the block-manager feature
ARG ENABLE_KVBM
# Keep in sync with the base image.
# Keep in sync with the base image.
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
...
@@ -369,7 +372,11 @@ RUN cargo build \
...
@@ -369,7 +372,11 @@ RUN cargo build \
RUN uv build --wheel --out-dir /workspace/dist && \
RUN uv build --wheel --out-dir /workspace/dist && \
cd /workspace/lib/bindings/python && \
cd /workspace/lib/bindings/python && \
uv pip install maturin[patchelf] && \
uv pip install maturin[patchelf] && \
maturin build --release --features block-manager --out /workspace/dist && \
if [ "$ENABLE_KVBM" = "true" ]; then \
maturin build --release --features block-manager --out /workspace/dist; \
else \
maturin build --release --out /workspace/dist; \
fi && \
if [ "$RELEASE_BUILD" = "true" ]; then \
if [ "$RELEASE_BUILD" = "true" ]; then \
# do not enable KVBM feature, ensure compatibility with lower glibc
# do not enable KVBM feature, ensure compatibility with lower glibc
uv run --python 3.11 maturin build --release --out /workspace/dist && \
uv run --python 3.11 maturin build --release --out /workspace/dist && \
...
...
container/build.sh
View file @
ffae72b7
...
@@ -274,6 +274,9 @@ get_options() {
...
@@ -274,6 +274,9 @@ get_options() {
--release-build
)
--release-build
)
RELEASE_BUILD
=
true
RELEASE_BUILD
=
true
;;
;;
--enable-kvbm
)
ENABLE_KVBM
=
true
;;
--make-efa
)
--make-efa
)
NIXL_UCX_REF
=
$NIXL_UCX_EFA_REF
NIXL_UCX_REF
=
$NIXL_UCX_EFA_REF
;;
;;
...
@@ -530,6 +533,11 @@ if [ ! -z ${RELEASE_BUILD} ]; then
...
@@ -530,6 +533,11 @@ if [ ! -z ${RELEASE_BUILD} ]; then
BUILD_ARGS+
=
" --build-arg RELEASE_BUILD=
${
RELEASE_BUILD
}
"
BUILD_ARGS+
=
" --build-arg RELEASE_BUILD=
${
RELEASE_BUILD
}
"
fi
fi
if
[
!
-z
${
ENABLE_KVBM
}
]
;
then
echo
"Enabling the KVBM in the ai-dynamo-runtime"
BUILD_ARGS+
=
" --build-arg ENABLE_KVBM=
${
ENABLE_KVBM
}
"
fi
if
[
-n
"
${
NIXL_UCX_REF
}
"
]
;
then
if
[
-n
"
${
NIXL_UCX_REF
}
"
]
;
then
BUILD_ARGS+
=
" --build-arg NIXL_UCX_REF=
${
NIXL_UCX_REF
}
"
BUILD_ARGS+
=
" --build-arg NIXL_UCX_REF=
${
NIXL_UCX_REF
}
"
fi
fi
...
...
lib/bindings/python/Cargo.toml
View file @
ffae72b7
...
@@ -19,7 +19,7 @@
...
@@ -19,7 +19,7 @@
[package]
[package]
name
=
"dynamo-py3"
name
=
"dynamo-py3"
version = "0.4.0"
version = "0.4.0+post0"
edition
=
"2021"
edition
=
"2021"
authors
=
["NVIDIA"]
authors
=
["NVIDIA"]
license
=
"Apache-2.0"
license
=
"Apache-2.0"
...
...
lib/bindings/python/pyproject.toml
View file @
ffae72b7
...
@@ -16,7 +16,7 @@
...
@@ -16,7 +16,7 @@
[project]
[project]
name
=
"ai-dynamo-runtime"
name
=
"ai-dynamo-runtime"
dynamic
=
["version"]
version
=
"0.4.0.post0"
description
=
"Dynamo Inference Framework Runtime"
description
=
"Dynamo Inference Framework Runtime"
readme
=
"README.md"
readme
=
"README.md"
authors
=
[
authors
=
[
...
...
lib/bindings/python/tests/test_block_manager.py
View file @
ffae72b7
...
@@ -19,7 +19,13 @@ import asyncio
...
@@ -19,7 +19,13 @@ import asyncio
import
pytest
import
pytest
import
torch
import
torch
from
dynamo.llm
import
BlockManager
# Attempt to import the optional module
try
:
from
dynamo.llm
import
BlockManager
except
ImportError
:
pytest
.
importorskip
(
"optional_module"
,
reason
=
"block-manager feature is not enabled"
)
pytestmark
=
pytest
.
mark
.
pre_merge
pytestmark
=
pytest
.
mark
.
pre_merge
...
...
lib/runtime/examples/Cargo.toml
View file @
ffae72b7
...
@@ -22,7 +22,7 @@ members = [
...
@@ -22,7 +22,7 @@ members = [
resolver
=
"3"
resolver
=
"3"
[workspace.package]
[workspace.package]
version = "0.4.0"
version = "0.4.0+post0"
edition
=
"2021"
edition
=
"2021"
authors
=
["NVIDIA"]
authors
=
["NVIDIA"]
license
=
"Apache-2.0"
license
=
"Apache-2.0"
...
...
pyproject.toml
View file @
ffae72b7
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
[project]
[project]
name
=
"ai-dynamo"
name
=
"ai-dynamo"
version = "0.4.0"
version = "0.4.0.post0"
description
=
"Distributed Inference Framework"
description
=
"Distributed Inference Framework"
readme
=
"README.md"
readme
=
"README.md"
authors
=
[
authors
=
[
...
@@ -13,7 +13,7 @@ license = { text = "Apache-2.0" }
...
@@ -13,7 +13,7 @@ license = { text = "Apache-2.0" }
license-files
=
["LICENSE"]
license-files
=
["LICENSE"]
requires-python
=
">=3.10"
requires-python
=
">=3.10"
dependencies
=
[
dependencies
=
[
"ai-dynamo-runtime==0.4.0"
,
"ai-dynamo-runtime==0.4.0
.post0
"
,
"pytest>=8.3.4"
,
"pytest>=8.3.4"
,
"types-psutil>=7.0.0.20250218"
,
"types-psutil>=7.0.0.20250218"
,
"kubernetes>=32.0.1,<33.0.0"
,
"kubernetes>=32.0.1,<33.0.0"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment