Unverified commit ffae72b7, authored by Harrison Saturley-Hall and committed by GitHub
Browse files

fix: remove kvmanager feature from python 3.12 ai-dynamo-runtime wheel (#2456)

parent 5066dd48
...@@ -1814,7 +1814,7 @@ dependencies = [ ...@@ -1814,7 +1814,7 @@ dependencies = [
[[package]] [[package]]
name = "dynamo-engine-llamacpp" name = "dynamo-engine-llamacpp"
version = "0.4.0" version = "0.4.0+post0"
dependencies = [ dependencies = [
"async-stream", "async-stream",
"dynamo-llm", "dynamo-llm",
...@@ -1826,7 +1826,7 @@ dependencies = [ ...@@ -1826,7 +1826,7 @@ dependencies = [
[[package]] [[package]]
name = "dynamo-engine-mistralrs" name = "dynamo-engine-mistralrs"
version = "0.4.0" version = "0.4.0+post0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-openai", "async-openai",
...@@ -1844,7 +1844,7 @@ dependencies = [ ...@@ -1844,7 +1844,7 @@ dependencies = [
[[package]] [[package]]
name = "dynamo-llm" name = "dynamo-llm"
version = "0.4.0" version = "0.4.0+post0"
dependencies = [ dependencies = [
"ahash", "ahash",
"akin", "akin",
...@@ -1923,7 +1923,7 @@ dependencies = [ ...@@ -1923,7 +1923,7 @@ dependencies = [
[[package]] [[package]]
name = "dynamo-run" name = "dynamo-run"
version = "0.4.0" version = "0.4.0+post0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-openai", "async-openai",
...@@ -1952,7 +1952,7 @@ dependencies = [ ...@@ -1952,7 +1952,7 @@ dependencies = [
[[package]] [[package]]
name = "dynamo-runtime" name = "dynamo-runtime"
version = "0.4.0" version = "0.4.0+post0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"arc-swap", "arc-swap",
...@@ -2009,7 +2009,7 @@ dependencies = [ ...@@ -2009,7 +2009,7 @@ dependencies = [
[[package]] [[package]]
name = "dynamo-tokens" name = "dynamo-tokens"
version = "0.4.0" version = "0.4.0+post0"
dependencies = [ dependencies = [
"bytemuck", "bytemuck",
"derive-getters", "derive-getters",
...@@ -3776,7 +3776,7 @@ checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" ...@@ -3776,7 +3776,7 @@ checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa"
[[package]] [[package]]
name = "libdynamo_llm" name = "libdynamo_llm"
version = "0.4.0" version = "0.4.0+post0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-once-cell", "async-once-cell",
...@@ -4088,7 +4088,7 @@ dependencies = [ ...@@ -4088,7 +4088,7 @@ dependencies = [
[[package]] [[package]]
name = "metrics" name = "metrics"
version = "0.4.0" version = "0.4.0+post0"
dependencies = [ dependencies = [
"axum 0.8.3", "axum 0.8.3",
"clap 4.5.40", "clap 4.5.40",
...@@ -5899,7 +5899,7 @@ dependencies = [ ...@@ -5899,7 +5899,7 @@ dependencies = [
[[package]] [[package]]
name = "router" name = "router"
version = "0.4.0" version = "0.4.0+post0"
dependencies = [ dependencies = [
"clap 4.5.40", "clap 4.5.40",
"dynamo-llm", "dynamo-llm",
......
...@@ -15,7 +15,7 @@ members = [ ...@@ -15,7 +15,7 @@ members = [
resolver = "3" resolver = "3"
[workspace.package] [workspace.package]
version = "0.4.0" version = "0.4.0+post0"
edition = "2021" edition = "2021"
description = "Dynamo Inference Framework" description = "Dynamo Inference Framework"
authors = ["NVIDIA Inc. <sw-dl-dynamo@nvidia.com>"] authors = ["NVIDIA Inc. <sw-dl-dynamo@nvidia.com>"]
......
...@@ -8,6 +8,7 @@ ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" ...@@ -8,6 +8,7 @@ ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# can be updated to later versions. # can be updated to later versions.
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
ARG RELEASE_BUILD=false ARG RELEASE_BUILD=false
ARG ENABLE_KVBM=false
# Define general architecture ARGs for supporting both x86 and aarch64 builds. # Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64) # ARCH: Used for package suffixes (e.g., amd64, arm64)
...@@ -197,6 +198,8 @@ ARG CARGO_BUILD_JOBS ...@@ -197,6 +198,8 @@ ARG CARGO_BUILD_JOBS
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12. # Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG RELEASE_BUILD ARG RELEASE_BUILD
# Use arg ENABLE_KVBM = true to turn on the block-manager feature
ARG ENABLE_KVBM
WORKDIR /opt/dynamo WORKDIR /opt/dynamo
...@@ -228,7 +231,11 @@ COPY components/ /opt/dynamo/components/ ...@@ -228,7 +231,11 @@ COPY components/ /opt/dynamo/components/
RUN uv build --wheel --out-dir /opt/dynamo/dist && \ RUN uv build --wheel --out-dir /opt/dynamo/dist && \
cd /opt/dynamo/lib/bindings/python && \ cd /opt/dynamo/lib/bindings/python && \
uv pip install maturin[patchelf] && \ uv pip install maturin[patchelf] && \
maturin build --release --features block-manager --out /opt/dynamo/dist && \ if [ "$ENABLE_KVBM" = "true" ]; then \
maturin build --release --features block-manager --out /opt/dynamo/dist; \
else \
maturin build --release --out /opt/dynamo/dist; \
fi && \
if [ "$RELEASE_BUILD" = "true" ]; then \ if [ "$RELEASE_BUILD" = "true" ]; then \
# do not enable KVBM feature, ensure compatibility with lower glibc # do not enable KVBM feature, ensure compatibility with lower glibc
uv run --python 3.11 maturin build --release --out /opt/dynamo/dist && \ uv run --python 3.11 maturin build --release --out /opt/dynamo/dist && \
......
...@@ -8,6 +8,7 @@ ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" ...@@ -8,6 +8,7 @@ ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# can be updated to later versions. # can be updated to later versions.
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
ARG RELEASE_BUILD ARG RELEASE_BUILD
ARG ENABLE_KVBM=false
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda" ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04" ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
...@@ -320,6 +321,8 @@ ARG CARGO_BUILD_JOBS ...@@ -320,6 +321,8 @@ ARG CARGO_BUILD_JOBS
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12. # Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG RELEASE_BUILD ARG RELEASE_BUILD
# Use arg ENABLE_KVBM = true to turn on the block-manager feature
ARG ENABLE_KVBM
# Keep in sync with the base image. # Keep in sync with the base image.
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
...@@ -369,7 +372,11 @@ RUN cargo build \ ...@@ -369,7 +372,11 @@ RUN cargo build \
RUN uv build --wheel --out-dir /workspace/dist && \ RUN uv build --wheel --out-dir /workspace/dist && \
cd /workspace/lib/bindings/python && \ cd /workspace/lib/bindings/python && \
uv pip install maturin[patchelf] && \ uv pip install maturin[patchelf] && \
maturin build --release --features block-manager --out /workspace/dist && \ if [ "$ENABLE_KVBM" = "true" ]; then \
maturin build --release --features block-manager --out /workspace/dist; \
else \
maturin build --release --out /workspace/dist; \
fi && \
if [ "$RELEASE_BUILD" = "true" ]; then \ if [ "$RELEASE_BUILD" = "true" ]; then \
# do not enable KVBM feature, ensure compatibility with lower glibc # do not enable KVBM feature, ensure compatibility with lower glibc
uv run --python 3.11 maturin build --release --out /workspace/dist && \ uv run --python 3.11 maturin build --release --out /workspace/dist && \
......
...@@ -274,6 +274,9 @@ get_options() { ...@@ -274,6 +274,9 @@ get_options() {
--release-build) --release-build)
RELEASE_BUILD=true RELEASE_BUILD=true
;; ;;
--enable-kvbm)
ENABLE_KVBM=true
;;
--make-efa) --make-efa)
NIXL_UCX_REF=$NIXL_UCX_EFA_REF NIXL_UCX_REF=$NIXL_UCX_EFA_REF
;; ;;
...@@ -530,6 +533,11 @@ if [ ! -z ${RELEASE_BUILD} ]; then ...@@ -530,6 +533,11 @@ if [ ! -z ${RELEASE_BUILD} ]; then
BUILD_ARGS+=" --build-arg RELEASE_BUILD=${RELEASE_BUILD} " BUILD_ARGS+=" --build-arg RELEASE_BUILD=${RELEASE_BUILD} "
fi fi
if [ ! -z ${ENABLE_KVBM} ]; then
echo "Enabling the KVBM in the ai-dynamo-runtime"
BUILD_ARGS+=" --build-arg ENABLE_KVBM=${ENABLE_KVBM} "
fi
if [ -n "${NIXL_UCX_REF}" ]; then if [ -n "${NIXL_UCX_REF}" ]; then
BUILD_ARGS+=" --build-arg NIXL_UCX_REF=${NIXL_UCX_REF} " BUILD_ARGS+=" --build-arg NIXL_UCX_REF=${NIXL_UCX_REF} "
fi fi
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
[package] [package]
name = "dynamo-py3" name = "dynamo-py3"
version = "0.4.0" version = "0.4.0+post0"
edition = "2021" edition = "2021"
authors = ["NVIDIA"] authors = ["NVIDIA"]
license = "Apache-2.0" license = "Apache-2.0"
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
[project] [project]
name = "ai-dynamo-runtime" name = "ai-dynamo-runtime"
dynamic = ["version"] version = "0.4.0.post0"
description = "Dynamo Inference Framework Runtime" description = "Dynamo Inference Framework Runtime"
readme = "README.md" readme = "README.md"
authors = [ authors = [
......
...@@ -19,7 +19,13 @@ import asyncio ...@@ -19,7 +19,13 @@ import asyncio
import pytest import pytest
import torch import torch
from dynamo.llm import BlockManager # Attempt to import the optional module
try:
from dynamo.llm import BlockManager
except ImportError:
pytest.importorskip(
"optional_module", reason="block-manager feature is not enabled"
)
pytestmark = pytest.mark.pre_merge pytestmark = pytest.mark.pre_merge
......
...@@ -22,7 +22,7 @@ members = [ ...@@ -22,7 +22,7 @@ members = [
resolver = "3" resolver = "3"
[workspace.package] [workspace.package]
version = "0.4.0" version = "0.4.0+post0"
edition = "2021" edition = "2021"
authors = ["NVIDIA"] authors = ["NVIDIA"]
license = "Apache-2.0" license = "Apache-2.0"
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
[project] [project]
name = "ai-dynamo" name = "ai-dynamo"
version = "0.4.0" version = "0.4.0.post0"
description = "Distributed Inference Framework" description = "Distributed Inference Framework"
readme = "README.md" readme = "README.md"
authors = [ authors = [
...@@ -13,7 +13,7 @@ license = { text = "Apache-2.0" } ...@@ -13,7 +13,7 @@ license = { text = "Apache-2.0" }
license-files = ["LICENSE"] license-files = ["LICENSE"]
requires-python = ">=3.10" requires-python = ">=3.10"
dependencies = [ dependencies = [
"ai-dynamo-runtime==0.4.0", "ai-dynamo-runtime==0.4.0.post0",
"pytest>=8.3.4", "pytest>=8.3.4",
"types-psutil>=7.0.0.20250218", "types-psutil>=7.0.0.20250218",
"kubernetes>=32.0.1,<33.0.0", "kubernetes>=32.0.1,<33.0.0",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment