Unverified commit ffae72b7, authored by Harrison Saturley-Hall and committed by GitHub
Browse files

fix: remove kvmanager feature from python 3.12 ai-dynamo-runtime wheel (#2456)

parent 5066dd48
......@@ -1814,7 +1814,7 @@ dependencies = [
[[package]]
name = "dynamo-engine-llamacpp"
version = "0.4.0"
version = "0.4.0+post0"
dependencies = [
"async-stream",
"dynamo-llm",
......@@ -1826,7 +1826,7 @@ dependencies = [
[[package]]
name = "dynamo-engine-mistralrs"
version = "0.4.0"
version = "0.4.0+post0"
dependencies = [
"anyhow",
"async-openai",
......@@ -1844,7 +1844,7 @@ dependencies = [
[[package]]
name = "dynamo-llm"
version = "0.4.0"
version = "0.4.0+post0"
dependencies = [
"ahash",
"akin",
......@@ -1923,7 +1923,7 @@ dependencies = [
[[package]]
name = "dynamo-run"
version = "0.4.0"
version = "0.4.0+post0"
dependencies = [
"anyhow",
"async-openai",
......@@ -1952,7 +1952,7 @@ dependencies = [
[[package]]
name = "dynamo-runtime"
version = "0.4.0"
version = "0.4.0+post0"
dependencies = [
"anyhow",
"arc-swap",
......@@ -2009,7 +2009,7 @@ dependencies = [
[[package]]
name = "dynamo-tokens"
version = "0.4.0"
version = "0.4.0+post0"
dependencies = [
"bytemuck",
"derive-getters",
......@@ -3776,7 +3776,7 @@ checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa"
[[package]]
name = "libdynamo_llm"
version = "0.4.0"
version = "0.4.0+post0"
dependencies = [
"anyhow",
"async-once-cell",
......@@ -4088,7 +4088,7 @@ dependencies = [
[[package]]
name = "metrics"
version = "0.4.0"
version = "0.4.0+post0"
dependencies = [
"axum 0.8.3",
"clap 4.5.40",
......@@ -5899,7 +5899,7 @@ dependencies = [
[[package]]
name = "router"
version = "0.4.0"
version = "0.4.0+post0"
dependencies = [
"clap 4.5.40",
"dynamo-llm",
......
......@@ -15,7 +15,7 @@ members = [
resolver = "3"
[workspace.package]
version = "0.4.0"
version = "0.4.0+post0"
edition = "2021"
description = "Dynamo Inference Framework"
authors = ["NVIDIA Inc. <sw-dl-dynamo@nvidia.com>"]
......
......@@ -8,6 +8,7 @@ ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# can be updated to later versions.
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
ARG RELEASE_BUILD=false
ARG ENABLE_KVBM=false
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64)
......@@ -197,6 +198,8 @@ ARG CARGO_BUILD_JOBS
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG RELEASE_BUILD
# Use arg ENABLE_KVBM = true to turn on the block-manager feature
ARG ENABLE_KVBM
WORKDIR /opt/dynamo
......@@ -228,7 +231,11 @@ COPY components/ /opt/dynamo/components/
RUN uv build --wheel --out-dir /opt/dynamo/dist && \
cd /opt/dynamo/lib/bindings/python && \
uv pip install maturin[patchelf] && \
maturin build --release --features block-manager --out /opt/dynamo/dist && \
if [ "$ENABLE_KVBM" = "true" ]; then \
maturin build --release --features block-manager --out /opt/dynamo/dist; \
else \
maturin build --release --out /opt/dynamo/dist; \
fi && \
if [ "$RELEASE_BUILD" = "true" ]; then \
# do not enable KVBM feature, ensure compatibility with lower glibc
uv run --python 3.11 maturin build --release --out /opt/dynamo/dist && \
......
......@@ -8,6 +8,7 @@ ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# can be updated to later versions.
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
ARG RELEASE_BUILD
ARG ENABLE_KVBM=false
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
......@@ -320,6 +321,8 @@ ARG CARGO_BUILD_JOBS
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG RELEASE_BUILD
# Use arg ENABLE_KVBM = true to turn on the block-manager feature
ARG ENABLE_KVBM
# Keep in sync with the base image.
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
......@@ -369,7 +372,11 @@ RUN cargo build \
RUN uv build --wheel --out-dir /workspace/dist && \
cd /workspace/lib/bindings/python && \
uv pip install maturin[patchelf] && \
maturin build --release --features block-manager --out /workspace/dist && \
if [ "$ENABLE_KVBM" = "true" ]; then \
maturin build --release --features block-manager --out /workspace/dist; \
else \
maturin build --release --out /workspace/dist; \
fi && \
if [ "$RELEASE_BUILD" = "true" ]; then \
# do not enable KVBM feature, ensure compatibility with lower glibc
uv run --python 3.11 maturin build --release --out /workspace/dist && \
......
......@@ -274,6 +274,9 @@ get_options() {
--release-build)
RELEASE_BUILD=true
;;
--enable-kvbm)
ENABLE_KVBM=true
;;
--make-efa)
NIXL_UCX_REF=$NIXL_UCX_EFA_REF
;;
......@@ -530,6 +533,11 @@ if [ ! -z ${RELEASE_BUILD} ]; then
BUILD_ARGS+=" --build-arg RELEASE_BUILD=${RELEASE_BUILD} "
fi
# Forward the KVBM toggle to the image build only when ENABLE_KVBM is
# non-empty (the --enable-kvbm option sets it to "true"). The expansion is
# quoted so an empty or whitespace-containing value cannot break the test.
if [ -n "${ENABLE_KVBM}" ]; then
    echo "Enabling the KVBM in the ai-dynamo-runtime"
    BUILD_ARGS+=" --build-arg ENABLE_KVBM=${ENABLE_KVBM} "
fi
if [ -n "${NIXL_UCX_REF}" ]; then
BUILD_ARGS+=" --build-arg NIXL_UCX_REF=${NIXL_UCX_REF} "
fi
......
......@@ -19,7 +19,7 @@
[package]
name = "dynamo-py3"
version = "0.4.0"
version = "0.4.0+post0"
edition = "2021"
authors = ["NVIDIA"]
license = "Apache-2.0"
......
......@@ -16,7 +16,7 @@
[project]
name = "ai-dynamo-runtime"
dynamic = ["version"]
version = "0.4.0.post0"
description = "Dynamo Inference Framework Runtime"
readme = "README.md"
authors = [
......
......@@ -19,7 +19,13 @@ import asyncio
import pytest
import torch
from dynamo.llm import BlockManager
# BlockManager is an optional export of dynamo.llm; it is missing when the
# bindings were built without the block-manager feature. In that case skip
# this whole test module explicitly instead of relying on a placeholder
# module name failing to import.
try:
    from dynamo.llm import BlockManager
except ImportError:
    pytest.skip("block-manager feature is not enabled", allow_module_level=True)
pytestmark = pytest.mark.pre_merge
......
......@@ -22,7 +22,7 @@ members = [
resolver = "3"
[workspace.package]
version = "0.4.0"
version = "0.4.0+post0"
edition = "2021"
authors = ["NVIDIA"]
license = "Apache-2.0"
......
......@@ -3,7 +3,7 @@
[project]
name = "ai-dynamo"
version = "0.4.0"
version = "0.4.0.post0"
description = "Distributed Inference Framework"
readme = "README.md"
authors = [
......@@ -13,7 +13,7 @@ license = { text = "Apache-2.0" }
license-files = ["LICENSE"]
requires-python = ">=3.10"
dependencies = [
"ai-dynamo-runtime==0.4.0",
"ai-dynamo-runtime==0.4.0.post0",
"pytest>=8.3.4",
"types-psutil>=7.0.0.20250218",
"kubernetes>=32.0.1,<33.0.0",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment