Unverified commit 0b8b7ffb, authored by Kyle McGill and committed by GitHub
Browse files

chore: Upgrade NIXL to 0.7.1 and allow KVBM to pick NIXL cuda 12 or 13 (#4281)


Signed-off-by: Tushar Sharma <tusharma@nvidia.com>
Co-authored-by: Tushar Sharma <tusharma@nvidia.com>
parent d01b6d2d
......@@ -6719,9 +6719,9 @@ dependencies = [
[[package]]
name = "nixl-sys"
version = "0.7.0"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a73b92494c94b2ff2d004cd9274d966863089e867dc9cd98bc640aefe7622036"
checksum = "6d80bd4b5b8363cfd933000a8757a453e58ee10ee6e400c38ae31db512444a31"
dependencies = [
"bindgen 0.71.1",
"cc",
......
......@@ -38,7 +38,7 @@ ARG SCCACHE_REGION=""
# NIXL configuration
ARG NIXL_UCX_REF=v1.19.0
ARG NIXL_REF=0.7.0
ARG NIXL_REF=0.7.1
ARG NIXL_GDRCOPY_REF=v2.5.1
# Python configuration
......@@ -319,7 +319,14 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
maturin build --release --out /opt/dynamo/dist && \
if [ "$ENABLE_KVBM" = "true" ]; then \
cd /opt/dynamo/lib/bindings/kvbm && \
maturin build --release --out /opt/dynamo/dist; \
maturin build --release --out target/wheels && \
auditwheel repair \
--exclude libnixl.so \
--exclude libnixl_build.so \
--exclude libnixl_common.so \
--plat manylinux_2_28_${ARCH_ALT} \
--wheel-dir /opt/dynamo/dist \
target/wheels/*.whl; \
fi && \
/tmp/use-sccache.sh show-stats "Dynamo"
......@@ -396,6 +403,7 @@ COPY --chown=dynamo: --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_L
# Copy built artifacts
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
COPY --chown=dynamo: --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR
COPY --chown=dynamo: --from=wheel_builder $CARGO_HOME $CARGO_HOME
......
......@@ -115,7 +115,7 @@ NONE_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
NIXL_REF=0.7.0
NIXL_REF=0.7.1
NIXL_UCX_REF=v1.19.0
NIXL_UCX_EFA_REF=9d2b88a1f67faf9876f267658bd077b379b8bb76
......
......@@ -58,14 +58,14 @@ If you are using a **GPU**, the following GPU models and architectures are suppo
| :----------------- | :---------- | :------------------------------------ | :----------- |
| ai-dynamo | 0.7.0 | >=2.28 | |
| ai-dynamo-runtime | 0.7.0 | >=2.28 (Python 3.12 has known issues) | |
| NIXL | 0.7.0 | >=2.27 | >=11.8 |
| NIXL | 0.7.1 | >=2.27 | >=11.8 |
### Build Dependency
| **Build Dependency** | **Version** |
| :------------------- | :------------------------------------------------------------------------------- |
| **TensorRT-LLM** | 1.1.0rc5 |
| **NIXL** | 0.7.0 |
| **NIXL** | 0.7.1 |
| **vLLM** | 0.10.1.1 |
| **SGLang** | 0.5.3rc0 |
......
......@@ -26,8 +26,8 @@ license = { text = "Apache-2.0" }
license-files = ["LICENSE"]
requires-python = ">=3.10"
dependencies = [
"nixl==0.7.0",
"pydantic>=2.0",
"nixl[cu12]==0.7.1",
"pydantic>=2.0"
]
classifiers = [
"Development Status :: 4 - Beta",
......@@ -45,6 +45,8 @@ classifiers = [
keywords = ["llm", "genai", "inference", "nvidia", "kvcache", "dynamo"]
[project.optional-dependencies]
cu12 = ["nixl[cu12]==0.7.1"]
cu13 = ["nixl[cu13]==0.7.1"]
test = [
"pytest>=8.3.4",
"pytest-mypy",
......@@ -61,10 +63,10 @@ module-name = "kvbm._core"
manifest-path = "Cargo.toml"
python-packages = ["kvbm"]
python-source = "python"
# Skip auditwheel here - we'll run it manually in dynamo/container/Dockerfile with exclusions
skip-auditwheel = true
[build-system]
requires = ["maturin>=1.0,<2.0", "patchelf"]
build-backend = "maturin"
[tool.uv]
config-settings = { build-args = '--auditwheel repair --manylinux' }
......@@ -2,6 +2,14 @@
# SPDX-License-Identifier: Apache-2.0
# flake8: noqa
import logging
logger = logging.getLogger(__name__)
# nixl needs to be loaded before any other imports to ensure that the nixl shared object is available for the KVBM core.
import nixl
logger.info(f"Loaded nixl API module: {nixl._api}")
from kvbm._core import BlockManager as BlockManager
from kvbm._core import KvbmLeader as KvbmLeader
......
......@@ -3,9 +3,6 @@
from typing import Optional
# Keeping this import is important because it runs the code in nixl’s __init__.py
# to set up the Nixl plugin path.
import nixl # noqa: F401
import torch
from kvbm.trtllm_integration.rust import KvConnectorWorker as RustKvConnectorWorker
from kvbm.utils import is_dyn_runtime_enabled
......
......@@ -9,9 +9,6 @@ from __future__ import annotations
from typing import TYPE_CHECKING, Optional
# Keeping this import is important because it runs the code in nixl’s __init__.py
# to set up the Nixl plugin path when there is no pre-defined NIXL_PLUGIN_DIR
import nixl # noqa: F401
import torch
from kvbm.utils import is_dyn_runtime_enabled
from vllm.config import VllmConfig
......
......@@ -95,7 +95,7 @@ dialoguer = { version = "0.11", default-features = false, features = [
# block_manager
aligned-vec = { version = "0.6.4", optional = true }
nixl-sys = { version = "=0.7.0", optional = true }
nixl-sys = { version = "=0.7.1", optional = true }
cudarc = { workspace = true, optional = true }
nix = { version = "0.26", optional = true }
......
......@@ -54,13 +54,13 @@ trtllm =[
vllm = [
"uvloop",
"nixl<=0.7.0",
"nixl[cu12]<=0.7.1",
"vllm[flashinfer]==0.10.2",
]
sglang = [
"uvloop",
"nixl<=0.7.0",
"nixl[cu12]<=0.7.1",
"sglang==0.5.4.post3",
]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment