Unverified commit 0b8b7ffb, authored by Kyle McGill, committed by GitHub
Browse files

chore: Upgrade NIXL to 0.7.1 and allow KVBM to pick NIXL cuda 12 or 13 (#4281)


Signed-off-by: Tushar Sharma <tusharma@nvidia.com>
Co-authored-by: Tushar Sharma <tusharma@nvidia.com>
parent d01b6d2d
...@@ -6719,9 +6719,9 @@ dependencies = [ ...@@ -6719,9 +6719,9 @@ dependencies = [
[[package]] [[package]]
name = "nixl-sys" name = "nixl-sys"
version = "0.7.0" version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a73b92494c94b2ff2d004cd9274d966863089e867dc9cd98bc640aefe7622036" checksum = "6d80bd4b5b8363cfd933000a8757a453e58ee10ee6e400c38ae31db512444a31"
dependencies = [ dependencies = [
"bindgen 0.71.1", "bindgen 0.71.1",
"cc", "cc",
......
...@@ -38,7 +38,7 @@ ARG SCCACHE_REGION="" ...@@ -38,7 +38,7 @@ ARG SCCACHE_REGION=""
# NIXL configuration # NIXL configuration
ARG NIXL_UCX_REF=v1.19.0 ARG NIXL_UCX_REF=v1.19.0
ARG NIXL_REF=0.7.0 ARG NIXL_REF=0.7.1
ARG NIXL_GDRCOPY_REF=v2.5.1 ARG NIXL_GDRCOPY_REF=v2.5.1
# Python configuration # Python configuration
...@@ -319,7 +319,14 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \ ...@@ -319,7 +319,14 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
maturin build --release --out /opt/dynamo/dist && \ maturin build --release --out /opt/dynamo/dist && \
if [ "$ENABLE_KVBM" = "true" ]; then \ if [ "$ENABLE_KVBM" = "true" ]; then \
cd /opt/dynamo/lib/bindings/kvbm && \ cd /opt/dynamo/lib/bindings/kvbm && \
maturin build --release --out /opt/dynamo/dist; \ maturin build --release --out target/wheels && \
auditwheel repair \
--exclude libnixl.so \
--exclude libnixl_build.so \
--exclude libnixl_common.so \
--plat manylinux_2_28_${ARCH_ALT} \
--wheel-dir /opt/dynamo/dist \
target/wheels/*.whl; \
fi && \ fi && \
/tmp/use-sccache.sh show-stats "Dynamo" /tmp/use-sccache.sh show-stats "Dynamo"
...@@ -396,6 +403,7 @@ COPY --chown=dynamo: --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_L ...@@ -396,6 +403,7 @@ COPY --chown=dynamo: --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_L
# Copy built artifacts # Copy built artifacts
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/ COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/ COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
COPY --chown=dynamo: --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR COPY --chown=dynamo: --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR
COPY --chown=dynamo: --from=wheel_builder $CARGO_HOME $CARGO_HOME COPY --chown=dynamo: --from=wheel_builder $CARGO_HOME $CARGO_HOME
......
...@@ -115,7 +115,7 @@ NONE_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" ...@@ -115,7 +115,7 @@ NONE_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
NIXL_REF=0.7.0 NIXL_REF=0.7.1
NIXL_UCX_REF=v1.19.0 NIXL_UCX_REF=v1.19.0
NIXL_UCX_EFA_REF=9d2b88a1f67faf9876f267658bd077b379b8bb76 NIXL_UCX_EFA_REF=9d2b88a1f67faf9876f267658bd077b379b8bb76
......
...@@ -58,14 +58,14 @@ If you are using a **GPU**, the following GPU models and architectures are suppo ...@@ -58,14 +58,14 @@ If you are using a **GPU**, the following GPU models and architectures are suppo
| :----------------- | :---------- | :------------------------------------ | :----------- | | :----------------- | :---------- | :------------------------------------ | :----------- |
| ai-dynamo | 0.7.0 | >=2.28 | | | ai-dynamo | 0.7.0 | >=2.28 | |
| ai-dynamo-runtime | 0.7.0 | >=2.28 (Python 3.12 has known issues) | | | ai-dynamo-runtime | 0.7.0 | >=2.28 (Python 3.12 has known issues) | |
| NIXL | 0.7.0 | >=2.27 | >=11.8 | | NIXL | 0.7.1 | >=2.27 | >=11.8 |
### Build Dependency ### Build Dependency
| **Build Dependency** | **Version** | | **Build Dependency** | **Version** |
| :------------------- | :------------------------------------------------------------------------------- | | :------------------- | :------------------------------------------------------------------------------- |
| **TensorRT-LLM** | 1.1.0rc5 | | **TensorRT-LLM** | 1.1.0rc5 |
| **NIXL** | 0.7.0 | | **NIXL** | 0.7.1 |
| **vLLM** | 0.10.1.1 | | **vLLM** | 0.10.1.1 |
| **SGLang** | 0.5.3rc0 | | **SGLang** | 0.5.3rc0 |
......
...@@ -26,8 +26,8 @@ license = { text = "Apache-2.0" } ...@@ -26,8 +26,8 @@ license = { text = "Apache-2.0" }
license-files = ["LICENSE"] license-files = ["LICENSE"]
requires-python = ">=3.10" requires-python = ">=3.10"
dependencies = [ dependencies = [
"nixl==0.7.0", "nixl[cu12]==0.7.1",
"pydantic>=2.0", "pydantic>=2.0"
] ]
classifiers = [ classifiers = [
"Development Status :: 4 - Beta", "Development Status :: 4 - Beta",
...@@ -45,6 +45,8 @@ classifiers = [ ...@@ -45,6 +45,8 @@ classifiers = [
keywords = ["llm", "genai", "inference", "nvidia", "kvcache", "dynamo"] keywords = ["llm", "genai", "inference", "nvidia", "kvcache", "dynamo"]
[project.optional-dependencies] [project.optional-dependencies]
cu12 = ["nixl[cu12]==0.7.1"]
cu13 = ["nixl[cu13]==0.7.1"]
test = [ test = [
"pytest>=8.3.4", "pytest>=8.3.4",
"pytest-mypy", "pytest-mypy",
...@@ -61,10 +63,10 @@ module-name = "kvbm._core" ...@@ -61,10 +63,10 @@ module-name = "kvbm._core"
manifest-path = "Cargo.toml" manifest-path = "Cargo.toml"
python-packages = ["kvbm"] python-packages = ["kvbm"]
python-source = "python" python-source = "python"
# Skip auditwheel here - we'll run it manually in dynamo/container/Dockerfile with exclusions
skip-auditwheel = true
[build-system] [build-system]
requires = ["maturin>=1.0,<2.0", "patchelf"] requires = ["maturin>=1.0,<2.0", "patchelf"]
build-backend = "maturin" build-backend = "maturin"
[tool.uv]
config-settings = { build-args = '--auditwheel repair --manylinux' }
...@@ -2,6 +2,14 @@ ...@@ -2,6 +2,14 @@
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# flake8: noqa # flake8: noqa
import logging
logger = logging.getLogger(__name__)
# nixl needs to be loaded before any other imports to ensure that the nixl shared object is available for the KVBM core.
import nixl
logger.info(f"Loaded nixl API module: {nixl._api}")
from kvbm._core import BlockManager as BlockManager from kvbm._core import BlockManager as BlockManager
from kvbm._core import KvbmLeader as KvbmLeader from kvbm._core import KvbmLeader as KvbmLeader
......
...@@ -3,9 +3,6 @@ ...@@ -3,9 +3,6 @@
from typing import Optional from typing import Optional
# Keeping this import is important because it runs the code in nixl’s __init__.py
# to set up the Nixl plugin path.
import nixl # noqa: F401
import torch import torch
from kvbm.trtllm_integration.rust import KvConnectorWorker as RustKvConnectorWorker from kvbm.trtllm_integration.rust import KvConnectorWorker as RustKvConnectorWorker
from kvbm.utils import is_dyn_runtime_enabled from kvbm.utils import is_dyn_runtime_enabled
......
...@@ -9,9 +9,6 @@ from __future__ import annotations ...@@ -9,9 +9,6 @@ from __future__ import annotations
from typing import TYPE_CHECKING, Optional from typing import TYPE_CHECKING, Optional
# Keeping this import is important because it runs the code in nixl’s __init__.py
# to set up the Nixl plugin path when there is no pre-defined NIXL_PLUGIN_DIR
import nixl # noqa: F401
import torch import torch
from kvbm.utils import is_dyn_runtime_enabled from kvbm.utils import is_dyn_runtime_enabled
from vllm.config import VllmConfig from vllm.config import VllmConfig
......
...@@ -95,7 +95,7 @@ dialoguer = { version = "0.11", default-features = false, features = [ ...@@ -95,7 +95,7 @@ dialoguer = { version = "0.11", default-features = false, features = [
# block_manager # block_manager
aligned-vec = { version = "0.6.4", optional = true } aligned-vec = { version = "0.6.4", optional = true }
nixl-sys = { version = "=0.7.0", optional = true } nixl-sys = { version = "=0.7.1", optional = true }
cudarc = { workspace = true, optional = true } cudarc = { workspace = true, optional = true }
nix = { version = "0.26", optional = true } nix = { version = "0.26", optional = true }
......
...@@ -54,13 +54,13 @@ trtllm =[ ...@@ -54,13 +54,13 @@ trtllm =[
vllm = [ vllm = [
"uvloop", "uvloop",
"nixl<=0.7.0", "nixl[cu12]<=0.7.1",
"vllm[flashinfer]==0.10.2", "vllm[flashinfer]==0.10.2",
] ]
sglang = [ sglang = [
"uvloop", "uvloop",
"nixl<=0.7.0", "nixl[cu12]<=0.7.1",
"sglang==0.5.4.post3", "sglang==0.5.4.post3",
] ]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment