Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
ae03c857
Unverified
Commit
ae03c857
Authored Jan 12, 2026 by Erez Zarum
Committed by GitHub Jan 12, 2026
Browse files
fix: NIXL CUDA12 + CUDA13 build (#5000)
Signed-off-by: Erez Zarum <erezz@amazon.com>
parent
f6ae58e3
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing 6 changed files with 63 additions and 12 deletions
+63
-12
container/Dockerfile
container/Dockerfile
+11
-3
container/Dockerfile.aws
container/Dockerfile.aws
+6
-0
container/Dockerfile.sglang
container/Dockerfile.sglang
+14
-3
container/Dockerfile.trtllm
container/Dockerfile.trtllm
+13
-3
container/Dockerfile.vllm
container/Dockerfile.vllm
+11
-3
container/build.sh
container/build.sh
+8
-0
No files found.
container/Dockerfile
View file @
ae03c857
...
...
@@ -199,7 +199,7 @@ ENV CUDA_PATH=/usr/local/cuda \
ARG
PYTHON_VERSION
ENV
VIRTUAL_ENV=/workspace/.venv
RUN
uv venv
${
VIRTUAL_ENV
}
--python
$PYTHON_VERSION
&&
\
uv pip
install
--upgrade
meson pybind11 patchelf maturin[patchelf]
uv pip
install
--upgrade
meson pybind11 patchelf maturin[patchelf]
tomlkit
ARG
NIXL_UCX_REF
ARG
NIXL_REF
...
...
@@ -344,8 +344,16 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
export
CMAKE_CUDA_COMPILER_LAUNCHER
=
"sccache"
;
\
fi
&&
\
source
${
VIRTUAL_ENV
}
/bin/activate
&&
\
git clone
--depth
1
--branch
${
NIXL_REF
}
"https://github.com/ai-dynamo/nixl.git"
&&
\
git clone
"https://github.com/ai-dynamo/nixl.git"
&&
\
cd
nixl
&&
\
git checkout
${
NIXL_REF
}
&&
\
CUDA_MAJOR
=
$(
nvcc
--version
|
grep
-Eo
'release [0-9]+\.[0-9]+'
|
cut
-d
' '
-f2
|
cut
-d
'.'
-f1
)
&&
\
if
[
"
$CUDA_MAJOR
"
-ne
12
]
&&
[
"
$CUDA_MAJOR
"
-ne
13
]
;
then
\
echo
"Invalid CUDA_MAJOR: '
$CUDA_MAJOR
'"
&&
\
exit
1
;
\
fi
&&
\
PKG_NAME
=
"nixl-cu
${
CUDA_MAJOR
}
"
&&
\
./contrib/tomlutil.py
--wheel-name
$PKG_NAME
pyproject.toml
&&
\
mkdir
build
&&
\
meson setup build/
--prefix
=
/opt/nvidia/nvda_nixl
--buildtype
=
release
\
-Dcudapath_lib
=
"/usr/local/cuda/lib64"
\
...
...
@@ -375,7 +383,7 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
export
CMAKE_CUDA_COMPILER_LAUNCHER
=
"sccache"
;
\
fi
&&
\
cd
/workspace/nixl
&&
\
uv build
.
--out-dir
/opt/dynamo/dist/nixl
--python
$PYTHON_VERSION
uv build
.
--wheel
--out-dir
/opt/dynamo/dist/nixl
--python
$PYTHON_VERSION
# Copy source code (order matters for layer caching)
COPY
pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /opt/dynamo/
...
...
container/Dockerfile.aws
View file @
ae03c857
...
...
@@ -43,8 +43,11 @@ RUN mkdir -p /tmp/efa && \
apt-get update && \
./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify && \
rm -rf /tmp/efa && \
rm -rf /opt/amazon/aws-ofi-nccl && \
ldconfig
ENV EFA_VERSION="${EFA_VERSION}"
USER dynamo
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
...
...
@@ -78,7 +81,10 @@ RUN mkdir -p /tmp/efa && \
apt-get update && \
./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify && \
rm -rf /tmp/efa && \
rm -rf /opt/amazon/aws-ofi-nccl && \
ldconfig
ENV EFA_VERSION="${EFA_VERSION}"
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
container/Dockerfile.sglang
View file @
ae03c857
...
...
@@ -212,7 +212,7 @@ ENV CUDA_PATH=/usr/local/cuda \
ARG PYTHON_VERSION
ENV VIRTUAL_ENV=/workspace/.venv
RUN uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
uv pip install --upgrade meson pybind11 patchelf maturin[patchelf]
uv pip install --upgrade meson pybind11 patchelf maturin[patchelf]
tomlkit
ARG NIXL_UCX_REF
ARG NIXL_REF
...
...
@@ -356,8 +356,16 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
fi && \
source ${VIRTUAL_ENV}/bin/activate && \
git clone
--depth 1 --branch ${NIXL_REF}
"https://github.com/ai-dynamo/nixl.git" && \
git clone "https://github.com/ai-dynamo/nixl.git" && \
cd nixl && \
git checkout ${NIXL_REF} && \
CUDA_MAJOR=$(nvcc --version | grep -Eo 'release [0-9]+\.[0-9]+' | cut -d' ' -f2 | cut -d'.' -f1) && \
if [ "$CUDA_MAJOR" -ne 12 ] && [ "$CUDA_MAJOR" -ne 13 ]; then \
echo "Invalid CUDA_MAJOR: '$CUDA_MAJOR'" && \
exit 1; \
fi && \
PKG_NAME="nixl-cu${CUDA_MAJOR}" && \
./contrib/tomlutil.py --wheel-name $PKG_NAME pyproject.toml && \
mkdir build && \
meson setup build/ --prefix=/opt/nvidia/nvda_nixl --buildtype=release \
-Dcudapath_lib="/usr/local/cuda/lib64" \
...
...
@@ -387,7 +395,7 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
fi && \
cd /workspace/nixl && \
uv build . --out-dir /opt/dynamo/dist/nixl --python $PYTHON_VERSION
uv build .
--wheel
--out-dir /opt/dynamo/dist/nixl --python $PYTHON_VERSION
# Copy source code (order matters for layer caching)
COPY pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /opt/dynamo/
...
...
@@ -481,12 +489,15 @@ RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/loca
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
COPY --chmod=775 --chown=dynamo:0 benchmarks/ /workspace/benchmarks/
COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/
ENV SGLANG_VERSION="${RUNTIME_IMAGE_TAG%%-*}"
RUN --mount=type=bind,source=.,target=/mnt/local_src \
pip install --no-cache-dir --break-system-packages \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
/opt/dynamo/wheelhouse/nixl/nixl*.whl \
sglang==${SGLANG_VERSION}
# Install common and test dependencies
...
...
container/Dockerfile.trtllm
View file @
ae03c857
...
...
@@ -223,7 +223,7 @@ ENV CUDA_PATH=/usr/local/cuda \
ARG PYTHON_VERSION
ENV VIRTUAL_ENV=/workspace/.venv
RUN uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
uv pip install --upgrade meson pybind11 patchelf maturin[patchelf]
uv pip install --upgrade meson pybind11 patchelf maturin[patchelf]
tomlkit
ARG NIXL_UCX_REF
ARG NIXL_REF
...
...
@@ -367,8 +367,16 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
fi && \
source ${VIRTUAL_ENV}/bin/activate && \
git clone
--depth 1 --branch ${NIXL_REF}
"https://github.com/ai-dynamo/nixl.git" && \
git clone "https://github.com/ai-dynamo/nixl.git" && \
cd nixl && \
git checkout ${NIXL_REF} && \
CUDA_MAJOR=$(nvcc --version | grep -Eo 'release [0-9]+\.[0-9]+' | cut -d' ' -f2 | cut -d'.' -f1) && \
if [ "$CUDA_MAJOR" -ne 12 ] && [ "$CUDA_MAJOR" -ne 13 ]; then \
echo "Invalid CUDA_MAJOR: '$CUDA_MAJOR'" && \
exit 1; \
fi && \
PKG_NAME="nixl-cu${CUDA_MAJOR}" && \
./contrib/tomlutil.py --wheel-name $PKG_NAME pyproject.toml && \
mkdir build && \
meson setup build/ --prefix=/opt/nvidia/nvda_nixl --buildtype=release \
-Dcudapath_lib="/usr/local/cuda/lib64" \
...
...
@@ -398,7 +406,7 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
fi && \
cd /workspace/nixl && \
uv build . --out-dir /opt/dynamo/dist/nixl --python $PYTHON_VERSION
uv build .
--wheel
--out-dir /opt/dynamo/dist/nixl --python $PYTHON_VERSION
# Copy source code (order matters for layer caching)
COPY pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /opt/dynamo/
...
...
@@ -747,6 +755,8 @@ $NIXL_PLUGIN_DIR:\
$TENSORRT_LIB_DIR:\
/opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages/torch/lib:\
/opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages/torch_tensorrt/lib:\
/usr/local/cuda/lib:\
/usr/local/cuda/lib64:\
$LD_LIBRARY_PATH
ENV NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
ENV OPAL_PREFIX=/opt/hpcx/ompi
...
...
container/Dockerfile.vllm
View file @
ae03c857
...
...
@@ -229,7 +229,7 @@ ENV CUDA_PATH=/usr/local/cuda \
ARG PYTHON_VERSION
ENV VIRTUAL_ENV=/workspace/.venv
RUN uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
uv pip install --upgrade meson pybind11 patchelf maturin[patchelf]
uv pip install --upgrade meson pybind11 patchelf maturin[patchelf]
tomlkit
ARG NIXL_UCX_REF
ARG NIXL_REF
...
...
@@ -395,8 +395,16 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
fi && \
source ${VIRTUAL_ENV}/bin/activate && \
git clone
--depth 1 --branch ${NIXL_REF}
"https://github.com/ai-dynamo/nixl.git" && \
git clone "https://github.com/ai-dynamo/nixl.git" && \
cd nixl && \
git checkout ${NIXL_REF} && \
CUDA_MAJOR=$(nvcc --version | grep -Eo 'release [0-9]+\.[0-9]+' | cut -d' ' -f2 | cut -d'.' -f1) && \
if [ "$CUDA_MAJOR" -ne 12 ] && [ "$CUDA_MAJOR" -ne 13 ]; then \
echo "Invalid CUDA_MAJOR: '$CUDA_MAJOR'" && \
exit 1; \
fi && \
PKG_NAME="nixl-cu${CUDA_MAJOR}" && \
./contrib/tomlutil.py --wheel-name $PKG_NAME pyproject.toml && \
mkdir build && \
meson setup build/ --prefix=/opt/nvidia/nvda_nixl --buildtype=release \
-Dcudapath_lib="/usr/local/cuda/lib64" \
...
...
@@ -426,7 +434,7 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
fi && \
cd /workspace/nixl && \
uv build . --out-dir /opt/dynamo/dist/nixl --python $PYTHON_VERSION
uv build .
--wheel
--out-dir /opt/dynamo/dist/nixl --python $PYTHON_VERSION
# Copy source code (order matters for layer caching)
COPY pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /opt/dynamo/
...
...
container/build.sh
View file @
ae03c857
...
...
@@ -183,6 +183,14 @@ get_options() {
missing_requirement
"
$1
"
fi
;;
--nixl-ref
)
if
[
"
$2
"
]
;
then
NIXL_REF
=
$2
shift
else
missing_requirement
"
$1
"
fi
;;
--tensorrtllm-pip-wheel-dir
)
if
[
"
$2
"
]
;
then
TENSORRTLLM_PIP_WHEEL_DIR
=
$2
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment