Unverified Commit dc75cf18 authored by ptarasiewiczNV's avatar ptarasiewiczNV Committed by GitHub
Browse files

chore: Move NIXL repo clone to Dockerfiles (#2009)

parent f6f392c8
...@@ -29,20 +29,6 @@ ARG ARCH_ALT=x86_64 ...@@ -29,20 +29,6 @@ ARG ARCH_ALT=x86_64
ARG SGLANG_VERSION="0.4.9.post1" ARG SGLANG_VERSION="0.4.9.post1"
ARG SGL_KERNEL_VERSION="0.2.4" ARG SGL_KERNEL_VERSION="0.2.4"
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base
# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT
WORKDIR /opt/nixl
# Add a cache hint that only changes when the nixl commit changes
ARG NIXL_COMMIT
# This line acts as a cache key - it only changes when NIXL_COMMIT changes
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt
# Copy the nixl source
COPY --from=nixl . .
################################## ##################################
########## Base Image ############ ########## Base Image ############
################################## ##################################
...@@ -52,6 +38,7 @@ FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base ...@@ -52,6 +38,7 @@ FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
# Redeclare ARCH and ARCH_ALT so they're available in this stage # Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH ARG ARCH
ARG ARCH_ALT ARG ARCH_ALT
ARG NIXL_REF=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4
USER root USER root
ARG PYTHON_VERSION=3.12 ARG PYTHON_VERSION=3.12
...@@ -111,9 +98,10 @@ SHELL ["/bin/bash", "-c"] ...@@ -111,9 +98,10 @@ SHELL ["/bin/bash", "-c"]
WORKDIR /workspace WORKDIR /workspace
### NIXL SETUP ### ### NIXL SETUP ###
# Copy nixl source, and use commit hash as cache hint # Clone nixl source, and checkout the nixl ref
COPY --from=nixl_base /opt/nixl /opt/nixl RUN git clone "https://github.com/ai-dynamo/nixl.git" /opt/nixl && \
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt cd /opt/nixl && \
git checkout ${NIXL_REF}
RUN if [ "$ARCH" = "arm64" ]; then \ RUN if [ "$ARCH" = "arm64" ]; then \
cd /opt/nixl && \ cd /opt/nixl && \
mkdir build && \ mkdir build && \
......
...@@ -34,16 +34,6 @@ ARG RUNTIME_IMAGE_TAG="12.9.0-runtime-ubuntu24.04" ...@@ -34,16 +34,6 @@ ARG RUNTIME_IMAGE_TAG="12.9.0-runtime-ubuntu24.04"
ARG ARCH=amd64 ARG ARCH=amd64
ARG ARCH_ALT=x86_64 ARG ARCH_ALT=x86_64
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base
WORKDIR /opt/nixl
# Add a cache hint that only changes when the nixl commit changes
ARG NIXL_COMMIT
# This line acts as a cache key - it only changes when NIXL_COMMIT changes
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt
# Copy the nixl source
COPY --from=nixl . .
################################## ##################################
########## Build Image ########### ########## Build Image ###########
################################## ##################################
...@@ -53,6 +43,7 @@ FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS build ...@@ -53,6 +43,7 @@ FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS build
# Redeclare ARCH and ARCH_ALT so they're available in this build stage # Redeclare ARCH and ARCH_ALT so they're available in this build stage
ARG ARCH ARG ARCH
ARG ARCH_ALT ARG ARCH_ALT
ARG NIXL_REF=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4
USER root USER root
...@@ -101,9 +92,10 @@ ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH ...@@ -101,9 +92,10 @@ ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH
SHELL ["/bin/bash", "-c"] SHELL ["/bin/bash", "-c"]
# NIXL SETUP # NIXL SETUP
# Copy nixl source, and use commit hash as cache hint # Clone nixl source, and checkout the nixl ref
COPY --from=nixl_base /opt/nixl /opt/nixl RUN git clone "https://github.com/ai-dynamo/nixl.git" /opt/nixl && \
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt cd /opt/nixl && \
git checkout ${NIXL_REF}
RUN if [ "$ARCH" = "arm64" ]; then \ RUN if [ "$ARCH" = "arm64" ]; then \
cd /opt/nixl && \ cd /opt/nixl && \
mkdir build && \ mkdir build && \
......
...@@ -26,20 +26,6 @@ ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04" ...@@ -26,20 +26,6 @@ ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
ARG ARCH=amd64 ARG ARCH=amd64
ARG ARCH_ALT=x86_64 ARG ARCH_ALT=x86_64
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base
# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT
WORKDIR /opt/nixl
# Add a cache hint that only changes when the nixl commit changes
ARG NIXL_COMMIT
# This line acts as a cache key - it only changes when NIXL_COMMIT changes
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt
# Copy the nixl source
COPY --from=nixl . .
################################## ##################################
########## Base Image ############ ########## Base Image ############
################################## ##################################
...@@ -80,6 +66,7 @@ RUN apt-get update -y && \ ...@@ -80,6 +66,7 @@ RUN apt-get update -y && \
libnuma-dev librdmacm-dev ibverbs-providers libnuma-dev librdmacm-dev ibverbs-providers
ARG NIXL_UCX_REF=v1.19.x ARG NIXL_UCX_REF=v1.19.x
ARG NIXL_REF=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4
WORKDIR /workspace WORKDIR /workspace
...@@ -118,10 +105,11 @@ SHELL ["/bin/bash", "-c"] ...@@ -118,10 +105,11 @@ SHELL ["/bin/bash", "-c"]
WORKDIR /workspace WORKDIR /workspace
### NIXL SETUP ### ### NIXL SETUP ###
# Copy nixl source, and use commit hash as cache hint # Clone nixl source
# TEMP: disable gds backend for arm64 # TEMP: disable gds backend for arm64
COPY --from=nixl_base /opt/nixl /opt/nixl RUN git clone "https://github.com/ai-dynamo/nixl.git" /opt/nixl && \
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt cd /opt/nixl && \
git checkout ${NIXL_REF}
RUN if [ "$ARCH" = "arm64" ]; then \ RUN if [ "$ARCH" = "arm64" ]; then \
cd /opt/nixl && \ cd /opt/nixl && \
mkdir build && \ mkdir build && \
...@@ -199,9 +187,13 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \ ...@@ -199,9 +187,13 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
cd tools/ep_kernels && \ cd tools/ep_kernels && \
bash install_python_libraries.sh && \ bash install_python_libraries.sh && \
cd ep_kernels_workspace && \ cd ep_kernels_workspace && \
git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git && \ git clone https://github.com/deepseek-ai/DeepGEMM.git && \
cd DeepGEMM && \ cd DeepGEMM && \
python setup.py install; \ sed -i 's|git@github.com:|https://github.com/|g' .gitmodules && \
git submodule sync --recursive && \
git submodule update --init --recursive && \
cat install.sh && \
./install.sh; \
else \ else \
uv pip install pip cuda-python && \ uv pip install pip cuda-python && \
mkdir /opt/vllm && \ mkdir /opt/vllm && \
...@@ -213,9 +205,13 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \ ...@@ -213,9 +205,13 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
cd tools/ep_kernels && \ cd tools/ep_kernels && \
bash install_python_libraries.sh && \ bash install_python_libraries.sh && \
cd ep_kernels_workspace && \ cd ep_kernels_workspace && \
git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git && \ git clone https://github.com/deepseek-ai/DeepGEMM.git && \
cd DeepGEMM && \ cd DeepGEMM && \
python setup.py install; \ sed -i 's|git@github.com:|https://github.com/|g' .gitmodules && \
git submodule sync --recursive && \
git submodule update --init --recursive && \
cat install.sh && \
./install.sh; \
fi fi
# Common dependencies # Common dependencies
......
...@@ -111,9 +111,7 @@ NONE_BASE_IMAGE_TAG="24.04" ...@@ -111,9 +111,7 @@ NONE_BASE_IMAGE_TAG="24.04"
SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
NIXL_COMMIT=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4 NIXL_REF=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4
NIXL_REPO=ai-dynamo/nixl.git
NIXL_UCX_EFA_REF=7ec95b95e524a87e81cac92f5ca8523e3966b16b NIXL_UCX_EFA_REF=7ec95b95e524a87e81cac92f5ca8523e3966b16b
NO_CACHE="" NO_CACHE=""
...@@ -390,7 +388,7 @@ if [[ "$PLATFORM" == *"linux/arm64"* ]]; then ...@@ -390,7 +388,7 @@ if [[ "$PLATFORM" == *"linux/arm64"* ]]; then
ARCH="arm64" ARCH="arm64"
BUILD_ARGS+=" --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64 " BUILD_ARGS+=" --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64 "
# TEMP: Pin to nixl 0.3.1 for arm build, since 0.4.0 fails # TEMP: Pin to nixl 0.3.1 for arm build, since 0.4.0 fails
NIXL_COMMIT=3503658e71143b56f9d5b1b440d84a94b9c41af8 NIXL_REF=3503658e71143b56f9d5b1b440d84a94b9c41af8
fi fi
# Update DOCKERFILE if framework is VLLM # Update DOCKERFILE if framework is VLLM
...@@ -404,36 +402,8 @@ elif [[ $FRAMEWORK == "SGLANG" ]]; then ...@@ -404,36 +402,8 @@ elif [[ $FRAMEWORK == "SGLANG" ]]; then
DOCKERFILE=${SOURCE_DIR}/Dockerfile.sglang DOCKERFILE=${SOURCE_DIR}/Dockerfile.sglang
fi fi
NIXL_DIR="/tmp/nixl/nixl_src" # Add NIXL_REF as a build argument
BUILD_ARGS+=" --build-arg NIXL_REF=${NIXL_REF} "
# Clone original NIXL to temp directory
if [ -d "$NIXL_DIR" ]; then
echo "Warning: $NIXL_DIR already exists, skipping clone"
else
if [ -n "${GITHUB_TOKEN}" ]; then
git clone "https://oauth2:${GITHUB_TOKEN}@github.com/${NIXL_REPO}" "$NIXL_DIR"
else
# Try HTTPS first with credential prompting disabled, fall back to SSH if it fails
if ! GIT_TERMINAL_PROMPT=0 git clone https://github.com/${NIXL_REPO} "$NIXL_DIR"; then
echo "HTTPS clone failed, falling back to SSH..."
git clone git@github.com:${NIXL_REPO} "$NIXL_DIR"
fi
fi
fi
pushd "$NIXL_DIR" || exit
if ! git checkout ${NIXL_COMMIT}; then
echo "ERROR: Failed to checkout NIXL commit ${NIXL_COMMIT}. The cached directory may be out of date."
echo "Please delete $NIXL_DIR and re-run the build script."
exit 1
fi
popd
BUILD_CONTEXT_ARG+=" --build-context nixl=$NIXL_DIR"
# Add NIXL_COMMIT as a build argument to enable caching
BUILD_ARGS+=" --build-arg NIXL_COMMIT=${NIXL_COMMIT} "
if [[ $TARGET == "local-dev" ]]; then if [[ $TARGET == "local-dev" ]]; then
BUILD_ARGS+=" --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) " BUILD_ARGS+=" --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) "
...@@ -519,7 +489,7 @@ if [[ $FRAMEWORK == "TENSORRTLLM" ]]; then ...@@ -519,7 +489,7 @@ if [[ $FRAMEWORK == "TENSORRTLLM" ]]; then
echo "Checking for TensorRT-LLM wheel in ${TENSORRTLLM_PIP_WHEEL_DIR}" echo "Checking for TensorRT-LLM wheel in ${TENSORRTLLM_PIP_WHEEL_DIR}"
if ! check_wheel_file "${TENSORRTLLM_PIP_WHEEL_DIR}" "${ARCH}_${TRTLLM_COMMIT}"; then if ! check_wheel_file "${TENSORRTLLM_PIP_WHEEL_DIR}" "${ARCH}_${TRTLLM_COMMIT}"; then
echo "WARN: Valid trtllm wheel file not found in ${TENSORRTLLM_PIP_WHEEL_DIR}, attempting to build from source" echo "WARN: Valid trtllm wheel file not found in ${TENSORRTLLM_PIP_WHEEL_DIR}, attempting to build from source"
if ! env -i ${SOURCE_DIR}/build_trtllm_wheel.sh -o ${TENSORRTLLM_PIP_WHEEL_DIR} -c ${TRTLLM_COMMIT} -a ${ARCH} -n ${NIXL_COMMIT}; then if ! env -i ${SOURCE_DIR}/build_trtllm_wheel.sh -o ${TENSORRTLLM_PIP_WHEEL_DIR} -c ${TRTLLM_COMMIT} -a ${ARCH} -n ${NIXL_REF}; then
error "ERROR: Failed to build TensorRT-LLM wheel" error "ERROR: Failed to build TensorRT-LLM wheel"
fi fi
fi fi
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment