Unverified Commit dc75cf18 authored by ptarasiewiczNV's avatar ptarasiewiczNV Committed by GitHub
Browse files

chore: Move NIXL repo clone to Dockerfiles (#2009)

parent f6f392c8
......@@ -29,20 +29,6 @@ ARG ARCH_ALT=x86_64
ARG SGLANG_VERSION="0.4.9.post1"
ARG SGL_KERNEL_VERSION="0.2.4"
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base
# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT
WORKDIR /opt/nixl
# Add a cache hint that only changes when the nixl commit changes
ARG NIXL_COMMIT
# This line acts as a cache key - it only changes when NIXL_COMMIT changes
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt
# Copy the nixl source
COPY --from=nixl . .
##################################
########## Base Image ############
##################################
......@@ -52,6 +38,7 @@ FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT
ARG NIXL_REF=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4
USER root
ARG PYTHON_VERSION=3.12
......@@ -111,9 +98,10 @@ SHELL ["/bin/bash", "-c"]
WORKDIR /workspace
### NIXL SETUP ###
# Copy nixl source, and use commit hash as cache hint
COPY --from=nixl_base /opt/nixl /opt/nixl
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt
# Clone nixl source, and checkout the nixl ref
RUN git clone "https://github.com/ai-dynamo/nixl.git" /opt/nixl && \
cd /opt/nixl && \
git checkout ${NIXL_REF}
RUN if [ "$ARCH" = "arm64" ]; then \
cd /opt/nixl && \
mkdir build && \
......
......@@ -34,16 +34,6 @@ ARG RUNTIME_IMAGE_TAG="12.9.0-runtime-ubuntu24.04"
ARG ARCH=amd64
ARG ARCH_ALT=x86_64
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base
WORKDIR /opt/nixl
# Add a cache hint that only changes when the nixl commit changes
ARG NIXL_COMMIT
# This line acts as a cache key - it only changes when NIXL_COMMIT changes
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt
# Copy the nixl source
COPY --from=nixl . .
##################################
########## Build Image ###########
##################################
......@@ -53,6 +43,7 @@ FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS build
# Redeclare ARCH and ARCH_ALT so they're available in this build stage
ARG ARCH
ARG ARCH_ALT
ARG NIXL_REF=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4
USER root
......@@ -101,9 +92,10 @@ ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH
SHELL ["/bin/bash", "-c"]
# NIXL SETUP
# Copy nixl source, and use commit hash as cache hint
COPY --from=nixl_base /opt/nixl /opt/nixl
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt
# Clone nixl source, and checkout the nixl ref
RUN git clone "https://github.com/ai-dynamo/nixl.git" /opt/nixl && \
cd /opt/nixl && \
git checkout ${NIXL_REF}
RUN if [ "$ARCH" = "arm64" ]; then \
cd /opt/nixl && \
mkdir build && \
......
......@@ -26,20 +26,6 @@ ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
ARG ARCH=amd64
ARG ARCH_ALT=x86_64
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base
# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT
WORKDIR /opt/nixl
# Add a cache hint that only changes when the nixl commit changes
ARG NIXL_COMMIT
# This line acts as a cache key - it only changes when NIXL_COMMIT changes
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt
# Copy the nixl source
COPY --from=nixl . .
##################################
########## Base Image ############
##################################
......@@ -80,6 +66,7 @@ RUN apt-get update -y && \
libnuma-dev librdmacm-dev ibverbs-providers
ARG NIXL_UCX_REF=v1.19.x
ARG NIXL_REF=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4
WORKDIR /workspace
......@@ -118,10 +105,11 @@ SHELL ["/bin/bash", "-c"]
WORKDIR /workspace
### NIXL SETUP ###
# Copy nixl source, and use commit hash as cache hint
# Clone nixl source
# TEMP: disable gds backend for arm64
COPY --from=nixl_base /opt/nixl /opt/nixl
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt
RUN git clone "https://github.com/ai-dynamo/nixl.git" /opt/nixl && \
cd /opt/nixl && \
git checkout ${NIXL_REF}
RUN if [ "$ARCH" = "arm64" ]; then \
cd /opt/nixl && \
mkdir build && \
......@@ -199,9 +187,13 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
cd tools/ep_kernels && \
bash install_python_libraries.sh && \
cd ep_kernels_workspace && \
git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git && \
git clone https://github.com/deepseek-ai/DeepGEMM.git && \
cd DeepGEMM && \
python setup.py install; \
sed -i 's|git@github.com:|https://github.com/|g' .gitmodules && \
git submodule sync --recursive && \
git submodule update --init --recursive && \
cat install.sh && \
./install.sh; \
else \
uv pip install pip cuda-python && \
mkdir /opt/vllm && \
......@@ -213,9 +205,13 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
cd tools/ep_kernels && \
bash install_python_libraries.sh && \
cd ep_kernels_workspace && \
git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git && \
git clone https://github.com/deepseek-ai/DeepGEMM.git && \
cd DeepGEMM && \
python setup.py install; \
sed -i 's|git@github.com:|https://github.com/|g' .gitmodules && \
git submodule sync --recursive && \
git submodule update --init --recursive && \
cat install.sh && \
./install.sh; \
fi
# Common dependencies
......
......@@ -111,9 +111,7 @@ NONE_BASE_IMAGE_TAG="24.04"
SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
NIXL_COMMIT=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4
NIXL_REPO=ai-dynamo/nixl.git
NIXL_REF=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4
NIXL_UCX_EFA_REF=7ec95b95e524a87e81cac92f5ca8523e3966b16b
NO_CACHE=""
......@@ -390,7 +388,7 @@ if [[ "$PLATFORM" == *"linux/arm64"* ]]; then
ARCH="arm64"
BUILD_ARGS+=" --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64 "
# TEMP: Pin to nixl 0.3.1 for arm build, since 0.4.0 fails
NIXL_COMMIT=3503658e71143b56f9d5b1b440d84a94b9c41af8
NIXL_REF=3503658e71143b56f9d5b1b440d84a94b9c41af8
fi
# Update DOCKERFILE if framework is VLLM
......@@ -404,36 +402,8 @@ elif [[ $FRAMEWORK == "SGLANG" ]]; then
DOCKERFILE=${SOURCE_DIR}/Dockerfile.sglang
fi
NIXL_DIR="/tmp/nixl/nixl_src"
# Clone original NIXL to temp directory
if [ -d "$NIXL_DIR" ]; then
echo "Warning: $NIXL_DIR already exists, skipping clone"
else
if [ -n "${GITHUB_TOKEN}" ]; then
git clone "https://oauth2:${GITHUB_TOKEN}@github.com/${NIXL_REPO}" "$NIXL_DIR"
else
# Try HTTPS first with credential prompting disabled, fall back to SSH if it fails
if ! GIT_TERMINAL_PROMPT=0 git clone https://github.com/${NIXL_REPO} "$NIXL_DIR"; then
echo "HTTPS clone failed, falling back to SSH..."
git clone git@github.com:${NIXL_REPO} "$NIXL_DIR"
fi
fi
fi
pushd "$NIXL_DIR" || exit
if ! git checkout ${NIXL_COMMIT}; then
echo "ERROR: Failed to checkout NIXL commit ${NIXL_COMMIT}. The cached directory may be out of date."
echo "Please delete $NIXL_DIR and re-run the build script."
exit 1
fi
popd
BUILD_CONTEXT_ARG+=" --build-context nixl=$NIXL_DIR"
# Add NIXL_COMMIT as a build argument to enable caching
BUILD_ARGS+=" --build-arg NIXL_COMMIT=${NIXL_COMMIT} "
# Add NIXL_REF as a build argument
BUILD_ARGS+=" --build-arg NIXL_REF=${NIXL_REF} "
if [[ $TARGET == "local-dev" ]]; then
BUILD_ARGS+=" --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) "
......@@ -519,7 +489,7 @@ if [[ $FRAMEWORK == "TENSORRTLLM" ]]; then
echo "Checking for TensorRT-LLM wheel in ${TENSORRTLLM_PIP_WHEEL_DIR}"
if ! check_wheel_file "${TENSORRTLLM_PIP_WHEEL_DIR}" "${ARCH}_${TRTLLM_COMMIT}"; then
echo "WARN: Valid trtllm wheel file not found in ${TENSORRTLLM_PIP_WHEEL_DIR}, attempting to build from source"
if ! env -i ${SOURCE_DIR}/build_trtllm_wheel.sh -o ${TENSORRTLLM_PIP_WHEEL_DIR} -c ${TRTLLM_COMMIT} -a ${ARCH} -n ${NIXL_COMMIT}; then
if ! env -i ${SOURCE_DIR}/build_trtllm_wheel.sh -o ${TENSORRTLLM_PIP_WHEEL_DIR} -c ${TRTLLM_COMMIT} -a ${ARCH} -n ${NIXL_REF}; then
error "ERROR: Failed to build TensorRT-LLM wheel"
fi
fi
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment