Unverified Commit 23c42d83 authored by Sandeep Maddipatla's avatar Sandeep Maddipatla Committed by GitHub
Browse files

feat: Add support for cpu builds in dockerfiles (#7139)


Signed-off-by: default avatarSandeep Maddipatla <sandeep.maddipatla@intel.com>
parent f29753dc
...@@ -53,6 +53,12 @@ vllm: ...@@ -53,6 +53,12 @@ vllm:
base_image_tag: 2025.3.2-0-devel-ubuntu24.04 base_image_tag: 2025.3.2-0-devel-ubuntu24.04
runtime_image_tag: 2025.3.2-0-devel-ubuntu24.04 runtime_image_tag: 2025.3.2-0-devel-ubuntu24.04
vllm_ref: v0.14.0 vllm_ref: v0.14.0
# CPU-only build settings: base and runtime stages both start from the stock
# ubuntu image at the tag given below.
cpu:
base_image: ubuntu
runtime_image: ubuntu
# Tags are quoted so YAML keeps them as strings; an unquoted 24.04 is
# resolved as a float, which would mangle tags such as 24.10 (-> 24.1).
base_image_tag: "24.04"
runtime_image_tag: "24.04"
# vLLM ref used for the CPU build.
vllm_ref: v0.16.0
flashinf_ref: v0.6.4 flashinf_ref: v0.6.4
lmcache_ref: 0.4.1 lmcache_ref: 0.4.1
vllm_omni_ref: "v0.16.0" vllm_omni_ref: "v0.16.0"
......
...@@ -128,7 +128,7 @@ if [ "$DEVICE" = "cuda" ]; then ...@@ -128,7 +128,7 @@ if [ "$DEVICE" = "cuda" ]; then
echo "\n=== Configuration Summary ===" echo "\n=== Configuration Summary ==="
echo " VLLM_REF=$VLLM_REF | ARCH=$ARCH | CUDA_VERSION=$CUDA_VERSION | TORCH_BACKEND=$TORCH_BACKEND" echo " VLLM_REF=$VLLM_REF | ARCH=$ARCH | CUDA_VERSION=$CUDA_VERSION | TORCH_BACKEND=$TORCH_BACKEND"
echo " TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST | INSTALLATION_DIR=$INSTALLATION_DIR" echo " TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST | INSTALLATION_DIR=$INSTALLATION_DIR"
elif [ "$DEVICE" = "xpu" ]; then elif [ "$DEVICE" = "xpu" ] || [ "$DEVICE" = "cpu" ]; then
echo "\n=== Configuration Summary ===" echo "\n=== Configuration Summary ==="
echo " VLLM_REF=$VLLM_REF | ARCH=$ARCH | INSTALLATION_DIR=$INSTALLATION_DIR" echo " VLLM_REF=$VLLM_REF | ARCH=$ARCH | INSTALLATION_DIR=$INSTALLATION_DIR"
fi fi
...@@ -191,6 +191,21 @@ if [ "$DEVICE" = "cuda" ]; then ...@@ -191,6 +191,21 @@ if [ "$DEVICE" = "cuda" ]; then
uv pip install flashinfer-cubin==$FLASHINF_REF uv pip install flashinfer-cubin==$FLASHINF_REF
uv pip install flashinfer-jit-cache==$FLASHINF_REF --extra-index-url https://flashinfer.ai/whl/${TORCH_BACKEND} uv pip install flashinfer-jit-cache==$FLASHINF_REF --extra-index-url https://flashinfer.ai/whl/${TORCH_BACKEND}
fi fi
# CPU path: install the build requirements, build a vLLM wheel from the current
# source tree with the CPU target, then install that wheel.
if [ "$DEVICE" = "cpu" ]; then
echo "\n=== Installing vLLM for cpu ==="
# When CACHE_BUSTER is set and non-empty, record its value in /tmp/builder-buster.
# NOTE(review): presumably used to invalidate cached build layers; confirm against callers.
if [ -n "${CACHE_BUSTER:-}" ]; then
echo "$CACHE_BUSTER" > /tmp/builder-buster
fi
# vLLM CPU requirements pin torch with a +cpu local version (e.g. 2.10.0+cpu),
# which is published on the PyTorch CPU wheel index instead of PyPI.
# torchvision and torchaudio are installed with the same extra index so that
# their +cpu builds can be resolved as well.
uv pip install -r requirements/cpu-build.txt --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
uv pip install torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
# VLLM_TARGET_DEVICE=cpu is scoped to this single setup.py invocation; the
# resulting wheel is written to ./dist and installed from there.
VLLM_TARGET_DEVICE=cpu \
python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
uv pip install dist/*.whl
fi
echo "✓ vLLM installation completed" echo "✓ vLLM installation completed"
echo "\n=== Installing LMCache from source ===" echo "\n=== Installing LMCache from source ==="
......
...@@ -51,7 +51,7 @@ def parse_args(): ...@@ -51,7 +51,7 @@ def parse_args():
"--device", "--device",
type=str, type=str,
default="cuda", default="cuda",
choices=["cuda", "xpu"], choices=["cuda", "xpu", "cpu"],
help="Dockerfile device to use", help="Dockerfile device to use",
) )
...@@ -99,7 +99,7 @@ def parse_args(): ...@@ -99,7 +99,7 @@ def parse_args():
def validate_args(args): def validate_args(args):
valid_inputs = { valid_inputs = {
"vllm": { "vllm": {
"device": ["cuda", "xpu"], "device": ["cuda", "xpu", "cpu"],
"target": [ "target": [
"runtime", "runtime",
"dev", "dev",
......
...@@ -37,7 +37,7 @@ ARG RUNTIME_IMAGE_TAG={{ context[framework][device_key].runtime_image_tag }} ...@@ -37,7 +37,7 @@ ARG RUNTIME_IMAGE_TAG={{ context[framework][device_key].runtime_image_tag }}
{%- endif %} {%- endif %}
# wheel builder image selection # wheel builder image selection
{% if device == "xpu" %} {% if device == "xpu" or device == "cpu" %}
ARG WHEEL_BUILDER_IMAGE=${BASE_IMAGE}:${BASE_IMAGE_TAG} ARG WHEEL_BUILDER_IMAGE=${BASE_IMAGE}:${BASE_IMAGE_TAG}
{% elif platform == "multi" %} {% elif platform == "multi" %}
{# Multi-arch: manylinux selection is handled via --platform-pinned stage aliases #} {# Multi-arch: manylinux selection is handled via --platform-pinned stage aliases #}
...@@ -130,4 +130,4 @@ ARG TRTLLM_PYTHON_VERSION={{ context[framework].python_version }} ...@@ -130,4 +130,4 @@ ARG TRTLLM_PYTHON_VERSION={{ context[framework].python_version }}
{% if make_efa == true %} {% if make_efa == true %}
ARG EFA_VERSION={{ context.dynamo.efa_version }} ARG EFA_VERSION={{ context.dynamo.efa_version }}
ARG EFA_BASE_IMAGE={{ "runtime" if target=="runtime" else "dev" }} ARG EFA_BASE_IMAGE={{ "runtime" if target=="runtime" else "dev" }}
{%- endif -%} {%- endif -%}
\ No newline at end of file
...@@ -14,6 +14,12 @@ ARG TARGETARCH ...@@ -14,6 +14,12 @@ ARG TARGETARCH
USER root USER root
WORKDIR /opt/dynamo WORKDIR /opt/dynamo
{% if device == "cpu" %}
# CPU builds only: install basic download, archive, VCS and NUMA tooling.
# apt-get is used instead of apt (apt has no stable scripting interface), and
# the install is forced non-interactive so package prompts cannot stall the build.
RUN apt-get clean && apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends --fix-missing \
        curl ca-certificates zip unzip git lsb-release numactl wget vim
{% endif %}
# Install sccache into the base image so downstream stages can COPY it # Install sccache into the base image so downstream stages can COPY it
# instead of downloading from GitHub (avoids 502 errors under parallel builds) # instead of downloading from GitHub (avoids 502 errors under parallel builds)
ARG SCCACHE_VERSION=v0.14.0 ARG SCCACHE_VERSION=v0.14.0
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
# PURPOSE: Framework development and vLLM compilation # PURPOSE: Framework development and vLLM compilation
# #
# This stage builds and compiles framework dependencies including: # This stage builds and compiles framework dependencies including:
# - vLLM inference engine with CUDA/XPU support # - vLLM inference engine with CUDA/XPU/CPU support
# - DeepGEMM and FlashInfer optimizations # - DeepGEMM and FlashInfer optimizations
# - All necessary build tools and compilation dependencies # - All necessary build tools and compilation dependencies
# - Framework-level Python packages and extensions # - Framework-level Python packages and extensions
...@@ -29,6 +29,10 @@ COPY --from=dynamo_base /bin/uv /bin/uvx /bin/ ...@@ -29,6 +29,10 @@ COPY --from=dynamo_base /bin/uv /bin/uvx /bin/
ARG PYTHON_VERSION ARG PYTHON_VERSION
ARG DEVICE ARG DEVICE
# Install basic download, archive, VCS and NUMA tooling for the framework build.
# apt-get is used instead of apt (apt has no stable scripting interface), and
# the install is forced non-interactive so package prompts cannot stall the build.
# NOTE(review): this step is not wrapped in a device check, so it runs for every
# device; confirm that is intended.
RUN apt-get clean && apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends --fix-missing \
        curl ca-certificates zip unzip git lsb-release numactl wget vim
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds. # Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update -y \ apt-get update -y \
...@@ -88,12 +92,34 @@ ENV VLLM_TARGET_DEVICE=xpu ...@@ -88,12 +92,34 @@ ENV VLLM_TARGET_DEVICE=xpu
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
{% endif %} {% endif %}
{% if device == "cpu" %}
## Use guidelines from https://docs.vllm.ai/en/stable/getting_started/installation/cpu/#build-image-from-source
## to build a cross-compiled target that supports the AVX512 and AMX ISAs.
## vllm-0.16 has a bug that handles non-AVX512 supported cases incorrectly
## - https://github.com/vllm-project/vllm/issues/33991
## - Build settings chosen to cross-compile with AVX512 support on amd64 only.
ENV VLLM_TARGET_DEVICE=cpu
# Dockerfiles have no trailing-comment syntax: a '#' after an instruction is
# parsed as further arguments, so each description sits on its own line.
#
# If false, AVX512 use is decided by build-machine support or by the flags below
# (the flags override detection). If true, AVX512 support is disabled.
ARG VLLM_CPU_DISABLE_AVX512=false
# Build with the AVX512 ISA (explicitly enabled so the image can be cross-compiled).
ARG VLLM_CPU_AVX512=true
# Build with the AVX512BF16 ISA.
ARG VLLM_CPU_AVX512BF16=true
# Build with the AVX512VNNI ISA.
ARG VLLM_CPU_AVX512VNNI=false
# Build with the AMXBF16 ISA.
ARG VLLM_CPU_AMXBF16=true
{% endif %}
# Install VLLM and related dependencies # Install VLLM and related dependencies
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
--mount=type=cache,target=/root/.cache/uv \ --mount=type=cache,target=/root/.cache/uv \
export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \ export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \ cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
chmod +x /tmp/install_vllm.sh && \ chmod +x /tmp/install_vllm.sh && \
if [ "$DEVICE" = "cpu" ] && [ "$TARGETARCH" = "amd64" ]; then \
export VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512} \
VLLM_CPU_AVX512=${VLLM_CPU_AVX512} \
VLLM_CPU_AVX512BF16=${VLLM_CPU_AVX512BF16} \
VLLM_CPU_AVX512VNNI=${VLLM_CPU_AVX512VNNI} \
VLLM_CPU_AMXBF16=${VLLM_CPU_AMXBF16}; \
fi && \
/tmp/install_vllm.sh \ /tmp/install_vllm.sh \
--device $DEVICE \ --device $DEVICE \
--vllm-ref $VLLM_REF \ --vllm-ref $VLLM_REF \
......
...@@ -67,12 +67,11 @@ ENV CPATH=/usr/local/cuda/include \ ...@@ -67,12 +67,11 @@ ENV CPATH=/usr/local/cuda/include \
COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/ COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
{% if device == "xpu" %}
ENV PATH=/usr/local/bin/etcd/:$PATH
{% else %}
# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible # Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH {% if device == "cuda" %}
ENV PATH=/usr/local/cuda/nvvm/bin:$PATH
{% endif %} {% endif %}
ENV PATH=/usr/local/bin/etcd/:$PATH
# Copy uv to system /bin # Copy uv to system /bin
COPY --from=dynamo_base /bin/uv /bin/uvx /bin/ COPY --from=dynamo_base /bin/uv /bin/uvx /bin/
...@@ -153,6 +152,31 @@ RUN wget https://github.com/uxlfoundation/oneCCL/releases/download/2021.15.7/int ...@@ -153,6 +152,31 @@ RUN wget https://github.com/uxlfoundation/oneCCL/releases/download/2021.15.7/int
ln -s /opt/intel/oneapi/ccl/2021.15 /opt/intel/oneapi/ccl/latest ln -s /opt/intel/oneapi/ccl/2021.15 /opt/intel/oneapi/ccl/latest
{% endif %} {% endif %}
{% if device == "cpu" %}
# CPU runtime: install the system toolchain (gcc/g++ 12, ccache), tcmalloc and
# NUMA libraries, media/GL libraries and small CLI utilities, register gcc-12 and
# g++-12 as the default gcc/g++, and install uv through its installer script.
# - The install is forced non-interactive so package prompts cannot stall the build.
# - The uv installer is downloaded to a file before it is executed: with
#   "curl ... | sh" a failed download hands sh an empty script, which exits 0
#   unless pipefail is active, hiding the failure.
# - apt caches and package lists are dropped in the same layer to keep it small.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        curl ca-certificates zip unzip git lsb-release numactl wget vim \
        gcc-12 g++-12 ccache \
        libtcmalloc-minimal4 libnuma-dev \
        ffmpeg libsm6 libxext6 libgl1 jq lsof && \
    update-ca-certificates && \
    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 && \
    curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh && \
    sh /tmp/uv-install.sh && \
    rm -f /tmp/uv-install.sh && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

# Compiler cache location; C++ compiles launched by CMake go through ccache.
ENV CCACHE_DIR=/root/.cache/ccache
ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
# Put /root/.local/bin on PATH.
# NOTE(review): presumably where the uv installer places its binaries; confirm.
ENV PATH="/root/.local/bin:$PATH"
ENV VIRTUAL_ENV="/opt/dynamo/venv"
ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python

# Create the virtual environment, pre-create its per-version include directory,
# and hand the tree to user dynamo (group 0) with group write access.
RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV} && \
    mkdir -p ${VIRTUAL_ENV}/include/site/python${PYTHON_VERSION} && \
    chown -R dynamo:0 ${VIRTUAL_ENV} && \
    chmod -R g+w ${VIRTUAL_ENV}
# Venv executables take precedence over everything already on PATH.
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
{% endif %}
{% if context.vllm.enable_media_ffmpeg == "true" %} {% if context.vllm.enable_media_ffmpeg == "true" %}
# Copy ffmpeg libraries from wheel_builder (requires root, runs before USER dynamo) # Copy ffmpeg libraries from wheel_builder (requires root, runs before USER dynamo)
RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \ RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
...@@ -172,6 +196,10 @@ SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"] ...@@ -172,6 +196,10 @@ SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"]
ENV NIXL_PREFIX=/opt/intel/intel_nixl ENV NIXL_PREFIX=/opt/intel/intel_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/x86_64-linux-gnu ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/x86_64-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
{% elif device == "cpu" %}
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/x86_64-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
{% else %} {% else %}
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib64 ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib64
...@@ -224,7 +252,7 @@ COPY --chown=dynamo: --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX ...@@ -224,7 +252,7 @@ COPY --chown=dynamo: --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
{# XPU NIXL uses lib/x86_64-linux-gnu; copy to NIXL_LIB_DIR to ensure lib dir is populated #} {# XPU NIXL uses lib/x86_64-linux-gnu; copy to NIXL_LIB_DIR to ensure lib dir is populated #}
COPY --chown=dynamo: --from=wheel_builder /opt/intel/intel_nixl/lib/x86_64-linux-gnu/. ${NIXL_LIB_DIR}/ COPY --chown=dynamo: --from=wheel_builder /opt/intel/intel_nixl/lib/x86_64-linux-gnu/. ${NIXL_LIB_DIR}/
{% endif %} {% endif %}
{# For cuda: NIXL_LIB_DIR = lib64, already included in the $NIXL_PREFIX COPY above #} {# For cpu/cuda: NIXL libs are already included in the $NIXL_PREFIX COPY above #}
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/ COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/ COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/
...@@ -236,15 +264,19 @@ $NIXL_LIB_DIR:\ ...@@ -236,15 +264,19 @@ $NIXL_LIB_DIR:\
$NIXL_PLUGIN_DIR:\ $NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib:\ /usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\ /usr/local/ucx/lib/ucx:\
$LD_LIBRARY_PATH ${LD_LIBRARY_PATH:-}
{% if device == "cuda" %} {% if device == "cuda" %}
ENV LD_LIBRARY_PATH=\ ENV LD_LIBRARY_PATH=\
/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\ /opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
$LD_LIBRARY_PATH ${LD_LIBRARY_PATH:-}
ENV NVIDIA_DRIVER_CAPABILITIES=video,compute,utility ENV NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
{% endif %} {% endif %}
{% if device == "cpu" %}
# Preload tcmalloc and the OpenMP runtime (libiomp5) into every process started
# in the container.
# NOTE(review): the tcmalloc path is the x86_64 multiarch directory and libiomp5
# is expected under the virtualenv's lib/ directory; confirm both exist for every
# architecture this image is built for.
ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:${VIRTUAL_ENV}/lib/libiomp5.so"
{% endif %}
# TODO: skip /workspace COPYs for dev/local-dev (bind-mounted from host, these get shadowed) # TODO: skip /workspace COPYs for dev/local-dev (bind-mounted from host, these get shadowed)
COPY --chmod=664 --chown=dynamo:0 ATTRIBUTION* LICENSE /workspace/ COPY --chmod=664 --chown=dynamo:0 ATTRIBUTION* LICENSE /workspace/
{% if target not in ("dev", "local-dev") %} {% if target not in ("dev", "local-dev") %}
...@@ -387,7 +419,9 @@ RUN uv pip uninstall triton triton-xpu && \ ...@@ -387,7 +419,9 @@ RUN uv pip uninstall triton triton-xpu && \
uv pip install triton-xpu==3.6.0 --extra-index-url=https://download.pytorch.org/whl/test/xpu && \ uv pip install triton-xpu==3.6.0 --extra-index-url=https://download.pytorch.org/whl/test/xpu && \
uv pip uninstall oneccl && \ uv pip uninstall oneccl && \
uv pip uninstall oneccl-devel uv pip uninstall oneccl-devel
{%endif%}
{% if device == "xpu" or device == "cpu" %}
SHELL ["bash", "-c"] SHELL ["bash", "-c"]
CMD ["bash", "-c", "source /etc/bash.bashrc && exec bash"] CMD ["bash", "-c", "source /etc/bash.bashrc && exec bash"]
{% else %} {% else %}
......
...@@ -33,6 +33,17 @@ ARG CARGO_BUILD_JOBS ...@@ -33,6 +33,17 @@ ARG CARGO_BUILD_JOBS
ARG DEVICE ARG DEVICE
WORKDIR /workspace WORKDIR /workspace
{% if device == "xpu" or device == "cpu" %}
# XPU and CPU builds: install the common tooling plus the audio (libsndfile1),
# X11/GL (libsm6, libxext6, libgl1), async-I/O (libaio-dev) and kernel-header
# (linux-libc-dev) packages.
# apt-get is used instead of apt (apt has no stable scripting interface), and
# the install is forced non-interactive so package prompts cannot stall the build.
RUN apt-get clean && apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends --fix-missing \
        curl ca-certificates zip unzip git lsb-release numactl wget vim \
        libsndfile1 \
        libsm6 \
        libxext6 \
        libgl1 \
        libaio-dev \
        linux-libc-dev
{% endif %}
{% if device == "cuda" %} {% if device == "cuda" %}
# Copy CUDA from base stage # Copy CUDA from base stage
...@@ -54,37 +65,22 @@ COPY --from=dynamo_base $RUSTUP_HOME $RUSTUP_HOME ...@@ -54,37 +65,22 @@ COPY --from=dynamo_base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=dynamo_base $CARGO_HOME $CARGO_HOME COPY --from=dynamo_base $CARGO_HOME $CARGO_HOME
{% if device == "xpu" %} {% if device == "xpu" %}
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && \ echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && \
add-apt-repository -y ppa:kobuk-team/intel-graphics add-apt-repository -y ppa:kobuk-team/intel-graphics
# Fetch UCX patch
RUN wget --tries=3 --waitretry=5 https://raw.githubusercontent.com/intel/llm-scaler/35a14cbc08d714f460a29b7a7328df5620c8530f/vllm/patches/ai-dynamo-xpu/patches/ucx-v1.12.0.patch -O /tmp/ucx.patch RUN wget --tries=3 --waitretry=5 https://raw.githubusercontent.com/intel/llm-scaler/35a14cbc08d714f460a29b7a7328df5620c8530f/vllm/patches/ai-dynamo-xpu/patches/ucx-v1.12.0.patch -O /tmp/ucx.patch
RUN apt clean && apt-get update -y && \ # Install Intel GPU runtime packages
apt-get install -y --no-install-recommends --fix-missing \ RUN apt update -y && apt upgrade -y && \
curl \
#ffmpeg \
ca-certificates \
zip \
unzip \
git \
libsndfile1 \
libsm6 \
libxext6 \
libgl1 \
lsb-release \
libaio-dev \
numactl \
wget \
vim \
linux-libc-dev && \
# Install Intel GPU runtime packages
apt update -y && apt upgrade -y && \
apt-get install -y libze1 libze-dev libze-intel-gpu1 intel-opencl-icd \ apt-get install -y libze1 libze-dev libze-intel-gpu1 intel-opencl-icd \
libze-intel-gpu-raytracing intel-ocloc intel-oneapi-compiler-dpcpp-cpp-2025.3 && \ libze-intel-gpu-raytracing intel-ocloc intel-oneapi-compiler-dpcpp-cpp-2025.3 && \
apt-get clean && rm -rf /var/lib/apt/lists/* apt-get clean && rm -rf /var/lib/apt/lists/*
{% endif %}
{% if device == "xpu" or device == "cpu" %}
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN apt-get update -y \ RUN apt-get update -y \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
# NIXL build dependencies # NIXL build dependencies
...@@ -189,14 +185,14 @@ RUN set -eux; \ ...@@ -189,14 +185,14 @@ RUN set -eux; \
# Point build tools explicitly at the modern protoc # Point build tools explicitly at the modern protoc
ENV PROTOC=/usr/local/bin/protoc ENV PROTOC=/usr/local/bin/protoc
{% if device == "xpu" %} {% if device == "xpu" or device == "cpu" %}
# Install uv package manager # Install uv package manager
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
ENV LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib64:$LD_LIBRARY_PATH ENV LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib64:${LD_LIBRARY_PATH:-}
{% else %} {% else %}
ENV CUDA_PATH=/usr/local/cuda \ ENV CUDA_PATH=/usr/local/cuda \
PATH=/usr/local/cuda/bin:$PATH \ PATH=/usr/local/cuda/bin:$PATH \
LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/lib:/usr/local/lib64:$LD_LIBRARY_PATH \ LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/lib:/usr/local/lib64:${LD_LIBRARY_PATH:-} \
NVIDIA_DRIVER_CAPABILITIES=video,compute,utility NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
{% endif %} {% endif %}
...@@ -253,11 +249,11 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \ ...@@ -253,11 +249,11 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
if [ "$USE_SCCACHE" = "true" ]; then \ if [ "$USE_SCCACHE" = "true" ]; then \
eval $(/tmp/use-sccache.sh setup-env); \ eval $(/tmp/use-sccache.sh setup-env); \
fi && \ fi && \
if [ "$DEVICE" = "xpu" ]; then \ if [ "$DEVICE" = "xpu" ] || [ "$DEVICE" = "cpu" ]; then \
apt-get update -y && apt-get install -y pkg-config; \ apt-get update -y && apt-get install -y build-essential pkg-config xz-utils; \
apt-get clean && rm -rf /var/lib/apt/lists/*; \ apt-get clean && rm -rf /var/lib/apt/lists/*; \
elif [ "$DEVICE" = "cuda" ]; then \ elif [ "$DEVICE" = "cuda" ]; then \
dnf install -y pkg-config; \ dnf install -y pkg-config xz; \
fi && \ fi && \
cd /tmp && \ cd /tmp && \
curl --retry 5 --retry-delay 3 -LO https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.xz && \ curl --retry 5 --retry-delay 3 -LO https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.xz && \
...@@ -331,6 +327,18 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \ ...@@ -331,6 +327,18 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--with-gdrcopy=/usr/local \ --with-gdrcopy=/usr/local \
--with-efa \ --with-efa \
--enable-mt; \ --enable-mt; \
elif [ "$DEVICE" = "cpu" ]; then \
./contrib/configure-release \
--prefix=/usr/local/ucx \
--enable-shared \
--disable-static \
--disable-doxygen-doc \
--enable-optimizations \
--enable-cma \
--enable-devel-headers \
--with-verbs \
--without-cuda \
--enable-mt; \
fi && \ fi && \
make -j && \ make -j && \
make -j install-strip && \ make -j install-strip && \
...@@ -495,8 +503,8 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \ ...@@ -495,8 +503,8 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
git checkout ${NIXL_REF} && \ git checkout ${NIXL_REF} && \
if [ "$DEVICE" = "cuda" ]; then \ if [ "$DEVICE" = "cuda" ]; then \
PKG_NAME="nixl-cu${CUDA_MAJOR}"; \ PKG_NAME="nixl-cu${CUDA_MAJOR}"; \
elif [ "$DEVICE" = "xpu" ]; then \ else \
PKG_NAME="nixl-xpu"; \ PKG_NAME="nixl-${DEVICE}"; \
fi && \ fi && \
./contrib/tomlutil.py --wheel-name $PKG_NAME pyproject.toml && \ ./contrib/tomlutil.py --wheel-name $PKG_NAME pyproject.toml && \
mkdir build && \ mkdir build && \
...@@ -509,6 +517,9 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \ ...@@ -509,6 +517,9 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
elif [ "$DEVICE" = "xpu" ]; then \ elif [ "$DEVICE" = "xpu" ]; then \
meson setup build/ --prefix=/opt/intel/intel_nixl --buildtype=release \ meson setup build/ --prefix=/opt/intel/intel_nixl --buildtype=release \
-Ducx_path="/usr/local/ucx"; \ -Ducx_path="/usr/local/ucx"; \
elif [ "$DEVICE" = "cpu" ]; then \
meson setup build/ --prefix=/opt/nvidia/nvda_nixl --buildtype=release \
-Ducx_path="/usr/local/ucx"; \
fi && \ fi && \
cd build && \ cd build && \
ninja && \ ninja && \
...@@ -520,6 +531,10 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \ ...@@ -520,6 +531,10 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
ENV NIXL_LIB_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu \ ENV NIXL_LIB_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu \
NIXL_PLUGIN_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu/plugins \ NIXL_PLUGIN_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu/plugins \
NIXL_PREFIX=/opt/intel/intel_nixl NIXL_PREFIX=/opt/intel/intel_nixl
{% elif device == "cpu" %}
ENV NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/plugins \
NIXL_PREFIX=/opt/nvidia/nvda_nixl
{% else %} {% else %}
ENV NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64 \ ENV NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64 \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins \ NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins \
...@@ -579,7 +594,7 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \ ...@@ -579,7 +594,7 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--plat manylinux_2_28_${ARCH_ALT} \ --plat manylinux_2_28_${ARCH_ALT} \
--wheel-dir /opt/dynamo/dist \ --wheel-dir /opt/dynamo/dist \
target/wheels/*.whl; \ target/wheels/*.whl; \
elif [ "$DEVICE" = "xpu" ]; then \ elif [ "$DEVICE" = "xpu" ] || [ "$DEVICE" = "cpu" ]; then \
cp target/wheels/*.whl /opt/dynamo/dist/; \ cp target/wheels/*.whl /opt/dynamo/dist/; \
fi; \ fi; \
fi && \ fi && \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment