Unverified commit 23c42d83, authored by Sandeep Maddipatla and committed by GitHub
Browse files

feat: Add support for cpu builds in dockerfiles (#7139)


Signed-off-by: Sandeep Maddipatla <sandeep.maddipatla@intel.com>
parent f29753dc
......@@ -53,6 +53,12 @@ vllm:
base_image_tag: 2025.3.2-0-devel-ubuntu24.04
runtime_image_tag: 2025.3.2-0-devel-ubuntu24.04
vllm_ref: v0.14.0
cpu:
base_image: ubuntu
runtime_image: ubuntu
base_image_tag: 24.04
runtime_image_tag: 24.04
vllm_ref: v0.16.0
flashinf_ref: v0.6.4
lmcache_ref: 0.4.1
vllm_omni_ref: "v0.16.0"
......
......@@ -128,7 +128,7 @@ if [ "$DEVICE" = "cuda" ]; then
echo "\n=== Configuration Summary ==="
echo " VLLM_REF=$VLLM_REF | ARCH=$ARCH | CUDA_VERSION=$CUDA_VERSION | TORCH_BACKEND=$TORCH_BACKEND"
echo " TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST | INSTALLATION_DIR=$INSTALLATION_DIR"
elif [ "$DEVICE" = "xpu" ]; then
elif [ "$DEVICE" = "xpu" ] || [ "$DEVICE" = "cpu" ]; then
echo "\n=== Configuration Summary ==="
echo " VLLM_REF=$VLLM_REF | ARCH=$ARCH | INSTALLATION_DIR=$INSTALLATION_DIR"
fi
......@@ -191,6 +191,21 @@ if [ "$DEVICE" = "cuda" ]; then
uv pip install flashinfer-cubin==$FLASHINF_REF
uv pip install flashinfer-jit-cache==$FLASHINF_REF --extra-index-url https://flashinfer.ai/whl/${TORCH_BACKEND}
fi
# CPU path: install the CPU build requirements, build vLLM from the current
# source tree as a wheel, then install that wheel.
if [ "$DEVICE" = "cpu" ]; then
echo "\n=== Installing vLLM for cpu ==="
# When CACHE_BUSTER is set and non-empty, record its value in /tmp/builder-buster.
# NOTE(review): presumably used to invalidate cached build layers — confirm with the caller.
if [ -n "${CACHE_BUSTER:-}" ]; then
echo "$CACHE_BUSTER" > /tmp/builder-buster
fi
# vLLM CPU requirements pin torch with a +cpu local version (e.g. 2.10.0+cpu),
# which is published on the PyTorch CPU wheel index instead of PyPI.
# Install torchvision, torchaudio from the same index to get the correct versions with +cpu suffix.
uv pip install -r requirements/cpu-build.txt --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
uv pip install torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
# Build the wheel into dist/ with VLLM_TARGET_DEVICE=cpu set for this command only.
VLLM_TARGET_DEVICE=cpu \
python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
# Install whatever wheel(s) the build step above produced in dist/.
uv pip install dist/*.whl
fi
echo "✓ vLLM installation completed"
echo "\n=== Installing LMCache from source ==="
......
......@@ -51,7 +51,7 @@ def parse_args():
"--device",
type=str,
default="cuda",
choices=["cuda", "xpu"],
choices=["cuda", "xpu", "cpu"],
help="Dockerfile device to use",
)
......@@ -99,7 +99,7 @@ def parse_args():
def validate_args(args):
valid_inputs = {
"vllm": {
"device": ["cuda", "xpu"],
"device": ["cuda", "xpu", "cpu"],
"target": [
"runtime",
"dev",
......
......@@ -37,7 +37,7 @@ ARG RUNTIME_IMAGE_TAG={{ context[framework][device_key].runtime_image_tag }}
{%- endif %}
# wheel builder image selection
{% if device == "xpu" %}
{% if device == "xpu" or device == "cpu" %}
ARG WHEEL_BUILDER_IMAGE=${BASE_IMAGE}:${BASE_IMAGE_TAG}
{% elif platform == "multi" %}
{# Multi-arch: manylinux selection is handled via --platform-pinned stage aliases #}
......@@ -130,4 +130,4 @@ ARG TRTLLM_PYTHON_VERSION={{ context[framework].python_version }}
{% if make_efa == true %}
ARG EFA_VERSION={{ context.dynamo.efa_version }}
ARG EFA_BASE_IMAGE={{ "runtime" if target=="runtime" else "dev" }}
{%- endif -%}
\ No newline at end of file
{%- endif -%}
......@@ -14,6 +14,12 @@ ARG TARGETARCH
USER root
WORKDIR /opt/dynamo
{% if device == "cpu" %}
# CPU builds start from a plain ubuntu base image, so install the basic
# command-line tooling here. apt-get is used throughout because the `apt`
# front end does not offer a stable interface for scripted use.
RUN apt-get clean && apt-get update -y && \
    apt-get install -y --no-install-recommends --fix-missing \
        ca-certificates \
        curl \
        git \
        lsb-release \
        numactl \
        unzip \
        vim \
        wget \
        zip
{% endif %}
# Install sccache into the base image so downstream stages can COPY it
# instead of downloading from GitHub (avoids 502 errors under parallel builds)
ARG SCCACHE_VERSION=v0.14.0
......
......@@ -10,7 +10,7 @@
# PURPOSE: Framework development and vLLM compilation
#
# This stage builds and compiles framework dependencies including:
# - vLLM inference engine with CUDA/XPU support
# - vLLM inference engine with CUDA/XPU/CPU support
# - DeepGEMM and FlashInfer optimizations
# - All necessary build tools and compilation dependencies
# - Framework-level Python packages and extensions
......@@ -29,6 +29,10 @@ COPY --from=dynamo_base /bin/uv /bin/uvx /bin/
ARG PYTHON_VERSION
ARG DEVICE
RUN apt clean && apt-get update -y && \
apt-get install -y --no-install-recommends --fix-missing \
curl ca-certificates zip unzip git lsb-release numactl wget vim
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update -y \
......@@ -88,12 +92,34 @@ ENV VLLM_TARGET_DEVICE=xpu
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
{% endif %}
{% if device == "cpu" %}
## Follow the guidelines from https://docs.vllm.ai/en/stable/getting_started/installation/cpu/#build-image-from-source
## to build a cross-compiled target that supports the AVX512 and AMX ISAs.
## vLLM 0.16 has a bug that handles the non-AVX512 case incorrectly:
## - https://github.com/vllm-project/vllm/issues/33991
## - Build settings are chosen to cross-compile with AVX512 support on amd64 only.
ENV VLLM_TARGET_DEVICE=cpu
# NOTE: Dockerfile comments must start a line. A trailing "# ..." after an ARG
# value is parsed as additional arguments, so each description sits above its ARG.
# If false, decide based on build-machine support or the flags below (the latter
# override the former). If true, disable AVX512 support.
ARG VLLM_CPU_DISABLE_AVX512=false
# Build with AVX512 ISA support (enable explicitly to cross-compile).
ARG VLLM_CPU_AVX512=true
# Build with AVX512BF16 ISA support.
ARG VLLM_CPU_AVX512BF16=true
# Build with AVX512VNNI ISA support.
ARG VLLM_CPU_AVX512VNNI=false
# Build with AMXBF16 ISA support.
ARG VLLM_CPU_AMXBF16=true
{% endif %}
# Install VLLM and related dependencies
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
--mount=type=cache,target=/root/.cache/uv \
export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
chmod +x /tmp/install_vllm.sh && \
if [ "$DEVICE" = "cpu" ] && [ "$TARGETARCH" = "amd64" ]; then \
export VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512} \
VLLM_CPU_AVX512=${VLLM_CPU_AVX512} \
VLLM_CPU_AVX512BF16=${VLLM_CPU_AVX512BF16} \
VLLM_CPU_AVX512VNNI=${VLLM_CPU_AVX512VNNI} \
VLLM_CPU_AMXBF16=${VLLM_CPU_AMXBF16}; \
fi && \
/tmp/install_vllm.sh \
--device $DEVICE \
--vllm-ref $VLLM_REF \
......
......@@ -67,12 +67,11 @@ ENV CPATH=/usr/local/cuda/include \
COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
{% if device == "xpu" %}
ENV PATH=/usr/local/bin/etcd/:$PATH
{% else %}
# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
{% if device == "cuda" %}
ENV PATH=/usr/local/cuda/nvvm/bin:$PATH
{% endif %}
ENV PATH=/usr/local/bin/etcd/:$PATH
# Copy uv to system /bin
COPY --from=dynamo_base /bin/uv /bin/uvx /bin/
......@@ -153,6 +152,31 @@ RUN wget https://github.com/uxlfoundation/oneCCL/releases/download/2021.15.7/int
ln -s /opt/intel/oneapi/ccl/2021.15 /opt/intel/oneapi/ccl/latest
{% endif %}
{% if device == "cpu" %}
# CPU runtime setup: OS tooling, GCC/G++ 12 (registered as the default gcc/g++
# via update-alternatives), ccache, tcmalloc and NUMA libraries, and media/GL
# libraries. The apt cache and package lists are removed in the same layer so
# they do not persist in the image.
# NOTE(review): uv is also copied into /bin from dynamo_base in this file; confirm
# whether the installer-script copy below is still needed.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        curl ca-certificates zip unzip git lsb-release numactl wget vim \
        gcc-12 g++-12 ccache \
        libtcmalloc-minimal4 libnuma-dev \
        ffmpeg libsm6 libxext6 libgl1 jq lsof && \
    update-ca-certificates && \
    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 && \
    apt-get clean && rm -rf /var/lib/apt/lists/* && \
    curl -LsSf https://astral.sh/uv/install.sh | sh

# Route C++ compilation through ccache and keep its cache under /root/.cache.
ENV CCACHE_DIR=/root/.cache/ccache
ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache

# Presumably where the uv installer script places its binaries — TODO confirm.
ENV PATH="/root/.local/bin:$PATH"
ENV VIRTUAL_ENV="/opt/dynamo/venv"
ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python

# Create the virtual environment, pre-create its per-version include directory,
# and make the tree owned by dynamo and writable by group 0.
RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV} && \
    mkdir -p ${VIRTUAL_ENV}/include/site/python${PYTHON_VERSION} && \
    chown -R dynamo:0 ${VIRTUAL_ENV} && \
    chmod -R g+w ${VIRTUAL_ENV}

# Put the virtual environment's executables first on PATH.
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
{% endif %}
{% if context.vllm.enable_media_ffmpeg == "true" %}
# Copy ffmpeg libraries from wheel_builder (requires root, runs before USER dynamo)
RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
......@@ -172,6 +196,10 @@ SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"]
ENV NIXL_PREFIX=/opt/intel/intel_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/x86_64-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
{% elif device == "cpu" %}
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/x86_64-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
{% else %}
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib64
......@@ -224,7 +252,7 @@ COPY --chown=dynamo: --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
{# XPU NIXL uses lib/x86_64-linux-gnu; copy to NIXL_LIB_DIR to ensure lib dir is populated #}
COPY --chown=dynamo: --from=wheel_builder /opt/intel/intel_nixl/lib/x86_64-linux-gnu/. ${NIXL_LIB_DIR}/
{% endif %}
{# For cuda: NIXL_LIB_DIR = lib64, already included in the $NIXL_PREFIX COPY above #}
{# For cpu/cuda: NIXL libs are already included in the $NIXL_PREFIX COPY above #}
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/
......@@ -236,15 +264,19 @@ $NIXL_LIB_DIR:\
$NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
$LD_LIBRARY_PATH
${LD_LIBRARY_PATH:-}
{% if device == "cuda" %}
ENV LD_LIBRARY_PATH=\
/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
$LD_LIBRARY_PATH
${LD_LIBRARY_PATH:-}
ENV NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
{% endif %}
{% if device == "cpu" %}
# Preload tcmalloc (system library) and libiomp5 (from the virtual environment's
# lib directory) through LD_PRELOAD for the CPU image.
# NOTE(review): the tcmalloc path is hard-coded to x86_64-linux-gnu — confirm
# that cpu images are only built for amd64.
ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:${VIRTUAL_ENV}/lib/libiomp5.so"
{% endif %}
# TODO: skip /workspace COPYs for dev/local-dev (bind-mounted from host, these get shadowed)
COPY --chmod=664 --chown=dynamo:0 ATTRIBUTION* LICENSE /workspace/
{% if target not in ("dev", "local-dev") %}
......@@ -387,7 +419,9 @@ RUN uv pip uninstall triton triton-xpu && \
uv pip install triton-xpu==3.6.0 --extra-index-url=https://download.pytorch.org/whl/test/xpu && \
uv pip uninstall oneccl && \
uv pip uninstall oneccl-devel
{%endif%}
{% if device == "xpu" or device == "cpu" %}
SHELL ["bash", "-c"]
CMD ["bash", "-c", "source /etc/bash.bashrc && exec bash"]
{% else %}
......
......@@ -33,6 +33,17 @@ ARG CARGO_BUILD_JOBS
ARG DEVICE
WORKDIR /workspace
{% if device == "xpu" or device == "cpu" %}
# OS packages shared by the xpu and cpu wheel-builder images. apt-get is used
# throughout because the `apt` front end does not offer a stable interface for
# scripted use. Packages are listed one per line, sorted, to keep diffs small.
RUN apt-get clean && apt-get update -y && \
    apt-get install -y --no-install-recommends --fix-missing \
        ca-certificates \
        curl \
        git \
        libaio-dev \
        libgl1 \
        libsm6 \
        libsndfile1 \
        libxext6 \
        linux-libc-dev \
        lsb-release \
        numactl \
        unzip \
        vim \
        wget \
        zip
{% endif %}
{% if device == "cuda" %}
# Copy CUDA from base stage
......@@ -54,37 +65,22 @@ COPY --from=dynamo_base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=dynamo_base $CARGO_HOME $CARGO_HOME
{% if device == "xpu" %}
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && \
add-apt-repository -y ppa:kobuk-team/intel-graphics
# Fetch UCX patch
RUN wget --tries=3 --waitretry=5 https://raw.githubusercontent.com/intel/llm-scaler/35a14cbc08d714f460a29b7a7328df5620c8530f/vllm/patches/ai-dynamo-xpu/patches/ucx-v1.12.0.patch -O /tmp/ucx.patch
RUN apt clean && apt-get update -y && \
apt-get install -y --no-install-recommends --fix-missing \
curl \
#ffmpeg \
ca-certificates \
zip \
unzip \
git \
libsndfile1 \
libsm6 \
libxext6 \
libgl1 \
lsb-release \
libaio-dev \
numactl \
wget \
vim \
linux-libc-dev && \
# Install Intel GPU runtime packages
apt update -y && apt upgrade -y && \
# Install Intel GPU runtime packages
RUN apt update -y && apt upgrade -y && \
apt-get install -y libze1 libze-dev libze-intel-gpu1 intel-opencl-icd \
libze-intel-gpu-raytracing intel-ocloc intel-oneapi-compiler-dpcpp-cpp-2025.3 && \
apt-get clean && rm -rf /var/lib/apt/lists/*
{% endif %}
{% if device == "xpu" or device == "cpu" %}
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN apt-get update -y \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
# NIXL build dependencies
......@@ -189,14 +185,14 @@ RUN set -eux; \
# Point build tools explicitly at the modern protoc
ENV PROTOC=/usr/local/bin/protoc
{% if device == "xpu" %}
{% if device == "xpu" or device == "cpu" %}
# Install uv package manager
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
ENV LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib64:$LD_LIBRARY_PATH
ENV LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib64:${LD_LIBRARY_PATH:-}
{% else %}
ENV CUDA_PATH=/usr/local/cuda \
PATH=/usr/local/cuda/bin:$PATH \
LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/lib:/usr/local/lib64:$LD_LIBRARY_PATH \
LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/lib:/usr/local/lib64:${LD_LIBRARY_PATH:-} \
NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
{% endif %}
......@@ -253,11 +249,11 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
if [ "$USE_SCCACHE" = "true" ]; then \
eval $(/tmp/use-sccache.sh setup-env); \
fi && \
if [ "$DEVICE" = "xpu" ]; then \
apt-get update -y && apt-get install -y pkg-config; \
if [ "$DEVICE" = "xpu" ] || [ "$DEVICE" = "cpu" ]; then \
apt-get update -y && apt-get install -y build-essential pkg-config xz-utils; \
apt-get clean && rm -rf /var/lib/apt/lists/*; \
elif [ "$DEVICE" = "cuda" ]; then \
dnf install -y pkg-config; \
dnf install -y pkg-config xz; \
fi && \
cd /tmp && \
curl --retry 5 --retry-delay 3 -LO https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.xz && \
......@@ -331,6 +327,18 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--with-gdrcopy=/usr/local \
--with-efa \
--enable-mt; \
elif [ "$DEVICE" = "cpu" ]; then \
./contrib/configure-release \
--prefix=/usr/local/ucx \
--enable-shared \
--disable-static \
--disable-doxygen-doc \
--enable-optimizations \
--enable-cma \
--enable-devel-headers \
--with-verbs \
--without-cuda \
--enable-mt; \
fi && \
make -j && \
make -j install-strip && \
......@@ -495,8 +503,8 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
git checkout ${NIXL_REF} && \
if [ "$DEVICE" = "cuda" ]; then \
PKG_NAME="nixl-cu${CUDA_MAJOR}"; \
elif [ "$DEVICE" = "xpu" ]; then \
PKG_NAME="nixl-xpu"; \
else \
PKG_NAME="nixl-${DEVICE}"; \
fi && \
./contrib/tomlutil.py --wheel-name $PKG_NAME pyproject.toml && \
mkdir build && \
......@@ -509,6 +517,9 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
elif [ "$DEVICE" = "xpu" ]; then \
meson setup build/ --prefix=/opt/intel/intel_nixl --buildtype=release \
-Ducx_path="/usr/local/ucx"; \
elif [ "$DEVICE" = "cpu" ]; then \
meson setup build/ --prefix=/opt/nvidia/nvda_nixl --buildtype=release \
-Ducx_path="/usr/local/ucx"; \
fi && \
cd build && \
ninja && \
......@@ -520,6 +531,10 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
ENV NIXL_LIB_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu \
NIXL_PLUGIN_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu/plugins \
NIXL_PREFIX=/opt/intel/intel_nixl
{% elif device == "cpu" %}
ENV NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/plugins \
NIXL_PREFIX=/opt/nvidia/nvda_nixl
{% else %}
ENV NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64 \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins \
......@@ -579,7 +594,7 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--plat manylinux_2_28_${ARCH_ALT} \
--wheel-dir /opt/dynamo/dist \
target/wheels/*.whl; \
elif [ "$DEVICE" = "xpu" ]; then \
elif [ "$DEVICE" = "xpu" ] || [ "$DEVICE" = "cpu" ]; then \
cp target/wheels/*.whl /opt/dynamo/dist/; \
fi; \
fi && \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment