feat: Add support for cpu builds in dockerfiles (#7139)

Signed-off-by: Sandeep Maddipatla <sandeep.maddipatla@intel.com>

Commit 23c42d83 · Sandeep Maddipatla · GitHub · f29753dc
Unverified commit 23c42d83, authored Mar 13, 2026 by Sandeep Maddipatla and committed by GitHub on Mar 13, 2026.
8 changed files
--- a/container/context.yaml
+++ b/container/context.yaml
@@ -53,6 +53,12 @@ vllm:
    base_image_tag: 2025.3.2-0-devel-ubuntu24.04
    runtime_image_tag: 2025.3.2-0-devel-ubuntu24.04
    vllm_ref: v0.14.0
+  cpu:
+    base_image: ubuntu
+    runtime_image: ubuntu
+    base_image_tag: "24.04"  # quoted: an unquoted 24.04 is a YAML float, not a string tag
+    runtime_image_tag: "24.04"  # quoted for the same reason as base_image_tag
+    vllm_ref: v0.16.0
  flashinf_ref: v0.6.4
  lmcache_ref: 0.4.1
  vllm_omni_ref: "v0.16.0"

--- a/container/deps/vllm/install_vllm.sh
+++ b/container/deps/vllm/install_vllm.sh
@@ -128,7 +128,7 @@ if [ "$DEVICE" = "cuda" ]; then
    echo "\n=== Configuration Summary ==="
    echo "  VLLM_REF=$VLLM_REF | ARCH=$ARCH | CUDA_VERSION=$CUDA_VERSION | TORCH_BACKEND=$TORCH_BACKEND"
    echo "  TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST | INSTALLATION_DIR=$INSTALLATION_DIR"
-elif [ "$DEVICE" = "xpu" ]; then
+elif [ "$DEVICE" = "xpu" ] || [ "$DEVICE" = "cpu" ]; then
    echo "\n=== Configuration Summary ==="
    echo "  VLLM_REF=$VLLM_REF | ARCH=$ARCH | INSTALLATION_DIR=$INSTALLATION_DIR"
 fi
@@ -191,6 +191,21 @@ if [ "$DEVICE" = "cuda" ]; then
    uv pip install flashinfer-cubin==$FLASHINF_REF
    uv pip install flashinfer-jit-cache==$FLASHINF_REF --extra-index-url https://flashinfer.ai/whl/${TORCH_BACKEND}
 fi
+
+if [ "$DEVICE" = "cpu" ]; then
+    echo "\n=== Installing vLLM for cpu ==="
+    if [ -n "${CACHE_BUSTER:-}" ]; then
+        echo "$CACHE_BUSTER" > /tmp/builder-buster
+    fi
+    # vLLM CPU requirements pin torch with a +cpu local version (e.g. 2.10.0+cpu),
+    # which is published on the PyTorch CPU wheel index instead of PyPI.
+    # Install torchvision, torchaudio from the same index to get the correct versions with +cpu suffix.
+    uv pip install -r requirements/cpu-build.txt --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
+    uv pip install torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
+    VLLM_TARGET_DEVICE=cpu \
+    python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
+    uv pip install dist/*.whl
+fi
 echo "✓ vLLM installation completed"

 echo "\n=== Installing LMCache from source ==="

--- a/container/render.py
+++ b/container/render.py
@@ -51,7 +51,7 @@ def parse_args():
        "--device",
        type=str,
        default="cuda",
-        choices=["cuda", "xpu"],
+        choices=["cuda", "xpu", "cpu"],
        help="Dockerfile device to use",
    )

@@ -99,7 +99,7 @@ def parse_args():
 def validate_args(args):
    valid_inputs = {
        "vllm": {
-            "device": ["cuda", "xpu"],
+            "device": ["cuda", "xpu", "cpu"],
            "target": [
                "runtime",
                "dev",

--- a/container/templates/args.Dockerfile
+++ b/container/templates/args.Dockerfile
@@ -37,7 +37,7 @@ ARG RUNTIME_IMAGE_TAG={{ context[framework][device_key].runtime_image_tag }}
 {%- endif %}

 # wheel builder image selection
-{% if device == "xpu" %}
+{% if device == "xpu" or device == "cpu" %}
 ARG WHEEL_BUILDER_IMAGE=${BASE_IMAGE}:${BASE_IMAGE_TAG}
 {% elif platform == "multi" %}
 {# Multi-arch: manylinux selection is handled via --platform-pinned stage aliases   #}
@@ -130,4 +130,4 @@ ARG TRTLLM_PYTHON_VERSION={{ context[framework].python_version }}
 {% if make_efa == true %}
 ARG EFA_VERSION={{ context.dynamo.efa_version }}
 ARG EFA_BASE_IMAGE={{ "runtime" if target=="runtime" else "dev" }}
-{%- endif -%}
\ No newline at end of file
+{%- endif -%}
--- a/container/templates/dynamo_base.Dockerfile
+++ b/container/templates/dynamo_base.Dockerfile
@@ -14,6 +14,12 @@ ARG TARGETARCH
 USER root
 WORKDIR /opt/dynamo

+{% if device == "cpu" %}
+RUN apt clean && apt-get update -y && \
+    apt-get install -y --no-install-recommends --fix-missing \
+    curl ca-certificates zip unzip git lsb-release numactl wget vim
+{% endif %}
+
 # Install sccache into the base image so downstream stages can COPY it
 # instead of downloading from GitHub (avoids 502 errors under parallel builds)
 ARG SCCACHE_VERSION=v0.14.0

--- a/container/templates/vllm_framework.Dockerfile
+++ b/container/templates/vllm_framework.Dockerfile
@@ -10,7 +10,7 @@
 # PURPOSE: Framework development and vLLM compilation
 #
 # This stage builds and compiles framework dependencies including:
-# - vLLM inference engine with CUDA/XPU support
+# - vLLM inference engine with CUDA/XPU/CPU support
 # - DeepGEMM and FlashInfer optimizations
 # - All necessary build tools and compilation dependencies
 # - Framework-level Python packages and extensions
@@ -29,6 +29,10 @@ COPY --from=dynamo_base /bin/uv /bin/uvx /bin/
 ARG PYTHON_VERSION
 ARG DEVICE

+RUN apt clean && apt-get update -y && \
+    apt-get install -y --no-install-recommends --fix-missing \
+    curl ca-certificates zip unzip git lsb-release numactl wget vim
+
 # Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    apt-get update -y \
@@ -88,12 +92,42 @@ ENV VLLM_TARGET_DEVICE=xpu
 ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
 {% endif %}

+{% if device == "cpu" %}
+## Use guidelines from https://docs.vllm.ai/en/stable/getting_started/installation/cpu/#build-image-from-source
+## to build a cross-compiled target that supports the AVX512 and AMX ISAs.
+## vllm-0.16 has a bug that handles cases without AVX512 support incorrectly:
+## -  https://github.com/vllm-project/vllm/issues/33991
+## -  Build settings chosen to cross-compile with AVX512 support on amd64 only.
+## Dockerfile comments must start the line; trailing "# ..." text on an ARG line is
+## parsed as extra arguments, so each ARG is documented on the line(s) above it.
+
+ENV VLLM_TARGET_DEVICE=cpu
+# If false, decide based on build-machine support or the flags below (the flags
+# override detection). If true, disable AVX512 support.
+ARG VLLM_CPU_DISABLE_AVX512=false
+# Support for building with the AVX512 ISA (explicitly enabled to cross-compile).
+ARG VLLM_CPU_AVX512=true
+# Support for building with the AVX512BF16 ISA.
+ARG VLLM_CPU_AVX512BF16=true
+# Support for building with the AVX512VNNI ISA.
+ARG VLLM_CPU_AVX512VNNI=false
+# Support for building with the AMXBF16 ISA.
+ARG VLLM_CPU_AMXBF16=true
+{% endif %}
+
 # Install VLLM and related dependencies
 RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
    --mount=type=cache,target=/root/.cache/uv \
    export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
    cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
    chmod +x /tmp/install_vllm.sh && \
+    if [ "$DEVICE" = "cpu" ] && [ "$TARGETARCH" = "amd64" ]; then \
+        export VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512} \
+               VLLM_CPU_AVX512=${VLLM_CPU_AVX512} \
+               VLLM_CPU_AVX512BF16=${VLLM_CPU_AVX512BF16} \
+               VLLM_CPU_AVX512VNNI=${VLLM_CPU_AVX512VNNI} \
+               VLLM_CPU_AMXBF16=${VLLM_CPU_AMXBF16}; \
+    fi && \
    /tmp/install_vllm.sh \
        --device $DEVICE \
        --vllm-ref $VLLM_REF \

--- a/container/templates/vllm_runtime.Dockerfile
+++ b/container/templates/vllm_runtime.Dockerfile
@@ -67,12 +67,11 @@ ENV CPATH=/usr/local/cuda/include \
 COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
 COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/

-{% if device == "xpu" %}
-ENV PATH=/usr/local/bin/etcd/:$PATH
-{% else %}
 # Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
-ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
+{% if device == "cuda" %}
+ENV PATH=/usr/local/cuda/nvvm/bin:$PATH
 {% endif %}
+ENV PATH=/usr/local/bin/etcd/:$PATH

 # Copy uv to system /bin
 COPY --from=dynamo_base /bin/uv /bin/uvx /bin/
@@ -153,6 +152,31 @@ RUN wget https://github.com/uxlfoundation/oneCCL/releases/download/2021.15.7/int
    ln -s /opt/intel/oneapi/ccl/2021.15 /opt/intel/oneapi/ccl/latest
 {% endif %}

+{% if device == "cpu" %}
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    curl ca-certificates zip unzip git lsb-release numactl wget vim \
+    gcc-12 g++-12 ccache \
+    libtcmalloc-minimal4 libnuma-dev \
+    ffmpeg libsm6 libxext6 libgl1 jq lsof && \
+    update-ca-certificates  && \
+    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 && \
+    curl -LsSf https://astral.sh/uv/install.sh | sh
+
+ENV CCACHE_DIR=/root/.cache/ccache
+ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
+
+ENV PATH="/root/.local/bin:$PATH"
+ENV VIRTUAL_ENV="/opt/dynamo/venv"
+ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
+RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV} && \
+    mkdir -p ${VIRTUAL_ENV}/include/site/python${PYTHON_VERSION} && \
+    chown -R dynamo:0 ${VIRTUAL_ENV} && \
+    chmod -R g+w ${VIRTUAL_ENV}
+
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+{% endif %}
+
 {% if context.vllm.enable_media_ffmpeg == "true" %}
 # Copy ffmpeg libraries from wheel_builder (requires root, runs before USER dynamo)
 RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
@@ -172,6 +196,10 @@ SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"]
 ENV NIXL_PREFIX=/opt/intel/intel_nixl
 ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/x86_64-linux-gnu
 ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
+{% elif device == "cpu" %}
+ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
+ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/x86_64-linux-gnu
+ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
 {% else %}
 ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
 ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib64
@@ -224,7 +252,7 @@ COPY --chown=dynamo: --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
 {# XPU NIXL uses lib/x86_64-linux-gnu; copy to NIXL_LIB_DIR to ensure lib dir is populated #}
 COPY --chown=dynamo: --from=wheel_builder /opt/intel/intel_nixl/lib/x86_64-linux-gnu/. ${NIXL_LIB_DIR}/
 {% endif %}
-{# For cuda: NIXL_LIB_DIR = lib64, already included in the $NIXL_PREFIX COPY above #}
+{# For cpu/cuda: NIXL libs are already included in the $NIXL_PREFIX COPY above #}
 COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
 COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/

@@ -236,15 +264,19 @@ $NIXL_LIB_DIR:\
 $NIXL_PLUGIN_DIR:\
 /usr/local/ucx/lib:\
 /usr/local/ucx/lib/ucx:\
-$LD_LIBRARY_PATH
+${LD_LIBRARY_PATH:-}

 {% if device == "cuda" %}
 ENV LD_LIBRARY_PATH=\
 /opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
-$LD_LIBRARY_PATH
+${LD_LIBRARY_PATH:-}
 ENV NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
 {% endif %}

+{% if device == "cpu" %}
+ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:${VIRTUAL_ENV}/lib/libiomp5.so"
+{% endif %}
+
 # TODO: skip /workspace COPYs for dev/local-dev (bind-mounted from host, these get shadowed)
 COPY --chmod=664 --chown=dynamo:0 ATTRIBUTION* LICENSE /workspace/
 {% if target not in ("dev", "local-dev") %}
@@ -387,7 +419,9 @@ RUN uv pip uninstall triton triton-xpu && \
    uv pip install triton-xpu==3.6.0 --extra-index-url=https://download.pytorch.org/whl/test/xpu && \
    uv pip uninstall oneccl && \
    uv pip uninstall oneccl-devel
+{%endif%}

+{% if device == "xpu" or device == "cpu" %}
 SHELL ["bash", "-c"]
 CMD ["bash", "-c", "source /etc/bash.bashrc && exec bash"]
 {% else %}

--- a/container/templates/wheel_builder.Dockerfile
+++ b/container/templates/wheel_builder.Dockerfile
@@ -33,6 +33,17 @@ ARG CARGO_BUILD_JOBS
 ARG DEVICE

 WORKDIR /workspace
+{% if device == "xpu" or device == "cpu" %}
+RUN apt clean && apt-get update -y && \
+    apt-get install -y --no-install-recommends --fix-missing \
+    curl ca-certificates zip unzip git lsb-release numactl wget vim \
+    libsndfile1 \
+    libsm6 \
+    libxext6 \
+    libgl1 \
+    libaio-dev \
+    linux-libc-dev
+{% endif %}

 {% if device == "cuda" %}
 # Copy CUDA from base stage
@@ -54,37 +65,24 @@ COPY --from=dynamo_base $RUSTUP_HOME $RUSTUP_HOME
 COPY --from=dynamo_base $CARGO_HOME $CARGO_HOME

 {% if device == "xpu" %}
+# Keep pipefail active for the key-import pipeline below (wget | gpg | tee) so a failed download fails the build.
 SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && \
    add-apt-repository -y ppa:kobuk-team/intel-graphics

+# Fetch UCX patch
 RUN wget --tries=3 --waitretry=5 https://raw.githubusercontent.com/intel/llm-scaler/35a14cbc08d714f460a29b7a7328df5620c8530f/vllm/patches/ai-dynamo-xpu/patches/ucx-v1.12.0.patch -O /tmp/ucx.patch

-RUN apt clean && apt-get update -y && \
-    apt-get install -y --no-install-recommends --fix-missing \
-    curl \
-    #ffmpeg \
-    ca-certificates \
-    zip \
-    unzip \
-    git \
-    libsndfile1 \
-    libsm6 \
-    libxext6 \
-    libgl1 \
-    lsb-release \
-    libaio-dev \
-    numactl \
-    wget \
-    vim \
-    linux-libc-dev && \
-    # Install Intel GPU runtime packages
-    apt update -y && apt upgrade -y && \
+# Install Intel GPU runtime packages
+RUN apt update -y && apt upgrade -y && \
    apt-get install -y libze1 libze-dev libze-intel-gpu1 intel-opencl-icd  \
    libze-intel-gpu-raytracing intel-ocloc intel-oneapi-compiler-dpcpp-cpp-2025.3 && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
+{% endif %}

+{% if device == "xpu" or device == "cpu" %}
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 RUN apt-get update -y \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        # NIXL build dependencies
@@ -189,14 +185,14 @@ RUN set -eux; \
 # Point build tools explicitly at the modern protoc
 ENV PROTOC=/usr/local/bin/protoc

-{% if device == "xpu" %}
+{% if device == "xpu" or device == "cpu" %}
 # Install uv package manager
 COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
-ENV LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib64:$LD_LIBRARY_PATH
+ENV LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib64:${LD_LIBRARY_PATH:-}
 {% else %}
 ENV CUDA_PATH=/usr/local/cuda \
    PATH=/usr/local/cuda/bin:$PATH \
-    LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/lib:/usr/local/lib64:$LD_LIBRARY_PATH \
+    LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/lib:/usr/local/lib64:${LD_LIBRARY_PATH:-} \
    NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
 {% endif %}

@@ -253,11 +249,11 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    if [ "$USE_SCCACHE" = "true" ]; then \
        eval $(/tmp/use-sccache.sh setup-env); \
    fi && \
-    if [ "$DEVICE" = "xpu" ]; then \
-    apt-get update -y && apt-get install -y pkg-config; \
+    if [ "$DEVICE" = "xpu" ] || [ "$DEVICE" = "cpu" ]; then \
+    apt-get update -y && apt-get install -y build-essential pkg-config xz-utils; \
    apt-get clean && rm -rf /var/lib/apt/lists/*; \
    elif [ "$DEVICE" = "cuda" ]; then \
-    dnf install -y pkg-config; \
+    dnf install -y pkg-config xz; \
    fi && \
    cd /tmp && \
    curl --retry 5 --retry-delay 3 -LO https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.xz && \
@@ -331,6 +327,18 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
        --with-gdrcopy=/usr/local   \
        --with-efa                  \
        --enable-mt;                 \
+    elif [ "$DEVICE" = "cpu" ]; then  \
+     ./contrib/configure-release     \
+        --prefix=/usr/local/ucx     \
+        --enable-shared             \
+        --disable-static            \
+        --disable-doxygen-doc       \
+        --enable-optimizations      \
+        --enable-cma                \
+        --enable-devel-headers      \
+        --with-verbs                \
+        --without-cuda              \
+        --enable-mt;                 \
     fi && \
     make -j &&                      \
     make -j install-strip &&        \
@@ -495,8 +503,8 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    git checkout ${NIXL_REF} && \
    if [ "$DEVICE" = "cuda" ]; then \
        PKG_NAME="nixl-cu${CUDA_MAJOR}"; \
-    elif [ "$DEVICE" = "xpu" ]; then \
-        PKG_NAME="nixl-xpu"; \
+    else \
+        PKG_NAME="nixl-${DEVICE}"; \
    fi && \
    ./contrib/tomlutil.py --wheel-name $PKG_NAME pyproject.toml && \
    mkdir build && \
@@ -509,6 +517,9 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    elif [ "$DEVICE" = "xpu" ]; then \
        meson setup build/ --prefix=/opt/intel/intel_nixl --buildtype=release \
            -Ducx_path="/usr/local/ucx"; \
+    elif [ "$DEVICE" = "cpu" ]; then \
+        meson setup build/ --prefix=/opt/nvidia/nvda_nixl --buildtype=release \
+            -Ducx_path="/usr/local/ucx"; \
    fi && \
    cd build && \
    ninja && \
@@ -520,6 +531,10 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
 ENV NIXL_LIB_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu \
    NIXL_PLUGIN_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu/plugins \
    NIXL_PREFIX=/opt/intel/intel_nixl
+{% elif device == "cpu" %}
+ENV NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu \
+    NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/plugins \
+    NIXL_PREFIX=/opt/nvidia/nvda_nixl
 {% else %}
 ENV NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64 \
    NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins \
@@ -579,7 +594,7 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
                --plat manylinux_2_28_${ARCH_ALT} \
                --wheel-dir /opt/dynamo/dist \
                target/wheels/*.whl; \
-        elif [ "$DEVICE" = "xpu" ]; then \
+        elif [ "$DEVICE" = "xpu" ] || [ "$DEVICE" = "cpu" ]; then \
            cp target/wheels/*.whl /opt/dynamo/dist/; \
        fi; \
    fi && \