feat: enable intel xpu dockerfile (#6109)

Signed-off-by: Zhan Xue <zhan.xue@intel.com>
Co-authored-by: XinYu Ye <xinyu.ye@intel.com>
Co-authored-by: Hongming Zheng <hongming.zheng@intel.com>

feat: enable intel xpu dockerfile (#6109)
Signed-off-by: Zhan Xue <zhan.xue@intel.com>
Co-authored-by: XinYu Ye <xinyu.ye@intel.com>
Co-authored-by: Hongming Zheng <hongming.zheng@intel.com>
e0a2e7bb · zxue2 · GitHub · ec63ff72 · e0a2e7bb · e0a2e7bb
Unverified Commit e0a2e7bb authored Mar 04, 2026 by zxue2 Committed by GitHub Mar 04, 2026
9 changed files
--- a/container/context.yaml
+++ b/container/context.yaml
@@ -10,10 +10,11 @@
 # when building.
 dynamo:
-  base_image: nvcr.io/nvidia/cuda-dl-base
  cuda12.9:
+    base_image: nvcr.io/nvidia/cuda-dl-base
    base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
  cuda13.0:
+    base_image: nvcr.io/nvidia/cuda-dl-base
    base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
  epp_image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v0.5.1
  frontend_image: nvcr.io/nvidia/base/ubuntu:noble-20250619
@@ -34,15 +35,24 @@ dynamo:
  efa_version: 1.45.1
 vllm:
-  base_image: nvcr.io/nvidia/cuda-dl-base
-  runtime_image: nvcr.io/nvidia/cuda
  cuda12.9:
+    base_image: nvcr.io/nvidia/cuda-dl-base
+    runtime_image: nvcr.io/nvidia/cuda
    base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
    runtime_image_tag: 12.9.1-runtime-ubuntu24.04
+    vllm_ref: v0.16.0
  cuda13.0:
+    base_image: nvcr.io/nvidia/cuda-dl-base
+    runtime_image: nvcr.io/nvidia/cuda
    base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
    runtime_image_tag: 13.0.2-runtime-ubuntu24.04
-  vllm_ref: v0.16.0
+    vllm_ref: v0.16.0
+  xpu:
+    base_image: intel/deep-learning-essentials
+    runtime_image: intel/deep-learning-essentials
+    base_image_tag: 2025.3.2-0-devel-ubuntu24.04
+    runtime_image_tag: 2025.3.2-0-devel-ubuntu24.04
+    vllm_ref: v0.14.0
  flashinf_ref: v0.6.3
  lmcache_ref: 0.3.14
  vllm_omni_ref: "v0.16.0rc1"
@@ -54,12 +64,14 @@ vllm:
  modelexpress_ref: "3d73992ce6c10e52ddc54f7f12af35d27e173f15"
 sglang:
-  base_image: nvcr.io/nvidia/cuda-dl-base
-  runtime_image: lmsysorg/sglang
  cuda12.9:
+    base_image: nvcr.io/nvidia/cuda-dl-base
+    runtime_image: lmsysorg/sglang
    base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
    runtime_image_tag: v0.5.9-runtime
  cuda13.0:
+    base_image: nvcr.io/nvidia/cuda-dl-base
+    runtime_image: lmsysorg/sglang
    base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
    runtime_image_tag: v0.5.9-cu130-runtime
  enable_media_ffmpeg: "false"
@@ -67,9 +79,9 @@ sglang:
  enable_kvbm: "false"
 trtllm:
-  base_image: nvcr.io/nvidia/pytorch
-  runtime_image: nvcr.io/nvidia/cuda-dl-base
  cuda13.1:
+    base_image: nvcr.io/nvidia/pytorch
+    runtime_image: nvcr.io/nvidia/cuda-dl-base
    base_image_tag: 25.12-py3
    runtime_image_tag: 25.12-cuda13.1-runtime-ubuntu24.04
  enable_media_ffmpeg: "false"

--- a/container/deps/vllm/install_vllm.sh
+++ b/container/deps/vllm/install_vllm.sh
@@ -14,6 +14,7 @@ set -euo pipefail
 VLLM_VER="0.16.0"
 VLLM_REF="v${VLLM_VER}"
+DEVICE="cuda"
 # Basic Configurations
 ARCH=$(uname -m)
@@ -30,6 +31,10 @@ VLLM_OMNI_REF="v0.16.0rc1"
 while [[ $# -gt 0 ]]; do
    case $1 in
+        --device)
+            DEVICE="$2"
+            shift 2
+            ;;
        --vllm-ref)
            VLLM_REF="$2"
            shift 2
@@ -71,8 +76,9 @@ while [[ $# -gt 0 ]]; do
            shift 2
            ;;
        -h|--help)
-            echo "Usage: $0 [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF] [--lmcache-ref REF] [--vllm-omni-ref REF] [--torch-cuda-arch-list LIST] [--cuda-version VERSION]"
+            echo "Usage: $0 [--device DEVICE] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF] [--lmcache-ref REF] [--vllm-omni-ref REF] [--torch-cuda-arch-list LIST] [--cuda-version VERSION]"
            echo "Options:"
+            echo "  --device DEVICE     Device Selection (default: cuda)"
            echo "  --vllm-ref REF      vLLM release version (default: ${VLLM_REF})"
            echo "  --max-jobs NUM      Maximum parallel jobs (default: ${MAX_JOBS})"
            echo "  --arch ARCH         Architecture amd64|arm64 (default: auto-detect)"
@@ -107,35 +113,50 @@ elif [ "$ARCH" = "arm64" ]; then
 fi
 export MAX_JOBS=$MAX_JOBS
-export CUDA_HOME=/usr/local/cuda
+if [ "$DEVICE" = "cuda" ]; then
+    export CUDA_HOME=/usr/local/cuda
-# Derive torch backend from CUDA version (e.g., "12.9" -> "cu129")
+    # Derive torch backend from CUDA version (e.g., "12.9" -> "cu129")
-TORCH_BACKEND="cu$(echo $CUDA_VERSION | tr -d '.')"
+    TORCH_BACKEND="cu$(echo $CUDA_VERSION | tr -d '.')"
-CUDA_VERSION_MAJOR=${CUDA_VERSION%%.*}
+    CUDA_VERSION_MAJOR=${CUDA_VERSION%%.*}
-echo "=== Installing prerequisites ==="
+    echo "=== Installing prerequisites ==="
-uv pip install pip cuda-python
+    uv pip install pip cuda-python
+fi
-echo "\n=== Configuration Summary ==="
+if [ "$DEVICE" = "cuda" ]; then
-echo "  VLLM_REF=$VLLM_REF | ARCH=$ARCH | CUDA_VERSION=$CUDA_VERSION | TORCH_BACKEND=$TORCH_BACKEND"
+    echo "\n=== Configuration Summary ==="
-echo "  TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST | INSTALLATION_DIR=$INSTALLATION_DIR"
+    echo "  VLLM_REF=$VLLM_REF | ARCH=$ARCH | CUDA_VERSION=$CUDA_VERSION | TORCH_BACKEND=$TORCH_BACKEND"
+    echo "  TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST | INSTALLATION_DIR=$INSTALLATION_DIR"
+elif [ "$DEVICE" = "xpu" ]; then
+    echo "\n=== Configuration Summary ==="
+    echo "  VLLM_REF=$VLLM_REF | ARCH=$ARCH | INSTALLATION_DIR=$INSTALLATION_DIR"
+fi
-if [[ "$CUDA_VERSION_MAJOR" == "12" ]]; then
+if [ "$DEVICE" = "cuda" ]; then
-    echo "  FLASHINF_REF=$FLASHINF_REF | LMCACHE_REF=$LMCACHE_REF | DEEPGEMM_REF=$DEEPGEMM_REF"
+    if [[ "$CUDA_VERSION_MAJOR" == "12" ]]; then
+        echo "  FLASHINF_REF=$FLASHINF_REF | LMCACHE_REF=$LMCACHE_REF | DEEPGEMM_REF=$DEEPGEMM_REF"
+        echo "\n=== Installing LMCache ==="
+        if [ "$ARCH" = "amd64" ]; then
+            # LMCache installation currently fails on arm64 due to CUDA dependency issues
+            # Install LMCache BEFORE vLLM so vLLM's dependencies take precedence
+            uv pip install lmcache==${LMCACHE_REF} --torch-backend=${TORCH_BACKEND}
+            echo "✓ LMCache ${LMCACHE_REF} installed"
+        else
+            echo "⚠ Skipping LMCache on ARM64 (compatibility issues)"
+        fi
+    else
+        echo "  FLASHINF_REF=$FLASHINF_REF | LMCache will not be installed as it doesn't support CUDA 13 yet | DEEPGEMM_REF=$DEEPGEMM_REF"
+    fi
+elif [ "$DEVICE" = "xpu" ]; then
+    echo " LMCACHE_REF=$LMCACHE_REF "
    echo "\n=== Installing LMCache ==="
    if [ "$ARCH" = "amd64" ]; then
-        # LMCache installation currently fails on arm64 due to CUDA dependency issues
+        uv pip install lmcache==${LMCACHE_REF}
-        # Install LMCache BEFORE vLLM so vLLM's dependencies take precedence
-        uv pip install lmcache==${LMCACHE_REF} --torch-backend=${TORCH_BACKEND}
        echo "✓ LMCache ${LMCACHE_REF} installed"
-    else
-        echo "⚠ Skipping LMCache on ARM64 (compatibility issues)"
    fi
-else
-    echo "  FLASHINF_REF=$FLASHINF_REF | LMCache will not be installed as it doesn't support CUDA 13 yet | DEEPGEMM_REF=$DEEPGEMM_REF"
 fi
 echo "\n=== Cloning vLLM repository ==="
 # Clone needed for DeepGEMM and EP kernels install scripts
 cd $INSTALLATION_DIR
@@ -144,48 +165,56 @@ cd vllm
 git checkout $VLLM_REF
 echo "✓ vLLM repository cloned"
+if [ "$DEVICE" = "xpu" ]; then
+    echo "\n=== Installing vLLM ==="
+    git apply --ignore-whitespace /tmp/vllm-xpu.patch
+    uv pip install -r requirements/xpu.txt --index-strategy unsafe-best-match
+    uv pip install --verbose --no-build-isolation .
+fi
-echo "\n=== Installing vLLM & FlashInfer ==="
+if [ "$DEVICE" = "cuda" ]; then
+    echo "\n=== Installing vLLM & FlashInfer ==="
-# Build GitHub release wheel URL per CUDA version
+    # Build GitHub release wheel URL per CUDA version
-# CUDA 12 wheels have no +cu suffix and use manylinux_2_31
+    # CUDA 12 wheels have no +cu suffix and use manylinux_2_31
-# CUDA 13 wheels have +cu130 suffix and use manylinux_2_35
+    # CUDA 13 wheels have +cu130 suffix and use manylinux_2_35
-if [[ "$CUDA_VERSION_MAJOR" == "12" ]]; then
+    if [[ "$CUDA_VERSION_MAJOR" == "12" ]]; then
-    VLLM_GITHUB_WHEEL="vllm-${VLLM_VER}-cp38-abi3-manylinux_2_31_${ALT_ARCH}.whl"
+        VLLM_GITHUB_WHEEL="vllm-${VLLM_VER}-cp38-abi3-manylinux_2_31_${ALT_ARCH}.whl"
-    EXTRA_PIP_ARGS=""
+        EXTRA_PIP_ARGS=""
-elif [[ "$CUDA_VERSION_MAJOR" == "13" ]]; then
+    elif [[ "$CUDA_VERSION_MAJOR" == "13" ]]; then
-    VLLM_GITHUB_WHEEL="vllm-${VLLM_VER}+${TORCH_BACKEND}-cp38-abi3-manylinux_2_35_${ALT_ARCH}.whl"
+        VLLM_GITHUB_WHEEL="vllm-${VLLM_VER}+${TORCH_BACKEND}-cp38-abi3-manylinux_2_35_${ALT_ARCH}.whl"
-    EXTRA_PIP_ARGS="--index-strategy=unsafe-best-match --extra-index-url https://download.pytorch.org/whl/${TORCH_BACKEND}"
+        EXTRA_PIP_ARGS="--index-strategy=unsafe-best-match --extra-index-url https://download.pytorch.org/whl/${TORCH_BACKEND}"
-else
+    else
-    echo "❌ Unsupported CUDA version for vLLM installation: ${CUDA_VERSION}"
+        echo "❌ Unsupported CUDA version for vLLM installation: ${CUDA_VERSION}"
-    exit 1
+        exit 1
-fi
+    fi
-VLLM_GITHUB_URL="https://github.com/vllm-project/vllm/releases/download/v${VLLM_VER}/${VLLM_GITHUB_WHEEL}"
+    VLLM_GITHUB_URL="https://github.com/vllm-project/vllm/releases/download/v${VLLM_VER}/${VLLM_GITHUB_WHEEL}"
-# Install vLLM wheel
+    # Install vLLM wheel
-# CUDA 12: Try PyPI first, fall back to GitHub release
+    # CUDA 12: Try PyPI first, fall back to GitHub release
-# CUDA 13: Always use GitHub release (PyPI only has cu12 wheels, --torch-backend
+    # CUDA 13: Always use GitHub release (PyPI only has cu12 wheels, --torch-backend
-#           does not prevent uv from resolving the cu12 variant)
+    #           does not prevent uv from resolving the cu12 variant)
-echo "Installing vLLM $VLLM_VER (torch backend: $TORCH_BACKEND)..."
+    echo "Installing vLLM $VLLM_VER (torch backend: $TORCH_BACKEND)..."
-if [[ "$CUDA_VERSION_MAJOR" == "12" ]]; then
+    if [[ "$CUDA_VERSION_MAJOR" == "12" ]]; then
-    if uv pip install "vllm[flashinfer,runai]==${VLLM_VER}" ${EXTRA_PIP_ARGS} --torch-backend=${TORCH_BACKEND} 2>&1; then
+        if uv pip install "vllm[flashinfer,runai]==${VLLM_VER}" ${EXTRA_PIP_ARGS} --torch-backend=${TORCH_BACKEND} 2>&1; then
-        echo "✓ vLLM ${VLLM_VER} installed from PyPI"
+            echo "✓ vLLM ${VLLM_VER} installed from PyPI"
+        else
+            echo "⚠ PyPI install failed, installing from GitHub release..."
+            uv pip install ${EXTRA_PIP_ARGS} \
+                "${VLLM_GITHUB_URL}[flashinfer,runai]" \
+                --torch-backend=${TORCH_BACKEND}
+            echo "✓ vLLM ${VLLM_VER} installed from GitHub"
+        fi
    else
-        echo "⚠ PyPI install failed, installing from GitHub release..."
+        echo "Installing vLLM from GitHub release (cu130 wheel not available on PyPI)..."
        uv pip install ${EXTRA_PIP_ARGS} \
            "${VLLM_GITHUB_URL}[flashinfer,runai]" \
            --torch-backend=${TORCH_BACKEND}
        echo "✓ vLLM ${VLLM_VER} installed from GitHub"
    fi
-else
+    uv pip install flashinfer-cubin==$FLASHINF_REF
-    echo "Installing vLLM from GitHub release (cu130 wheel not available on PyPI)..."
+    uv pip install flashinfer-jit-cache==$FLASHINF_REF --extra-index-url https://flashinfer.ai/whl/${TORCH_BACKEND}
-    uv pip install ${EXTRA_PIP_ARGS} \
-        "${VLLM_GITHUB_URL}[flashinfer,runai]" \
-        --torch-backend=${TORCH_BACKEND}
-    echo "✓ vLLM ${VLLM_VER} installed from GitHub"
 fi
-uv pip install flashinfer-cubin==$FLASHINF_REF
-uv pip install flashinfer-jit-cache==$FLASHINF_REF --extra-index-url https://flashinfer.ai/whl/${TORCH_BACKEND}
 echo "✓ vLLM installation completed"
 echo "\n=== Installing vLLM-Omni ==="
@@ -210,18 +239,19 @@ else
    echo "⚠ Skipping vLLM-Omni (no ref provided or ARM64 not supported)"
 fi
-echo "\n=== Installing DeepGEMM ==="
+if [ "$DEVICE" = "cuda" ]; then
-cd $INSTALLATION_DIR/vllm/tools
+    echo "\n=== Installing DeepGEMM ==="
-if [ -n "$DEEPGEMM_REF" ]; then
+    cd $INSTALLATION_DIR/vllm/tools
-    bash install_deepgemm.sh --cuda-version "${CUDA_VERSION}" --ref "$DEEPGEMM_REF"
+    if [ -n "$DEEPGEMM_REF" ]; then
-else
+        bash install_deepgemm.sh --cuda-version "${CUDA_VERSION}" --ref "$DEEPGEMM_REF"
-    bash install_deepgemm.sh --cuda-version "${CUDA_VERSION}"
+    else
-fi
+        bash install_deepgemm.sh --cuda-version "${CUDA_VERSION}"
-echo "✓ DeepGEMM installation completed"
+    fi
+    echo "✓ DeepGEMM installation completed"
-echo "\n=== Installing EP Kernels (PPLX and DeepEP) ==="
-cd ep_kernels/
-# TODO we will be able to specify which pplx and deepep commit we want in future
-TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST" bash install_python_libraries.sh
+    echo "\n=== Installing EP Kernels (PPLX and DeepEP) ==="
+    cd ep_kernels/
+    # TODO we will be able to specify which pplx and deepep commit we want in future
+    TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST" bash install_python_libraries.sh
+fi
 echo "\n✅ All installations completed successfully!"
--- a/container/render.py
+++ b/container/render.py
@@ -21,6 +21,15 @@ def parse_args():
        choices=["dynamo", "vllm", "sglang", "trtllm"],
        help="Dockerfile framework to use",
    )
+    parser.add_argument(
+        "--device",
+        type=str,
+        default="cuda",
+        choices=["cuda", "xpu"],
+        help="Dockerfile device to use",
+    )
    parser.add_argument(
        "--target",
        type=str,
@@ -58,6 +67,7 @@ def parse_args():
 def validate_args(args):
    valid_inputs = {
        "vllm": {
+            "device": ["cuda", "xpu"],
            "target": [
                "runtime",
                "dev",
@@ -69,6 +79,7 @@ def validate_args(args):
            "cuda_version": ["12.9", "13.0"],
        },
        "trtllm": {
+            "device": ["cuda"],
            "target": [
                "runtime",
                "dev",
@@ -80,6 +91,7 @@ def validate_args(args):
            "cuda_version": ["13.1"],
        },
        "sglang": {
+            "device": ["cuda"],
            "target": [
                "runtime",
                "dev",
@@ -90,6 +102,7 @@ def validate_args(args):
            "cuda_version": ["12.9", "13.0"],
        },
        "dynamo": {
+            "device": ["cuda"],
            "target": [
                "runtime",
                "dev",
@@ -106,14 +119,16 @@ def validate_args(args):
        if (
            args.target in valid_inputs[args.framework]["target"]
            and args.cuda_version in valid_inputs[args.framework]["cuda_version"]
+            and args.device in valid_inputs[args.framework]["device"]
        ):
            return
        raise ValueError(
-            f"Invalid input combination: [framework={args.framework},target={args.target},cuda_version={args.cuda_version}]"
+            f"Invalid input combination: [framework={args.framework},target={args.target},cuda_version={args.cuda_version},device={args.device}]"
        )
    raise ValueError(
-        f"Invalid input combination: [framework={args.framework},target={args.target},cuda_version={args.cuda_version}]"
+        f"Invalid input combination: [framework={args.framework},target={args.target},cuda_version={args.cuda_version},device={args.device}]"
    )
@@ -128,6 +143,7 @@ def render(args, context, script_dir):
    rendered = template.render(
        context=context,
        framework=args.framework,
+        device=args.device,
        target=args.target,
        platform=args.platform,
        cuda_version=args.cuda_version,
@@ -139,7 +155,7 @@ def render(args, context, script_dir):
    if args.output_short_filename:
        filename = "rendered.Dockerfile"
    else:
-        filename = f"{args.framework}-{args.target}-cuda{args.cuda_version}-{args.platform}-rendered.Dockerfile"
+        filename = f"{args.framework}-{args.target}-{args.device}{args.cuda_version}-{args.platform}-rendered.Dockerfile"
    with open(f"{script_dir}/{filename}", "w") as f:
        f.write(cleaned)
@@ -159,6 +175,9 @@ def render(args, context, script_dir):
 def main():
    args = parse_args()
    validate_args(args)
+    # Clear cuda version for non-cuda device
+    if args.device != "cuda":
+        args.cuda_version = ""
    script_dir = Path(__file__).parent
    with open(f"{script_dir}/context.yaml", "r") as f:
        context = yaml.safe_load(f)

--- a/container/templates/args.Dockerfile
+++ b/container/templates/args.Dockerfile
@@ -18,21 +18,35 @@
 #TODO OPS-592: Leverage uname -m to determine ARCH instead of passing it as an arg
 ARG ARCH={{ platform }}
 ARG ARCH_ALT={{ "x86_64" if platform == "amd64" else "aarch64" }}
+ARG DEVICE={{ device }}
+{% if device == "cuda" -%}
+{% set device_key = device + cuda_version -%}
+{% else -%}
+{% set device_key = device -%}
+{% endif %}
 # Python/CUDA configuration
 ARG PYTHON_VERSION={{ context.dynamo.python_version }}
+{% if device == "cuda" -%}
 ARG CUDA_VERSION={{ cuda_version }}
 ARG CUDA_MAJOR=${CUDA_VERSION%%.*}
+{% endif %}
 # Base and runtime images configuration
-{% set cuda_context_key = "cuda" + cuda_version %}
+ARG BASE_IMAGE={{ context[framework][device_key].base_image }}
-ARG BASE_IMAGE={{ context[framework].base_image }}
+ARG BASE_IMAGE_TAG={{ context[framework][device_key].base_image_tag }}
-ARG BASE_IMAGE_TAG={{ context[framework][cuda_context_key].base_image_tag }}
 {% if framework in ["sglang", "trtllm", "vllm"] -%}
-ARG RUNTIME_IMAGE={{ context[framework].runtime_image }}
+ARG RUNTIME_IMAGE={{ context[framework][device_key].runtime_image }}
-ARG RUNTIME_IMAGE_TAG={{ context[framework][cuda_context_key].runtime_image_tag }}
+ARG RUNTIME_IMAGE_TAG={{ context[framework][device_key].runtime_image_tag }}
 {%- endif %}
+# wheel builder image selection
+{% if device == "xpu" %}
+ARG WHEEL_BUILDER_IMAGE=${BASE_IMAGE}:${BASE_IMAGE_TAG}
+{% else %}
+ARG WHEEL_BUILDER_IMAGE=quay.io/pypa/manylinux_2_28_${ARCH_ALT}
+{% endif %}
 # Build configuration
 ARG ENABLE_KVBM={{ context[framework].enable_kvbm }}
 ARG CARGO_BUILD_JOBS
@@ -42,7 +56,9 @@ ARG ETCD_VERSION={{ context.dynamo.etcd_version }}
 ARG ENABLE_MEDIA_FFMPEG={{ context[framework].enable_media_ffmpeg }}
 ARG FFMPEG_VERSION={{ context.dynamo.ffmpeg_version }}
+{% if device == "cuda" -%}
 ARG ENABLE_GPU_MEMORY_SERVICE={{ context[framework].enable_gpu_memory_service }}
+{% endif %}
 # SCCACHE configuration
 ARG USE_SCCACHE
@@ -52,8 +68,10 @@ ARG SCCACHE_REGION=""
 # NIXL configuration
 ARG NIXL_UCX_REF={{ context.dynamo.nixl_ucx_ref }}
 ARG NIXL_REF={{ context.dynamo.nixl_ref }}
+{% if device == "cuda" %}
 ARG NIXL_GDRCOPY_REF={{ context.dynamo.nixl_gdrcopy_ref }}
 ARG NIXL_LIBFABRIC_REF={{ context.dynamo.nixl_libfabric_ref }}
+{% endif %}
 {% if target == "dev" or target == "local-dev" %}
 ARG FRAMEWORK={{ framework }}
@@ -66,19 +84,23 @@ ARG FRONTEND_IMAGE={{ context.dynamo.frontend_image }}
 {% if framework == "vllm" -%}
 # Make sure to update the dependency version in pyproject.toml when updating this
-ARG VLLM_REF={{ context.vllm.vllm_ref }}
+ARG VLLM_REF={{ context[framework][device_key].vllm_ref }}
 ARG MAX_JOBS={{ context.vllm.max_jobs }}
 # FlashInfer only respected when building vLLM from source, ie when VLLM_REF does not start with 'v' or for arm64 builds
+{% if device == "cuda" -%}
 ARG FLASHINF_REF={{ context.vllm.flashinf_ref }}
+{% endif %}
 ARG LMCACHE_REF={{ context.vllm.lmcache_ref }}
 ARG VLLM_OMNI_REF={{ context.vllm.vllm_omni_ref }}
+{% if device == "cuda" -%}
 # If left blank, then we will fallback to vLLM defaults
 ARG DEEPGEMM_REF=""
 # ModelExpress for P2P weight transfer (optional)
 ARG ENABLE_MODELEXPRESS_P2P={{ context.vllm.enable_modelexpress_p2p }}
 ARG MODELEXPRESS_REF={{ context.vllm.modelexpress_ref }}
+{% endif %}
 {%- endif -%}
 {% if framework == "trtllm" %}

--- a/container/templates/dev.Dockerfile
+++ b/container/templates/dev.Dockerfile
@@ -15,6 +15,7 @@ FROM runtime AS dynamo_tools
 ARG ARCH
 ARG ARCH_ALT
+ARG DEVICE
 ENV DEBIAN_FRONTEND=noninteractive
 ENV PATH=/usr/local/bin:${PATH}

--- a/container/templates/local_dev.Dockerfile
+++ b/container/templates/local_dev.Dockerfile
@@ -15,6 +15,7 @@ FROM aws AS local-dev
 ENV USERNAME=dynamo
 ARG USER_UID
 ARG USER_GID
+ARG DEVICE
 # rustup is already at /home/dynamo/.rustup from the dev stage (COPY --from=wheel_builder
 # with --chown=dynamo:0 --chmod=775), so no re-copy needed here.

--- a/container/templates/vllm_framework.Dockerfile
+++ b/container/templates/vllm_framework.Dockerfile
@@ -10,7 +10,7 @@
 # PURPOSE: Framework development and vLLM compilation
 #
 # This stage builds and compiles framework dependencies including:
-# - vLLM inference engine with CUDA support
+# - vLLM inference engine with CUDA/XPU support
 # - DeepGEMM and FlashInfer optimizations
 # - All necessary build tools and compilation dependencies
 # - Framework-level Python packages and extensions
@@ -27,6 +27,7 @@ FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework
 COPY --from=dynamo_base /bin/uv /bin/uvx /bin/
 ARG PYTHON_VERSION
+ARG DEVICE
 # Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
@@ -65,15 +66,27 @@ ARG ARCH
 # rebuilds from unrelated source code changes
 ARG VLLM_REF
 ARG VLLM_GIT_URL
-ARG DEEPGEMM_REF
-ARG FLASHINF_REF
 ARG LMCACHE_REF
 ARG VLLM_OMNI_REF
+{% if device == "cuda" %}
+ARG DEEPGEMM_REF
+ARG FLASHINF_REF
 ARG CUDA_VERSION
+{% endif %}
 ARG MAX_JOBS
 ENV MAX_JOBS=$MAX_JOBS
+{% if device == "cuda" %}
 ENV CUDA_HOME=/usr/local/cuda
+{% endif %}
+{% if device == "xpu" %}
+RUN wget --tries=3 --waitretry=5 https://raw.githubusercontent.com/intel/llm-scaler/35a14cbc08d714f460a29b7a7328df5620c8530f/vllm/patches/ai-dynamo-xpu/patches/vllm-xpu-v0.14.0.patch -O /tmp/vllm-xpu.patch
+ENV VLLM_TARGET_DEVICE=xpu
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
+{% endif %}
 # Install VLLM and related dependencies
 RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
@@ -82,16 +95,19 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
    cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
    chmod +x /tmp/install_vllm.sh && \
    /tmp/install_vllm.sh \
+        --device $DEVICE \
        --vllm-ref $VLLM_REF \
        --max-jobs $MAX_JOBS \
        --arch $ARCH \
        --installation-dir /opt \
-        ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} \
-        ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} \
        ${LMCACHE_REF:+--lmcache-ref "$LMCACHE_REF"} \
        ${VLLM_OMNI_REF:+--vllm-omni-ref "$VLLM_OMNI_REF"} \
-        --cuda-version $CUDA_VERSION
+        ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} \
+        ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} \
+        ${CUDA_VERSION:+--cuda-version "$CUDA_VERSION"}
+{% if device == "cuda" %}
 ENV LD_LIBRARY_PATH=\
 /opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
 $LD_LIBRARY_PATH
+{% endif %}
--- a/container/templates/vllm_runtime.Dockerfile
+++ b/container/templates/vllm_runtime.Dockerfile
@@ -24,10 +24,19 @@
 FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
+ARG DEVICE
 WORKDIR /workspace
 ENV DYNAMO_HOME=/opt/dynamo
 ENV VIRTUAL_ENV=/opt/dynamo/venv
 ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
+{% if device == "xpu" %}
+RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
+    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && \
+    add-apt-repository -y ppa:kobuk-team/intel-graphics
+{% endif %}
+{% if device == "cuda" %}
 # Set CUDA_DEVICE_ORDER to ensure CUDA logical device IDs match NVML physical device IDs
 # This fixes NVML InvalidArgument errors when CUDA_VISIBLE_DEVICES is set
 ENV CUDA_DEVICE_ORDER=PCI_BUS_ID
@@ -51,13 +60,19 @@ ENV CPATH=/usr/local/cuda/include \
    TRITON_NVDISASM_PATH=/usr/local/cuda/bin/nvdisasm \
    TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas \
    TRITON_CUDART_PATH=/usr/local/cuda/include
+{% endif %}
 ### COPY NATS & ETCD ###
 # Copy nats and etcd from dev image
 COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
 COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
+{% if device == "xpu" %}
+ENV PATH=/usr/local/bin/etcd/:$PATH
+{% else %}
 # Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
 ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
+{% endif %}
 # Copy uv to system /bin
 COPY --from=dynamo_base /bin/uv /bin/uvx /bin/
@@ -82,8 +97,10 @@ ENV PYTHON_VERSION=${PYTHON_VERSION}
 # Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    apt-get update && \
-    CUDA_VERSION_MAJOR=${CUDA_VERSION%%.*} &&\
+    if [ "$DEVICE" = "cuda" ]; then \
-    CUDA_VERSION_MINOR=$(echo "${CUDA_VERSION#*.}" | cut -d. -f1) && \
+        CUDA_VERSION_MAJOR=${CUDA_VERSION%%.*} &&\
+        CUDA_VERSION_MINOR=$(echo "${CUDA_VERSION#*.}" | cut -d. -f1); \
+    fi && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        # Python runtime - CRITICAL for virtual environment to work
        python${PYTHON_VERSION}-dev \
@@ -104,11 +121,39 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
        # prometheus dependencies
        ca-certificates \
        # opencv-python-headless (vLLM dependency) requires libxcb for some functions
-        libxcb1 \
+        libxcb1 && \
+    if [ "$DEVICE" = "cuda" ]; then \
+        DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        # DeepGemm uses 'cuobjdump' which does not come with CUDA image
-        cuda-command-line-tools-${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR} && \
+        cuda-command-line-tools-${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}; \
+    fi && \
    rm -rf /var/lib/apt/lists/*
+{% if device == "xpu" %}
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends --fix-missing \
+    #ffmpeg \
+    libsndfile1 \
+    libsm6 \
+    libxext6 \
+    libgl1 \
+    lsb-release \
+    numactl \
+    wget \
+    vim \
+    linux-libc-dev && \
+    # Install Intel GPU runtime packages
+    apt-get install -y libze1 libze-dev libze-intel-gpu1 intel-opencl-icd libze-intel-gpu-raytracing \
+    intel-ocloc intel-oneapi-compiler-dpcpp-cpp-2025.3 && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+RUN wget https://github.com/uxlfoundation/oneCCL/releases/download/2021.15.7/intel-oneccl-2021.15.7.8_offline.sh && \
+    bash intel-oneccl-2021.15.7.8_offline.sh -a --silent --eula accept && \
+    echo "source /opt/intel/oneapi/setvars.sh --force" >> /etc/bash.bashrc && \
+    rm -f /opt/intel/oneapi/ccl/latest && \
+    ln -s /opt/intel/oneapi/ccl/2021.15 /opt/intel/oneapi/ccl/latest
+{% endif %}
 {% if context.vllm.enable_media_ffmpeg == "true" %}
 # Copy ffmpeg libraries from wheel_builder (requires root, runs before USER dynamo)
 RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
@@ -124,9 +169,15 @@ ENV HOME=/home/dynamo
 # This picks up the umask 002 from the /etc/profile.d/00-umask.sh file for subsequent RUN commands
 SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"]
+{% if device == "xpu" %}
+ENV NIXL_PREFIX=/opt/intel/intel_nixl
+ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
+ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
+{% else %}
 ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
 ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
 ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
+{% endif %}
 # Site-packages path derived from PYTHON_VERSION ARG
 ARG SITE_PACKAGES=${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages
@@ -138,15 +189,19 @@ ARG SITE_PACKAGES=${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages
 #
 # Layer sizes (uncompressed): nvidia=4.5GB, flashinfer_jit_cache=4.1GB, torch=2.1GB,
 #                             vllm=1.2GB, triton=592MB, flashinfer_cubin=437MB
+{% if device == "cuda" %}
 COPY --chmod=775 --chown=dynamo:0 --from=framework ${SITE_PACKAGES}/nvidia ${SITE_PACKAGES}/nvidia
 COPY --chmod=775 --chown=dynamo:0 --from=framework ${SITE_PACKAGES}/flashinfer_jit_cache ${SITE_PACKAGES}/flashinfer_jit_cache
+{% endif %}
 COPY --chmod=775 --chown=dynamo:0 --from=framework ${SITE_PACKAGES}/torch ${SITE_PACKAGES}/torch
 COPY --chmod=775 --chown=dynamo:0 --from=framework ${SITE_PACKAGES}/vllm ${SITE_PACKAGES}/vllm
 {% if platform == "amd64" -%}
 COPY --chmod=775 --chown=dynamo:0 --from=framework ${SITE_PACKAGES}/vllm_omni ${SITE_PACKAGES}/vllm_omni
 {% endif -%}
 COPY --chmod=775 --chown=dynamo:0 --from=framework ${SITE_PACKAGES}/triton ${SITE_PACKAGES}/triton
+{% if device == "cuda" %}
 COPY --chmod=775 --chown=dynamo:0 --from=framework ${SITE_PACKAGES}/flashinfer_cubin ${SITE_PACKAGES}/flashinfer_cubin
+{% endif %}
 # Remaining packages and venv structure (bin/, include/, share/, etc.)
 COPY --chmod=775 --chown=dynamo:0 --from=framework \
    --exclude=lib/python*/site-packages/nvidia \
@@ -166,26 +221,37 @@ COPY --chown=dynamo:0 --from=framework /opt/vllm /opt/vllm
 # Copy UCX and NIXL to system directories (read-only, no group-write needed)
 COPY --from=wheel_builder /usr/local/ucx /usr/local/ucx
 COPY --chown=dynamo: --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
+{% if device == "xpu" %}
+COPY --chown=dynamo: --from=wheel_builder /opt/intel/intel_nixl/lib/${ARCH_ALT}-linux-gnu/. ${NIXL_LIB_DIR}/
+{% else %}
 COPY --chown=dynamo: --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_LIB_DIR}/
+{% endif %}
 COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
 COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/
+{% if device == "cuda" %}
 # Copy AWS SDK C++ libraries (required for NIXL OBJ backend / S3 support)
 COPY --chown=dynamo: --from=wheel_builder /usr/local/lib64/libaws* /usr/local/lib/
 COPY --chown=dynamo: --from=wheel_builder /usr/local/lib64/libs2n* /usr/local/lib/
 COPY --chown=dynamo: --from=wheel_builder /usr/lib64/libcrypto.so.1.1* /usr/local/lib/
 COPY --chown=dynamo: --from=wheel_builder /usr/lib64/libssl.so.1.1* /usr/local/lib/
+{% endif %}
 ENV PATH=/usr/local/ucx/bin:$PATH
 ENV LD_LIBRARY_PATH=\
-/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
 $NIXL_LIB_DIR:\
 $NIXL_PLUGIN_DIR:\
 /usr/local/ucx/lib:\
 /usr/local/ucx/lib/ucx:\
 $LD_LIBRARY_PATH
+{% if device == "cuda" %}
+ENV LD_LIBRARY_PATH=\
+/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
+$LD_LIBRARY_PATH
 ENV NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
+{% endif %}
 # TODO: skip /workspace COPYs for dev/local-dev (bind-mounted from host, these get shadowed)
 COPY --chmod=664 --chown=dynamo:0 ATTRIBUTION* LICENSE /workspace/
@@ -226,6 +292,7 @@ RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
    uv pip install /opt/dynamo/wheelhouse/nixl/nixl*.whl
 {% endif %}
+{% if device == "cuda" %}
 # Install gpu_memory_service wheel if enabled (all targets)
 ARG ENABLE_GPU_MEMORY_SERVICE
 RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
@@ -235,7 +302,6 @@ RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
        if [ -n "$GMS_WHEEL" ]; then uv pip install "$GMS_WHEEL"; fi; \
    fi
 # Install ModelExpress for P2P weight transfer (optional)
 ARG ENABLE_MODELEXPRESS_P2P
 ARG MODELEXPRESS_REF
@@ -243,6 +309,7 @@ RUN if [ "${ENABLE_MODELEXPRESS_P2P}" = "true" ]; then \
        echo "Installing ModelExpress from ref: ${MODELEXPRESS_REF}" && \
        uv pip install "modelexpress @ git+https://github.com/ai-dynamo/modelexpress.git@${MODELEXPRESS_REF}#subdirectory=modelexpress_client/python"; \
    fi
+{% endif %}
 # Install common and test dependencies. Cache uv downloads; uv handles its own locking for this cache.
 RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
@@ -274,6 +341,7 @@ RUN chmod g+w /workspace /workspace/* /opt/dynamo /opt/dynamo/* ${VIRTUAL_ENV} &
    echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
    echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
+{% if device == "cuda" %}
 # Fix library symlinks that Docker COPY dereferenced (COPY always follows symlinks)
 # This recreates proper symlinks to save space and suppress ldconfig warnings
 RUN cd /usr/local/lib && \
@@ -304,15 +372,25 @@ RUN cd /usr/local/lib && \
        fi; \
    done && \
    ldconfig
+{% endif %}
 USER dynamo
 ARG DYNAMO_COMMIT_SHA
 ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
+{% if device == "xpu" %}
+# XPU: drop any installed triton / triton-xpu, install triton-xpu 3.6.0 (the PyTorch
+# test XPU index is added as an extra index), then uninstall oneccl and oneccl-devel.
+# NOTE(review): the reason for removing the oneccl packages is not visible here — confirm.
+RUN uv pip uninstall triton triton-xpu && \
+    uv pip install triton-xpu==3.6.0 --extra-index-url=https://download.pytorch.org/whl/test/xpu && \
+    uv pip uninstall oneccl && \
+    uv pip uninstall oneccl-devel
+# Default command: a bash shell that sources /etc/bash.bashrc first.
+# NOTE(review): SHELL only affects later shell-form instructions; the CMD below is exec-form.
+SHELL ["bash", "-c"]
+CMD ["bash", "-c", "source /etc/bash.bashrc && exec bash"]
+{% else %}
 # In vLLM 0.12 the default sampler changed on the forward pass.
 # We need to enable this to enable the cuda kernels.
 ENV VLLM_USE_FLASHINFER_SAMPLER=1
 ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
 CMD []
+{% endif %}
--- a/container/templates/wheel_builder.Dockerfile
+++ b/container/templates/wheel_builder.Dockerfile
@@ -7,21 +7,21 @@
 ##### Wheel Build Image ##########
 ##################################
-# Redeclare ARCH_ALT ARG so it's available for interpolation in the FROM instruction
+# The builder base image is now supplied through WHEEL_BUILDER_IMAGE.
+# NOTE(review): an ARG used in FROM must be declared before the first FROM; that
+# declaration is not part of this hunk — confirm it exists.
+FROM ${WHEEL_BUILDER_IMAGE} AS wheel_builder
-ARG ARCH_ALT
-FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder
 # Redeclare ARGs for this stage
 ARG ARCH
 ARG ARCH_ALT
 ARG CARGO_BUILD_JOBS
+ARG DEVICE
 WORKDIR /workspace
+{% if device == "cuda" %}
 # Copy CUDA from base stage
 COPY --from=dynamo_base /usr/local/cuda /usr/local/cuda
 COPY --from=dynamo_base /etc/ld.so.conf.d/hpcx.conf /etc/ld.so.conf.d/hpcx.conf
+{% endif %}
 # Set environment variables first so they can be used in COPY commands
 ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} \
@@ -34,6 +34,71 @@ ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} \
 COPY --from=dynamo_base $RUSTUP_HOME $RUSTUP_HOME
 COPY --from=dynamo_base $CARGO_HOME $CARGO_HOME
+{% if device == "xpu" %}
+# Run shell-form instructions under bash with pipefail so a failing pipeline stage fails the RUN.
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+# Register the Intel oneAPI apt repository (dearmored key + source list) and add the
+# kobuk-team intel-graphics PPA.
+# NOTE(review): wget, gpg and add-apt-repository must already exist in the base image — confirm.
+RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
+    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && \
+    add-apt-repository -y ppa:kobuk-team/intel-graphics
+# Download the UCX patch (pinned to a fixed llm-scaler commit) to /tmp/ucx.patch;
+# the UCX build step applies it with `git apply` when DEVICE is xpu.
+RUN wget --tries=3 --waitretry=5 https://raw.githubusercontent.com/intel/llm-scaler/35a14cbc08d714f460a29b7a7328df5620c8530f/vllm/patches/ai-dynamo-xpu/patches/ucx-v1.12.0.patch -O /tmp/ucx.patch
+# Base tooling plus the Intel GPU runtime and DPC++ compiler packages.
+# DEBIAN_FRONTEND is exported for this RUN only, so the upgrade/install steps can
+# never block on an interactive prompt (the sibling RUNs below already set it).
+# apt-get is used throughout: the `apt` front end has no stable CLI for scripts.
+# `--with-new-pkgs` keeps the behaviour of the former `apt upgrade`.
+RUN export DEBIAN_FRONTEND=noninteractive && \
+    apt-get clean && apt-get update -y && \
+    apt-get install -y --no-install-recommends --fix-missing \
+    curl \
+    #ffmpeg \
+    ca-certificates \
+    zip \
+    unzip \
+    git \
+    libsndfile1 \
+    libsm6 \
+    libxext6 \
+    libgl1 \
+    lsb-release \
+    libaio-dev \
+    numactl \
+    wget \
+    vim \
+    linux-libc-dev && \
+    # Install Intel GPU runtime packages
+    apt-get update -y && apt-get upgrade -y --with-new-pkgs && \
+    apt-get install -y libze1 libze-dev libze-intel-gpu1 intel-opencl-icd \
+    libze-intel-gpu-raytracing intel-ocloc intel-oneapi-compiler-dpcpp-cpp-2025.3 && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+# Build toolchain for the xpu builder; the inline comments group the packages by consumer.
+RUN apt-get update -y \
+    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+        # NIXL build dependencies
+        autoconf \
+        automake \
+        cmake \
+        git-lfs \
+        libtool \
+        meson \
+        net-tools \
+        ninja-build \
+        pybind11-dev \
+        # Rust build dependencies
+        clang \
+        libclang-dev \
+        protobuf-compiler \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+# (Re)install the RDMA / verbs userspace and development packages.
+# NOTE(review): presumably required by the UCX `--with-verbs` configure option — confirm.
+RUN apt-get update -y \
+    && DEBIAN_FRONTEND=noninteractive apt-get -y install --reinstall --no-install-recommends \
+        libibverbs-dev \
+        rdma-core \
+        ibverbs-utils \
+        libibumad-dev \
+        libnuma-dev \
+        librdmacm-dev \
+        ibverbs-providers \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+{% endif %}
+{% if device == "cuda" %}
 # Install system dependencies
 # Cache dnf downloads; sharing=locked avoids dnf/rpm races with concurrent builds.
 RUN --mount=type=cache,target=/var/cache/dnf,sharing=locked \
@@ -84,7 +149,7 @@ ENV PATH="/opt/rh/gcc-toolset-14/root/usr/bin:${PATH}" \
    LD_LIBRARY_PATH="/opt/rh/gcc-toolset-14/root/usr/lib64:${LD_LIBRARY_PATH}" \
    CC="/opt/rh/gcc-toolset-14/root/usr/bin/gcc" \
    CXX="/opt/rh/gcc-toolset-14/root/usr/bin/g++"
+{% endif %}
 # Ensure a modern protoc is available (required for --experimental_allow_proto3_optional)
 RUN set -eux; \
@@ -104,10 +169,16 @@ RUN set -eux; \
 # Point build tools explicitly at the modern protoc
 ENV PROTOC=/usr/local/bin/protoc
+{% if device == "xpu" %}
+# Install uv package manager
+# NOTE(review): the uv image is referenced by the floating `latest` tag, so the uv
+# version can change between rebuilds — consider pinning a version or digest.
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
+# XPU: only the /usr/local library directories are added; no CUDA paths.
+ENV LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib64:$LD_LIBRARY_PATH
+{% else %}
 ENV CUDA_PATH=/usr/local/cuda \
    PATH=/usr/local/cuda/bin:$PATH \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/lib:/usr/local/lib64:$LD_LIBRARY_PATH \
    NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
+{% endif %}
 # Create virtual environment for building wheels
 ARG PYTHON_VERSION
@@ -120,6 +191,8 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 ARG NIXL_UCX_REF
 ARG NIXL_REF
+{% if device == "cuda" %}
 ARG NIXL_GDRCOPY_REF
 # Build and install gdrcopy
@@ -129,6 +202,7 @@ RUN git clone --depth 1 --branch ${NIXL_GDRCOPY_REF} https://github.com/NVIDIA/g
    rpm -Uvh gdrcopy-kmod-*.el8.noarch.rpm && \
    rpm -Uvh gdrcopy-*.el8.${ARCH_ALT}.rpm && \
    rpm -Uvh gdrcopy-devel-*.el8.noarch.rpm
+{% endif %}
 # sccache binary is pre-installed in dynamo_base; stage it off-PATH so
 # Meson doesn't auto-detect it as a CUDA compiler launcher
@@ -159,7 +233,12 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    if [ "$USE_SCCACHE" = "true" ]; then \
        eval $(/tmp/use-sccache.sh setup-env); \
    fi && \
-    dnf install -y pkg-config && \
+    # Install pkg-config with the package manager of the builder image (apt for xpu,
+    # dnf for cuda). Steps are chained with && so a failed install aborts the RUN
+    # instead of being masked by the cleanup commands that follow it.
+    if [ "$DEVICE" = "xpu" ]; then \
+        apt-get update -y && \
+        apt-get install -y pkg-config && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists/*; \
+    elif [ "$DEVICE" = "cuda" ]; then \
+        dnf install -y pkg-config; \
+    fi && \
    cd /tmp && \
    curl -LO https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.xz && \
    tar xf ffmpeg-${FFMPEG_VERSION}.tar.xz && \
@@ -195,11 +274,30 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
        eval $(/tmp/use-sccache.sh setup-env); \
    fi && \
    cd /usr/local/src && \
-     git clone https://github.com/openucx/ucx.git && \
+    git clone https://github.com/openucx/ucx.git && \
-     cd ucx && 			     \
+    cd ucx &&  \
-     git checkout $NIXL_UCX_REF &&	 \
+    git checkout $NIXL_UCX_REF &&	 \
-     ./autogen.sh &&      \
+    if [ "$DEVICE" = "xpu" ]; then \
-     ./contrib/configure-release    \
+    git apply --ignore-whitespace /tmp/ucx.patch; \
+    fi && \
+    ./autogen.sh &&      \
+    if [ "$DEVICE" = "xpu" ]; then \
+     ./contrib/configure-release     \
+        --prefix=/usr/local/ucx     \
+        --with-ze                   \
+        --enable-shared             \
+        --disable-static            \
+        --disable-doxygen-doc       \
+        --enable-optimizations      \
+        --enable-cma                \
+        --enable-devel-headers      \
+        --with-verbs                \
+        --with-dm                   \
+        --with-efa                  \
+        --without-cuda              \
+        --enable-mt;                 \
+    elif [ "$DEVICE" = "cuda" ]; then \
+     ./contrib/configure-release     \
        --prefix=/usr/local/ucx     \
        --enable-shared             \
        --disable-static            \
@@ -212,7 +310,8 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
        --with-dm                   \
        --with-gdrcopy=/usr/local   \
        --with-efa                  \
-        --enable-mt &&              \
+        --enable-mt;                 \
+     fi && \
     make -j &&                      \
     make -j install-strip &&        \
     /tmp/use-sccache.sh show-stats "UCX" && \
@@ -220,6 +319,7 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
     echo "/usr/local/ucx/lib/ucx" >> /etc/ld.so.conf.d/ucx.conf && \
     ldconfig
+{% if device == "cuda" %}
 ARG NIXL_LIBFABRIC_REF
 RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
@@ -248,8 +348,9 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    /tmp/use-sccache.sh show-stats "LIBFABRIC" && \
    echo "/usr/local/libfabric/lib" > /etc/ld.so.conf.d/libfabric.conf && \
    ldconfig
+{% endif %}
-{% if framework == "vllm" %}
+{% if framework == "vllm" and device == "cuda" %}
 # Build and install AWS SDK C++ (required for NIXL OBJ backend / S3 support)
 ARG AWS_SDK_CPP_VERSION=1.11.581
 RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
@@ -277,7 +378,10 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
 {% endif %}
 # build and install nixl
+{% if device == "cuda" %}
 ARG CUDA_MAJOR
+{% endif %}
 RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
@@ -288,22 +392,38 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    git clone "https://github.com/ai-dynamo/nixl.git" && \
    cd nixl && \
    git checkout ${NIXL_REF} && \
-    PKG_NAME="nixl-cu${CUDA_MAJOR}" && \
+    if [ "$DEVICE" = "cuda" ]; then \
+        PKG_NAME="nixl-cu${CUDA_MAJOR}"; \
+    elif [ "$DEVICE" = "xpu" ]; then \
+        PKG_NAME="nixl-xpu"; \
+    fi && \
    ./contrib/tomlutil.py --wheel-name $PKG_NAME pyproject.toml && \
    mkdir build && \
-    meson setup build/ --prefix=/opt/nvidia/nvda_nixl --buildtype=release \
+    if [ "$DEVICE" = "cuda" ]; then \
-    -Dcudapath_lib="/usr/local/cuda/lib64" \
+        meson setup build/ --prefix=/opt/nvidia/nvda_nixl --buildtype=release \
-    -Dcudapath_inc="/usr/local/cuda/include" \
+            -Dcudapath_lib="/usr/local/cuda/lib64" \
-    -Ducx_path="/usr/local/ucx" \
+            -Dcudapath_inc="/usr/local/cuda/include" \
-    -Dlibfabric_path="/usr/local/libfabric" && \
+            -Ducx_path="/usr/local/ucx" \
+            -Dlibfabric_path="/usr/local/libfabric"; \
+    elif [ "$DEVICE" = "xpu" ]; then \
+        meson setup build/ --prefix=/opt/intel/intel_nixl --buildtype=release \
+            -Ducx_path="/usr/local/ucx"; \
+    fi && \
    cd build && \
    ninja && \
    ninja install && \
    /tmp/use-sccache.sh show-stats "NIXL"
+{% if device == "xpu" %}
+# XPU: matches the meson --prefix=/opt/intel/intel_nixl used for the xpu NIXL build;
+# the library dir is set to lib/<ARCH_ALT>-linux-gnu instead of lib64.
+ENV NIXL_LIB_DIR=/opt/intel/intel_nixl/lib/${ARCH_ALT}-linux-gnu  \
+    NIXL_PLUGIN_DIR=/opt/intel/intel_nixl/lib/${ARCH_ALT}-linux-gnu/plugins \
+    NIXL_PREFIX=/opt/intel/intel_nixl
+{% else %}
 ENV NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64  \
    NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins \
    NIXL_PREFIX=/opt/nvidia/nvda_nixl
+{% endif %}
 ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}
 RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
@@ -355,21 +475,24 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    else \
        maturin build --release --out /opt/dynamo/dist; \
    fi && \
-    if [ "$ENABLE_KVBM" == "true" ]; then \
+    if [ "$ENABLE_KVBM" = "true" ]; then \
        cd /opt/dynamo/lib/bindings/kvbm && \
        maturin build --release --out target/wheels && \
-        auditwheel repair \
+        if [ "$DEVICE" = "cuda" ]; then \
-            --exclude libnixl.so \
+            auditwheel repair \
-            --exclude libnixl_build.so \
+                --exclude libnixl.so \
-            --exclude libnixl_common.so \
+                --exclude libnixl_build.so \
-            --exclude 'lib*.so*' \
+                --exclude libnixl_common.so \
-            --plat manylinux_2_28_${ARCH_ALT} \
+                --exclude 'lib*.so*' \
-            --wheel-dir /opt/dynamo/dist \
+                --plat manylinux_2_28_${ARCH_ALT} \
-            target/wheels/*.whl; \
+                --wheel-dir /opt/dynamo/dist \
+                target/wheels/*.whl; \
+        elif [ "$DEVICE" = "xpu" ]; then \
+            cp target/wheels/*.whl /opt/dynamo/dist/; \
+        fi; \
    fi && \
    /tmp/use-sccache.sh show-stats "Dynamo"
 {% else %}
 # Dev/local-dev targets do not have pre-built wheels or /workspace source code.
 # After you start the local-dev/dev container, you will need to build from source:
@@ -387,6 +510,8 @@ COPY lib/gpu_memory_service/ /opt/dynamo/lib/gpu_memory_service/
 {% endif %}
 # Build gpu-memory-service wheel → /opt/dynamo/dist/gpu_memory_service*.whl (small C++ extension, fast build -- all targets, all frameworks)
+{% if device == "cuda" %}
+# CUDA builds only: build the gpu_memory_service wheel (the C++ extension itself only needs Python headers, not CUDA/torch)
 ARG ENABLE_GPU_MEMORY_SERVICE
 RUN --mount=type=cache,target=/root/.cache/uv \
    if [ "$ENABLE_GPU_MEMORY_SERVICE" = "true" ]; then \
@@ -394,3 +519,4 @@ RUN --mount=type=cache,target=/root/.cache/uv \
        source ${VIRTUAL_ENV}/bin/activate && \
        uv build --wheel --out-dir /opt/dynamo/dist /opt/dynamo/lib/gpu_memory_service; \
    fi
+{% endif %}
\ No newline at end of file