# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Dynamo vLLM runtime overlaid on the official DeepSeek-V4 vLLM image.
# Shared image for all DeepSeek-V4 recipes (Flash, Pro, ...).
#
# Base: vllm/vllm-openai:deepseekv4-cu130 — ships vLLM from PR #40760
# (zyongye/vllm:dsv4) with the DeepSeek-V4 kernels, tokenizer_mode, tool+reasoning
# parsers, hybrid CSA+HCA attention, MTP speculative decoding, and FP4 indexer.
#
# We take pre-built dynamo artifacts (wheels, nats, etcd, NIXL, UCX, dynamo.vllm
# python worker) from a locally-built Dynamo vLLM runtime image (produced via
# container/README.md at the repo root) and layer them on top of the dsv4 vLLM
# image without touching the vLLM install.
#
# Build (run from the repo root):
#   docker build -f recipes/deepseek-v4-pro/container/Dockerfile.dsv4 \
#     -t <registry>/vllm-dsv4:<tag> .
#
# See recipes/deepseek-v4-pro/container/README.md for build args and
# troubleshooting.
#
# Both base images must be Python 3.12 (verified).

# Default to the local tag produced by `container/render.py --framework vllm
# --target runtime` + `docker build -t dynamo:latest-vllm-runtime ...`. Override
# with --build-arg DYNAMO_SRC_IMAGE=... to use a published release tag instead.
ARG DYNAMO_SRC_IMAGE=dynamo:latest-vllm-runtime
ARG DSV4_BASE_IMAGE=vllm/vllm-openai:deepseekv4-cu130

# Artifact donor stage: nothing runs here, later COPY --from=dynamo_src
# instructions only read files out of it.
FROM ${DYNAMO_SRC_IMAGE} AS dynamo_src

# Final image: the DeepSeek-V4 vLLM image with the dynamo pieces layered on top.
FROM ${DSV4_BASE_IMAGE}

# Build-time only: declared as ARG (not ENV) so the setting is visible to the
# RUN steps of this stage without being baked into the runtime environment of
# the resulting image.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime deps dynamo needs that aren't in the vLLM image (etcd/nats are static
# binaries we COPY; libibverbs/rdma-core are needed for NIXL's UCX transport).
# Install the RDMA/NUMA runtime libraries plus a few CLI tools, then upgrade
# only those already-installed packages that `apt list --upgradable` reports
# from a `jammy-*` pocket. `tail -n +2` drops the first line of the listing and
# awk keeps the package name before the `/`.
# NOTE: the pipeline intentionally runs without `pipefail`: grep exits non-zero
# when nothing is upgradable, and `xargs -r` then simply does nothing.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        ibverbs-providers \
        ibverbs-utils \
        jq \
        libibumad3 \
        libibverbs1 \
        libnuma1 \
        librdmacm1 \
        rdma-core \
    && apt list --upgradable 2>/dev/null \
        | tail -n +2 \
        | grep 'jammy-' \
        | awk -F/ '{print $1}' \
        | xargs -r apt-get install -y --only-upgrade \
    && rm -rf /var/lib/apt/lists/*

# --- patch vLLM: drop unsupported topk=1024 from sparse attn indexer ---
# from https://github.com/vllm-project/vllm/pull/40760/changes/3602f14f0e146b234be911d916e381b4e6a4dc0c
# TODO: remove once https://github.com/vllm-project/vllm/pull/40760 lands in the base image.
# sed exits successfully even when the pattern is absent, so this step becomes a
# no-op (rather than a build failure) if the file no longer contains the tuple.
RUN sed -i 's/(512, 1024, 2048)/(512, 2048)/' \
        /usr/local/lib/python3.12/dist-packages/vllm/model_executor/layers/sparse_attn_indexer.py

# --- static binaries ---
COPY --from=dynamo_src /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_src /usr/local/bin/etcd /usr/local/bin/etcd
# NOTE(review): /usr/local/bin/etcd is put on PATH as a directory — presumably
# it holds the etcd binaries in the source image; confirm.
ENV PATH=/usr/local/bin/etcd:${PATH}

# --- UCX ---
COPY --from=dynamo_src /usr/local/ucx /usr/local/ucx
ENV PATH=/usr/local/ucx/bin:${PATH}

# --- NIXL (C++ libs for KV transfer) ---
COPY --from=dynamo_src /opt/nvidia/nvda_nixl /opt/nvidia/nvda_nixl
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \
    NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64 \
    NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins
# Prepend the NIXL and UCX library directories. The `:+` expansion appends the
# inherited value only when one exists, so an unset LD_LIBRARY_PATH in the base
# image does not leave a trailing ':' (an empty entry makes the dynamic loader
# search the current working directory).
ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}

# --- install dynamo python wheels into the dsv4 image's system python ---
# The dsv4 image uses system python3.12 with pip at /usr/local/lib/python3.12/dist-packages.
# ai_dynamo_runtime is abi3 (cp310+), compatible with cp312.
# Bring the pre-built wheels over from the donor stage and install them with the
# image's pip; the globs are expanded by the shell at build time.
COPY --from=dynamo_src /opt/dynamo/wheelhouse /opt/dynamo/wheelhouse
RUN pip install --no-cache-dir \
        /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
        /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
        /opt/dynamo/wheelhouse/nixl/nixl*.whl

# --- dynamo python source (dynamo.vllm worker + common + mocker) ---
# Bring the worker entrypoint tree so `python -m dynamo.vllm` resolves.
COPY --from=dynamo_src /workspace/components/src/dynamo /workspace/components/src/dynamo
# Append the inherited PYTHONPATH only when it is non-empty, so an unset value
# does not leave a trailing ':' (an empty entry puts the current working
# directory on the import path).
ENV PYTHONPATH=/workspace/components/src${PYTHONPATH:+:${PYTHONPATH}}

WORKDIR /workspace

# --- dynamo runtime env tweaks ---
# Keep vLLM's flashinfer sampler (enabled by default in 0.20+ but explicit here).
ENV VLLM_USE_FLASHINFER_SAMPLER=1

# Default to bash so the Dynamo CRD operator can exec `python3 -m dynamo.vllm`
# via the manifest command/args rather than the vLLM api_server entrypoint.
# The empty ENTRYPOINT clears whatever entrypoint the base image declares.
ENTRYPOINT []
CMD ["bash"]