# syntax=docker/dockerfile:1.10.0
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# ---------------------------------------------------------------------------
# Global build arguments
# ---------------------------------------------------------------------------
# The first group is shared with the plain Dockerfile. build.sh is the source
# of truth for these values, so they must NOT be given a default here.
ARG BASE_IMAGE
ARG BASE_IMAGE_TAG
ARG PYTHON_VERSION
ARG ENABLE_KVBM

# Image (and matching CUDA version) that the runtime stage is built on.
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
ARG CUDA_VERSION="12.8"

# vLLM revision to install.
# Keep the dependency version in pyproject.toml in sync when changing this.
ARG VLLM_REF="v0.11.0"
# FlashInfer revision. Only honoured when vLLM is built from source, i.e. when
# VLLM_REF does not start with 'v', or for arm64 builds.
ARG FLASHINF_REF="v0.3.1"
ARG TORCH_BACKEND="cu128"

# DeepGEMM revision. Leave blank to fall back to the vLLM defaults.
ARG DEEPGEMM_REF=""

# sccache configuration - inherited from the base build.
ARG USE_SCCACHE
ARG SCCACHE_BUCKET=""
ARG SCCACHE_REGION=""

# Architecture ARGs, so that both x86 and aarch64 builds are supported:
#   ARCH:     used for package suffixes (e.g., amd64, arm64)
#   ARCH_ALT: used for Rust targets and the manylinux suffix (e.g., x86_64, aarch64)
#
# The defaults target x86/amd64:
#   --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
#
# For arm64/aarch64, build with:
#   --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
#
# NOTE: There isn't an easy way to derive one of these values from the other
#       without adding if statements everywhere, so both are plain ARGs for now.
ARG ARCH=amd64
ARG ARCH_ALT=x86_64

# Image that provides the prebuilt Dynamo artifacts copied into later stages
# (nats-server, etcd, rustup/cargo, ...).
ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base

########################################################
########## Framework Development Image ################
########################################################
#
# PURPOSE: Framework development and vLLM compilation
#
# This stage builds and compiles framework dependencies including:
# - vLLM inference engine with CUDA support
# - DeepGEMM and FlashInfer optimizations
# - All necessary build tools and compilation dependencies
# - Framework-level Python packages and extensions
#
# Use this stage when you need to:
# - Build vLLM from source with custom modifications
# - Develop or debug framework-level components
# - Create custom builds with specific optimization flags
#
# The stage is built on ${BASE_IMAGE}:${BASE_IMAGE_TAG}
# (see /container/Dockerfile for more details on the dynamo base image).
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework

ARG PYTHON_VERSION

RUN apt-get update -y \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        # Python runtime - CRITICAL for virtual environment to work
        python${PYTHON_VERSION}-dev \
        build-essential \
        # vLLM build dependencies
        cmake \
        ibverbs-providers \
        ibverbs-utils \
        libibumad-dev \
        libibverbs-dev \
        libnuma-dev \
        librdmacm-dev \
        rdma-core \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# If libmlx5.so is not shipped with the 24.04 rdma-core packaging, CMake will fail
# when looking for the generic dev name (.so), so we symlink .so.1 -> .so.
# NOTE(review): the path is hard-coded to the aarch64 multiarch directory; on any
# other layout the `ln` fails and `|| true` turns the step into a no-op.
RUN ln -sf /usr/lib/aarch64-linux-gnu/libmlx5.so.1 /usr/lib/aarch64-linux-gnu/libmlx5.so || true

### VIRTUAL ENVIRONMENT SETUP ###

# NOTE(review): uv is taken from the floating `latest` tag, so two builds of the
# same commit can pick up different uv versions - consider pinning a version.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Create virtual environment
RUN mkdir -p /opt/dynamo/venv && \
    uv venv /opt/dynamo/venv --python $PYTHON_VERSION

# Activate virtual environment
ENV VIRTUAL_ENV=/opt/dynamo/venv \
    PATH="/opt/dynamo/venv/bin:${PATH}"

ARG ARCH
# Install vllm - keep this early in Dockerfile to avoid
# rebuilds from unrelated source code changes
ARG VLLM_REF
ARG VLLM_GIT_URL
ARG DEEPGEMM_REF
ARG FLASHINF_REF
ARG TORCH_BACKEND
ARG CUDA_VERSION

ARG MAX_JOBS=16
ENV MAX_JOBS=$MAX_JOBS
ENV CUDA_HOME=/usr/local/cuda

# Helper script used below to install sccache and to print its statistics
COPY container/use-sccache.sh /tmp/use-sccache.sh
# Install sccache if requested
ARG USE_SCCACHE
ARG ARCH_ALT
ARG SCCACHE_BUCKET
ARG SCCACHE_REGION

ENV ARCH_ALT=${ARCH_ALT}
RUN if [ "$USE_SCCACHE" = "true" ]; then \
        /tmp/use-sccache.sh install; \
    fi

# sccache-related environment.
# ${USE_SCCACHE:+value} expands to `value` for ANY non-empty USE_SCCACHE, so these
# are empty strings only when USE_SCCACHE is unset or empty. A value such as
# "false" still populates them here; the vLLM build step below blanks them again
# unless USE_SCCACHE is exactly "true" (the same condition that installs sccache).
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
    SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
    CMAKE_C_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \
    CMAKE_CXX_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \
    CMAKE_CUDA_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache}

# Install VLLM and related dependencies
#   - container/deps is bind-mounted (not copied) at /tmp/deps
#   - the uv cache is kept in a BuildKit cache mount
#   - AWS credentials are exposed only to this step via secret mounts
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
    --mount=type=cache,target=/root/.cache/uv \
    --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    # Do not route compilers through sccache unless it was actually installed above
    if [ "$USE_SCCACHE" != "true" ]; then \
        export SCCACHE_BUCKET= SCCACHE_REGION= \
               CMAKE_C_COMPILER_LAUNCHER= CMAKE_CXX_COMPILER_LAUNCHER= CMAKE_CUDA_COMPILER_LAUNCHER=; \
    fi && \
    # Default the S3 key prefix to the target architecture
    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
    cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
    chmod +x /tmp/install_vllm.sh && \
    /tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} --torch-backend $TORCH_BACKEND --cuda-version $CUDA_VERSION && \
    /tmp/use-sccache.sh show-stats "vLLM"

# Kept on a single line: leading whitespace on continuation lines would become
# part of the value.
ENV LD_LIBRARY_PATH=/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:$LD_LIBRARY_PATH

##################################################
########## Runtime Image ########################
##################################################
#
# PURPOSE: Production runtime environment
#
# This stage creates a lightweight production-ready image containing:
# - Pre-compiled vLLM and framework dependencies
# - Dynamo runtime libraries and Python packages
# - Essential runtime dependencies and configurations
# - Optimized for inference workloads and deployment
#
# Use this stage when you need:
# - Production deployment of Dynamo with vLLM
# - Minimal runtime footprint without build tools
# - Ready-to-run inference server environment
# - Base for custom application containers
#

FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

WORKDIR /workspace
# Separate ENV instructions: PATH below needs the VIRTUAL_ENV value set here.
ENV DYNAMO_HOME=/opt/dynamo
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

ARG ARCH_ALT
ARG PYTHON_VERSION
ARG ENABLE_KVBM
# Each NIXL variable builds on the previous one, hence one ENV per variable.
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA

# Install Python, build-essential and python3-dev as apt dependencies
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        # Python runtime - CRITICAL for virtual environment to work
        python${PYTHON_VERSION}-dev \
        build-essential \
        # jq and curl for polling various endpoints and health checks
        jq \
        git \
        git-lfs \
        curl \
        # Libraries required by UCX to find RDMA devices
        libibverbs1 rdma-core ibverbs-utils libibumad3 \
        libnuma1 librdmacm1 ibverbs-providers \
        # JIT Kernel Compilation, flashinfer
        ninja-build \
        g++ \
        # prometheus dependencies
        ca-certificates \
        # DeepGemm uses 'cuobjdump' which does not come with CUDA image
        # NOTE(review): package suffix is hard-coded to 12-8; keep it in step with
        # CUDA_VERSION / RUNTIME_IMAGE_TAG when those are bumped.
        cuda-command-line-tools-12-8 && \
    rm -rf /var/lib/apt/lists/*

# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from the framework stage
COPY --from=framework /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc
COPY --from=framework /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++
COPY --from=framework /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas
COPY --from=framework /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary
COPY --from=framework /usr/local/cuda/include/ /usr/local/cuda/include/
COPY --from=framework /usr/local/cuda/nvvm /usr/local/cuda/nvvm
COPY --from=framework /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/
# Provide the unversioned cuBLAS names next to the versioned .so.12 files
RUN ln -s /usr/local/cuda/lib64/libcublas.so.12 /usr/local/cuda/lib64/libcublas.so && \
    ln -s /usr/local/cuda/lib64/libcublasLt.so.12 /usr/local/cuda/lib64/libcublasLt.so

### COPY NATS & ETCD ###
# Copy nats and etcd from the dynamo base image
COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH

# DeepGemm runs nvcc for JIT kernel compilation, however the CUDA include path
# is not properly set for compilation. Set CPATH to help nvcc find the headers.
ENV CPATH=/usr/local/cuda/include

# uv / uvx binaries come from the framework stage
COPY --from=framework /bin/uv /bin/uvx /bin/

# Create the `dynamo` user in group 0 (OpenShift compatibility).
# The stock `ubuntu` account is removed first (best effort), and the build is
# aborted unless `dynamo` ends up with UID 1000.
RUN <<EOF
set -e
userdel -r ubuntu > /dev/null 2>&1 || true
useradd -m -s /bin/bash -g 0 dynamo
[ "$(id -u dynamo)" -eq 1000 ]
mkdir -p /home/dynamo/.cache /opt/dynamo
chown -R dynamo: /workspace /home/dynamo /opt/dynamo
chmod -R g+w /workspace /home/dynamo/.cache /opt/dynamo
EOF

USER dynamo
ENV HOME=/home/dynamo

# UCX and NIXL are taken from the dynamo base image
COPY --chown=dynamo: --from=dynamo_base /usr/local/ucx /usr/local/ucx
COPY --chown=dynamo: --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX

# Put the UCX tools on PATH and make nvshmem, NIXL (libs + plugins) and UCX
# resolvable by the dynamic loader.
ENV PATH=/usr/local/ucx/bin:${PATH} \
    LD_LIBRARY_PATH=/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}

### VIRTUAL ENVIRONMENT SETUP ###

# Bring over the whole virtual environment built in the framework stage,
# owned by the dynamo user
COPY --chown=dynamo: --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV}

# Same for the vLLM tree
COPY --chown=dynamo: --from=framework /opt/vllm /opt/vllm

# Install dynamo, NIXL, and dynamo-specific dependencies
COPY --chown=dynamo: benchmarks/ /opt/dynamo/benchmarks/
COPY --chown=dynamo: --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
# Wheels first, then the optional KVBM wheel, then the benchmarks package
# (installed from its source directory, which is removed afterwards).
RUN <<EOF
set -e
uv pip install \
    /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
    /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
    /opt/dynamo/wheelhouse/nixl/nixl*.whl
if [ "${ENABLE_KVBM}" = "true" ]; then
    uv pip install /opt/dynamo/wheelhouse/kvbm*.whl
fi
cd /opt/dynamo/benchmarks
UV_GIT_LFS=1 uv pip install --no-cache .
cd -
rm -rf /opt/dynamo/benchmarks
EOF

# Common and test requirements; both files are bind-mounted for this step only
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
    UV_GIT_LFS=1 uv pip install --no-cache \
        --requirement /tmp/requirements.txt \
        --requirement /tmp/requirements.test.txt

# Copy benchmarks, examples, and tests for CI with correct ownership
COPY --chown=dynamo: . /workspace/

# Copy attribution files
COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/

# Launch banner, stored in a common directory accessible to all users.
# Lines starting with "# " are dropped from the bind-mounted template.
RUN --mount=type=bind,source=./container/launch_message/runtime.txt,target=/opt/dynamo/launch_message.txt \
    sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen

# Shell setup for all users: activate the venv and print the banner from bash.bashrc
USER root
RUN chmod 755 /opt/dynamo/.launch_screen && \
    { \
        echo 'source /opt/dynamo/venv/bin/activate'; \
        echo 'cat /opt/dynamo/.launch_screen'; \
    } >> /etc/bash.bashrc

USER dynamo

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []

###########################################################
########## Development (run.sh, runs as root user) ########
###########################################################
#
# PURPOSE: Local development environment for use with run.sh (not Dev Container plug-in)
#
# This stage runs as root and provides:
# - Development tools and utilities for local debugging
# - Support for vscode/cursor development outside the Dev Container plug-in
#
# Use this stage if you need a full-featured development environment with extra tools,
# but do not use it with the Dev Container plug-in.

FROM runtime AS dev

# Don't want ubuntu to be editable, just change uid and gid.
# Location of the source checkout inside the container
ARG WORKSPACE_DIR=/workspace

# Re-declared because ARG values do not carry over from the parent stage
ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA

# This stage intentionally runs (and stays) as root
USER root
# Install utilities as root.
# DEBIAN_FRONTEND is set inline so no package can block the build on a prompt,
# matching the other apt-get invocations in this file.
RUN apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        # Install utilities
        nvtop \
        wget \
        tmux \
        vim \
        git \
        openssh-client \
        iproute2 \
        rsync \
        zip \
        unzip \
        htop \
        # Build Dependencies
        autoconf \
        automake \
        cmake \
        libtool \
        meson \
        net-tools \
        pybind11-dev \
        # Rust build dependencies
        clang \
        libclang-dev \
        protobuf-compiler && \
    rm -rf /var/lib/apt/lists/*

# Workspace and Rust toolchain environment.
# DYNAMO_HOME is re-pointed at the workspace so the editable checkout is used.
# NOTE(review): CARGO_TARGET_DIR is fixed to /workspace/target and does not
# follow a non-default WORKSPACE_DIR - confirm this is intended.
ENV WORKSPACE_DIR=${WORKSPACE_DIR} \
    DYNAMO_HOME=${WORKSPACE_DIR} \
    RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/workspace/target \
    VIRTUAL_ENV=/opt/dynamo/venv \
    PATH=/usr/local/cargo/bin:$PATH

# Rust toolchain comes prebuilt from the dynamo base image
COPY --from=dynamo_base /usr/local/rustup /usr/local/rustup
COPY --from=dynamo_base /usr/local/cargo /usr/local/cargo

# Install maturin, for maturin develop
# Editable install of dynamo
# The extras spec is quoted so the shell cannot treat `[patchelf]` as a glob
# pattern. `-e .` resolves against the current working directory (/workspace,
# inherited from the runtime stage).
RUN uv pip install "maturin[patchelf]" && \
    uv pip install --no-deps -e .

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []