# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
ARG BASE_IMAGE_TAG="25.03-cuda12.8-devel-ubuntu24.04"
ARG RELEASE_BUILD
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"

# TODO: Move to published pypi tags
ARG GENAI_PERF_TAG="e67e853413a07a778dd78a55e299be7fba9c9c24"

# Define general architecture ARGs for supporting both x86 and aarch64 builds.
#   ARCH: Used for package suffixes (e.g., amd64, arm64)
#   ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
#
# Default values are for x86/amd64:
#   --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
#
# For arm64/aarch64, build with:
#   --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
#
# NOTE: There isn't an easy way to define one of these values based on the other value
# without adding if statements everywhere, so just define both as ARGs for now.
ARG ARCH=amd64
ARG ARCH_ALT=x86_64

##################################
########## NIXL Base #############
##################################
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base

# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT

WORKDIR /opt/nixl

# Add a cache hint that only changes when the nixl commit changes
ARG NIXL_COMMIT
# This line acts as a cache key - it only changes when NIXL_COMMIT changes
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt

# Copy the nixl source (build context named "nixl", supplied via --build-context)
COPY --from=nixl . .
##################################
########## Base Image ############
##################################
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base

# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT

USER root
ARG PYTHON_VERSION=3.12

RUN apt-get update -y && \
    apt-get install -y \
        # NIXL build dependencies
        cmake \
        meson \
        ninja-build \
        pybind11-dev \
        # Rust build dependencies
        libclang-dev \
        # Install utilities
        nvtop \
        tmux \
        vim

WORKDIR /workspace

### NIXL SETUP ###
# Copy nixl source, and use commit hash as cache hint
COPY --from=nixl_base /opt/nixl /opt/nixl
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt

### NATS & ETCD SETUP ###
# nats
RUN wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/v2.10.24/nats-server-v2.10.24-${ARCH}.deb && \
    dpkg -i nats-server-v2.10.24-${ARCH}.deb && rm nats-server-v2.10.24-${ARCH}.deb
# etcd
ENV ETCD_VERSION="v3.5.18"
RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
    mkdir -p /usr/local/bin/etcd && \
    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH

### VIRTUAL ENVIRONMENT SETUP ###
# Install uv and create virtualenv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
# FIX: use the declared PYTHON_VERSION ARG instead of a second hard-coded "3.12"
# so the interpreter version only needs to be changed in one place.
RUN mkdir /opt/dynamo && \
    uv venv /opt/dynamo/venv --python $PYTHON_VERSION

# Activate virtual environment
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Install NIXL Python module
# TODO: Move gds_path selection based on arch into NIXL build
RUN if [ "$ARCH" = "arm64" ]; then \
        cd /opt/nixl && uv pip install . --config-settings=setup-args="-Dgds_path=/usr/local/cuda/targets/sbsa-linux/"; \
    else \
        cd /opt/nixl && uv pip install .; \
    fi

# Install patched vllm - keep this early in Dockerfile to avoid
# rebuilds from unrelated source code changes
ARG VLLM_REF="0.8.4"
ARG VLLM_PATCH="vllm_v${VLLM_REF}-dynamo-kv-disagg-patch.patch"
ARG VLLM_PATCHED_PACKAGE_NAME="ai_dynamo_vllm"
ARG VLLM_PATCHED_PACKAGE_VERSION="0.8.4"
ARG VLLM_MAX_JOBS=4
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
    --mount=type=cache,target=/root/.cache/uv \
    mkdir /tmp/vllm && \
    uv pip install pip wheel && \
    # NOTE: vLLM build from source on ARM can take several hours, see VLLM_MAX_JOBS details.
    if [ "$ARCH" = "arm64" ]; then \
        # PyTorch 2.7 supports CUDA 12.8 and aarch64 installs
        uv pip install torch==2.7.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128 && \
        # Download vLLM source with version matching patch
        git clone --branch v${VLLM_REF} --depth 1 https://github.com/vllm-project/vllm.git /tmp/vllm/vllm-${VLLM_REF} && \
        cd /tmp/vllm/vllm-${VLLM_REF}/ && \
        # Patch vLLM source with dynamo additions
        patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
        # WAR: Set package version check to 'vllm' instead of 'ai_dynamo_vllm' to avoid
        # platform detection issues on ARM install.
        # TODO: Rename package from vllm to ai_dynamo_vllm like x86 path below to remove this WAR.
        sed -i 's/version("ai_dynamo_vllm")/version("vllm")/g' vllm/platforms/__init__.py && \
        # Remove pytorch from vllm install dependencies
        python use_existing_torch.py && \
        # Build/install vllm from source
        uv pip install -r requirements/build.txt && \
        # MAX_JOBS set to avoid running OOM on vllm-flash-attn build, this can
        # significantly impact the overall build time. Each job can take up
        # to -16GB RAM each, so tune according to available system memory.
        MAX_JOBS=${VLLM_MAX_JOBS} uv pip install . --no-build-isolation; \
    # Handle x86_64: Download wheel, unpack, setup for later steps
    else \
        python -m pip download --only-binary=:all: --no-deps --dest /tmp/vllm vllm==v${VLLM_REF} && \
        # Patch vLLM pre-built download with dynamo additions
        cd /tmp/vllm && \
        wheel unpack *.whl && \
        cd vllm-${VLLM_REF}/ && \
        patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
        # Rename the package from vllm to ai_dynamo_vllm
        mv vllm-${VLLM_REF}.dist-info ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info && \
        sed -i "s/^Name: vllm/Name: ${VLLM_PATCHED_PACKAGE_NAME}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/METADATA && \
        sed -i "s/^Version: ${VLLM_REF}/Version: ${VLLM_PATCHED_PACKAGE_VERSION}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/METADATA && \
        # Update wheel tag from linux_${ARCH_ALT} to manylinux1_${ARCH_ALT} in WHEEL file
        sed -i "s/Tag: cp38-abi3-linux_${ARCH_ALT}/Tag: cp38-abi3-manylinux1_${ARCH_ALT}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/WHEEL && \
        # Also update the tag in RECORD file to match
        sed -i "s/-cp38-abi3-linux_${ARCH_ALT}.whl/-cp38-abi3-manylinux1_${ARCH_ALT}.whl/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/RECORD && \
        mkdir -p /workspace/dist && \
        wheel pack . --dest-dir /workspace/dist && \
        uv pip install /workspace/dist/${VLLM_PATCHED_PACKAGE_NAME}-*.whl; \
    fi

# Common dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt

# Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt

# ### MISC UTILITY SETUP ###

# Finish pyright install
RUN pyright --help > /dev/null 2>&1

# Enable Git operations in the /workspace directory
RUN printf "[safe]\n directory=/workspace\n" > /root/.gitconfig

RUN ln -sf /bin/bash /bin/sh

### BUILDS ###

# Rust build/dev dependencies
RUN apt update -y && \
    apt install --no-install-recommends -y \
        build-essential \
        protobuf-compiler \
        cmake \
        libssl-dev \
        pkg-config

ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
    RUST_VERSION=1.86.0

# Define Rust target based on ARCH_ALT ARG
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu

# Install Rust using RUSTARCH derived from ARCH_ALT
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
    # TODO: Add SHA check back based on RUSTARCH
    chmod +x rustup-init && \
    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
    rm rustup-init && \
    chmod -R a+w $RUSTUP_HOME $CARGO_HOME

ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}

#######################################
########## Local Development ##########
#######################################
FROM base AS local-dev

# https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user
# Will use the default ubuntu user, but give sudo access
# Needed so files permissions aren't set to root ownership when writing from inside container
# Don't want ubuntu to be editable, just change uid and gid. User ubuntu is hardcoded in .devcontainer
ENV USERNAME=ubuntu
ARG USER_UID=1000
ARG USER_GID=1000

RUN apt-get update && apt-get install -y sudo gnupg2 gnupg1 \
    && echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
    && chmod 0440 /etc/sudoers.d/$USERNAME \
    && mkdir -p /home/$USERNAME \
    && chown -R $USERNAME:$USERNAME /home/$USERNAME \
    && rm -rf /var/lib/apt/lists/* \
    && chsh -s /bin/bash $USERNAME

# This is a slow operation (~40s on my cpu)
# Much better than chown -R $USERNAME:$USERNAME /opt/dynamo/venv (~10min on my cpu)
COPY --from=base --chown=$USER_UID:$USER_GID /opt/dynamo/venv/ /opt/dynamo/venv/
RUN chown $USERNAME:$USERNAME /opt/dynamo/venv

COPY --from=base --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin

USER $USERNAME
ENV HOME=/home/$USERNAME
WORKDIR $HOME

# https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history
RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.commandhistory/.bash_history" \
    && mkdir -p $HOME/.commandhistory \
    && touch $HOME/.commandhistory/.bash_history \
    && echo "$SNIPPET" >> "$HOME/.bashrc"

RUN mkdir -p /home/$USERNAME/.cache/

ENV VLLM_KV_CAPI_PATH=$HOME/dynamo/.build/target/debug/libdynamo_llm_capi.so

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

##################################
##### Wheel Build Image ##########
##################################

# Redeclare ARCH_ALT ARG so it's available for interpolation in the FROM instruction
ARG ARCH_ALT
FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder

ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}

# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG RELEASE_BUILD

WORKDIR /workspace

RUN yum update -y \
    && yum install -y python3.12-devel \
    && yum install -y protobuf-compiler \
    && yum clean all \
    && rm -rf /var/cache/yum

ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/workspace/target \
    VIRTUAL_ENV=/opt/dynamo/venv

COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=base $CARGO_HOME $CARGO_HOME
COPY --from=base /workspace /workspace
COPY --from=base $VIRTUAL_ENV $VIRTUAL_ENV

ENV PATH=$CARGO_HOME/bin:$VIRTUAL_ENV/bin:$PATH

# Copy configuration files
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/
COPY Cargo.toml /workspace/
COPY Cargo.lock /workspace/
COPY rust-toolchain.toml /workspace/
COPY hatch_build.py /workspace/

# Copy source code
COPY lib/ /workspace/lib/
COPY components /workspace/components
COPY launch /workspace/launch
COPY deploy/dynamo/sdk /workspace/deploy/dynamo/sdk

# Build Rust crate binaries packaged with the wheel
RUN cargo build --release --locked --features mistralrs,sglang,vllm,python \
        -p dynamo-run \
        -p llmctl \
        # Multiple http named crates are present in dependencies, need to specify the path
        -p file://$PWD/components/http \
        -p metrics

# Build dynamo wheel
RUN uv build --wheel --out-dir /workspace/dist && \
    cd /workspace/lib/bindings/python && \
    uv build --wheel --out-dir /workspace/dist --python 3.12 && \
    if [ "$RELEASE_BUILD" = "true" ]; then \
        uv build --wheel --out-dir /workspace/dist --python 3.11 && \
        uv build --wheel --out-dir /workspace/dist --python 3.10; \
    fi

#######################################
########## CI Minimum Image ###########
#######################################
FROM base AS ci_minimum

ENV DYNAMO_HOME=/workspace
ENV CARGO_TARGET_DIR=/workspace/target

WORKDIR /workspace

COPY --from=wheel_builder /workspace/dist/ /workspace/dist/
COPY --from=wheel_builder /workspace/target/ /workspace/target/
# Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME
COPY . /workspace

# Build rest of the crates
# Need to figure out rust caching to avoid rebuilding and remove exclude flags
RUN cargo build --release --locked --workspace \
        --exclude dynamo-run \
        --exclude llmctl \
        --exclude file://$PWD/components/http \
        --exclude metrics

# Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \
    mkdir /opt/dynamo/bindings/lib && \
    cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
    cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
    cp -r lib/bindings/c/include /opt/dynamo/bindings/. && \
    cp target/release/dynamo-run /usr/local/bin && \
    cp target/release/http /usr/local/bin && \
    cp target/release/llmctl /usr/local/bin && \
    cp target/release/metrics /usr/local/bin && \
    cp target/release/mock_worker /usr/local/bin

RUN uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
    uv pip install /workspace/dist/ai_dynamo*any.whl

# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH=/opt/dynamo/bindings/lib/libdynamo_llm_capi.so

##########################################
########## Perf Analyzer Image ###########
##########################################
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS perf_analyzer

ARG GENAI_PERF_TAG

WORKDIR /workspace

# Build and install Perf Analyzer for benchmarking
RUN apt-get update -y && apt-get -y install cmake g++ libssl-dev python3 rapidjson-dev zlib1g-dev
RUN git clone https://github.com/triton-inference-server/perf_analyzer.git
RUN git -C perf_analyzer checkout ${GENAI_PERF_TAG}
RUN mkdir perf_analyzer/build
RUN cmake -B perf_analyzer/build -S perf_analyzer -D TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON
RUN cmake --build perf_analyzer/build -- -j8
RUN mkdir bin && \
    cp -r perf_analyzer/build/perf_analyzer/src/perf-analyzer-build /workspace/bin/

########################################
########## Development Image ###########
########################################
FROM ci_minimum AS dev

ARG GENAI_PERF_TAG

COPY --from=perf_analyzer /workspace/bin/perf-analyzer-build/ /perf/bin
COPY --from=perf_analyzer /workspace/perf_analyzer /perf_analyzer
ENV PATH="/perf/bin:${PATH}"

# Install genai-perf for benchmarking
RUN uv pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
RUN uv pip uninstall tritonclient

COPY . /workspace

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []

####################################
########## Runtime Image ###########
####################################
FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

WORKDIR /workspace

ENV DYNAMO_HOME=/workspace
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Setup the python environment
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3-dev && \
    rm -rf /var/lib/apt/lists/* && \
    uv venv $VIRTUAL_ENV --python 3.12 && \
    echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc

# Install the wheels and symlink executables to /usr/local/bin so dynamo components can use them
# Dynamo components currently do not have the VIRTUAL_ENV in their PATH, so we need to symlink the executables
COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
RUN uv pip install ai-dynamo[vllm] --find-links wheelhouse && \
    ln -sf $VIRTUAL_ENV/bin/* /usr/local/bin/ && \
    rm -r wheelhouse

# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"

# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

# Copy examples
COPY ./examples examples/

ENTRYPOINT [ "/usr/bin/bash" ]
CMD []