# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Two-stage build:
#   dev     - full toolchain image: vLLM (patched), Rust builds, Python wheels.
#   runtime - same base image with only the built binaries, venv and examples
#             copied over from the dev stage.

ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS dev

USER root

# Install utilities.
# apt-get (not apt) is used because apt's CLI is not meant for scripted use;
# the package index is dropped in the same layer so it does not bloat the image.
RUN apt-get update -y && \
    apt-get install -y \
        curl \
        git \
        nvtop \
        tmux \
        vim \
        wget && \
    rm -rf /var/lib/apt/lists/*

# nats
# The .deb is removed in the same layer so it is not kept in the image.
RUN wget https://github.com/nats-io/nats-server/releases/download/v2.10.24/nats-server-v2.10.24-amd64.deb \
        -O /tmp/nats-server-v2.10.24-amd64.deb && \
    dpkg -i /tmp/nats-server-v2.10.24-amd64.deb && \
    rm -f /tmp/nats-server-v2.10.24-amd64.deb

# etcd
# The release tarball is unpacked into /usr/local/bin/etcd (a directory) and
# deleted in the same layer.
ENV ETCD_VERSION="v3.5.18"
RUN wget https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-amd64.tar.gz -O /tmp/etcd.tar.gz && \
    mkdir -p /usr/local/bin/etcd && \
    tar -xf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm -f /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH

### VIRTUAL ENVIRONMENT SETUP ###

# Install uv and create virtualenv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/dynamo && \
    uv venv /opt/dynamo/venv --python 3.12

# Activate virtual environment
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Install patched vllm - keep this early in Dockerfile to avoid
# rebuilds from unrelated source code changes
ARG VLLM_REF="v0.7.2"
ARG VLLM_PATCH="vllm_${VLLM_REF}-dynamo-kv-disagg-patch.patch"
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
    bash /tmp/deps/vllm/install.sh \
        --patch /tmp/deps/vllm/${VLLM_PATCH} \
        --ref ${VLLM_REF} \
        --install-cmd "uv pip install --editable" \
        --use-precompiled \
        --installation-dir /opt/vllm

# Install genai-perf for benchmarking
# TODO: Move to tag when fix for genai-perf will be released
ARG GENAI_PERF_TAG="25d0188713adc47868d6b3f22426375237a90529"
RUN uv pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"

# Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt

### MISC UTILITY SETUP ###

# Finish pyright install
RUN pyright --help > /dev/null 2>&1

# Enable Git operations in the /workspace directory
RUN printf "[safe]\n directory=/workspace\n" > /root/.gitconfig

# Make /bin/sh resolve to bash; later RUN steps rely on bash features
# (`source`, `set -o pipefail`).
RUN ln -sf /bin/bash /bin/sh

### BUILDS ###

# Rust build/dev dependencies
# pipefail makes a failed download of the rustup installer fail the build
# instead of being masked by the exit status of `bash`.
# NOTE(review): rustup-init is run without --default-toolchain and 1.85.0 is
# installed separately below - confirm which toolchain the cargo builds are
# expected to resolve to.
RUN set -o pipefail && \
    apt-get update -y && \
    apt-get install -y \
        build-essential \
        cmake \
        libssl-dev \
        pkg-config \
        protobuf-compiler && \
    rm -rf /var/lib/apt/lists/* && \
    curl https://sh.rustup.rs -sSf | bash -s -- -y

ENV PATH="/root/.cargo/bin:${PATH}"

RUN rustup toolchain install 1.85.0-x86_64-unknown-linux-gnu

# Working directory
WORKDIR /workspace

# Copy Python wheel configuration files
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/

# Build Rust runtime
COPY lib/runtime /workspace/lib/runtime
RUN cd lib/runtime && \
    cargo build --release --locked && cargo doc --no-deps

# Build OpenAI HTTP Service binaries
COPY lib/llm /workspace/lib/llm
COPY components /workspace/components
RUN cd components && \
    cargo build --release && \
    cp target/release/http /usr/local/bin/

# Build Dynamo Run binaries
COPY launch /workspace/launch
RUN cd launch && \
    cargo build --release --features mistralrs,sglang,vllm,python && \
    cp target/release/dynamo-run /usr/local/bin/ && \
    cp target/release/llmctl /usr/local/bin/

# Generate C bindings for kv cache routing in vLLM
# (this step previously appeared twice verbatim; one copy is sufficient)
COPY lib/bindings /workspace/lib/bindings
RUN cd lib/bindings/c && \
    cargo build --release --locked && cargo doc --no-deps

COPY deploy/dynamo/sdk /workspace/deploy/dynamo/sdk

# Build dynamo wheel
# Builds and installs the wheel from /workspace, then the SDK wheel from
# deploy/dynamo/sdk; both wheels are written to /workspace/dist.
RUN source /opt/dynamo/venv/bin/activate && \
    uv build --wheel --out-dir /workspace/dist && \
    uv pip install /workspace/dist/ai_dynamo*cp312*.whl && \
    cd /workspace/deploy/dynamo/sdk && \
    uv build --wheel --out-dir /workspace/dist && \
    uv pip install /workspace/dist/ai_dynamo_sdk*any.whl

# Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \
    mkdir /opt/dynamo/bindings/lib && \
    cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
    cp lib/bindings/c/target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
    cp -r lib/bindings/c/include /opt/dynamo/bindings/.

# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"

# FIXME: Copy more specific folders in for dev/debug after directory restructure
COPY . /workspace

# FIXME: May want a modification with dynamo banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

CMD []

### Lean Runtime Image Stage ###

# FIXME: Separate build and runtime images
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS runtime

USER root

# Install tools for interactive convenience
RUN apt-get update -y && \
    apt-get install -y \
        curl \
        tmux \
        vim && \
    rm -rf /var/lib/apt/lists/* && \
    echo "set -g mouse on" >> /root/.tmux.conf

# Set environment variables
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
ENV RAPIDS_LIBUCX_PREFER_SYSTEM_LIBRARY=true
ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"

# Copy binaries
# In the dev stage /usr/local/bin/etcd is a directory; only the etcd binary
# inside it is carried over here.
COPY --from=dev /usr/local/bin/http /usr/local/bin/http
COPY --from=dev /usr/local/bin/llmctl /usr/local/bin/llmctl
COPY --from=dev /usr/local/bin/etcd/etcd /usr/local/bin/etcd
COPY --from=dev /usr/bin/nats-server /usr/local/bin/nats-server
COPY --from=dev /bin/uv /usr/local/bin/uv
COPY --from=dev /bin/uvx /usr/local/bin/uvx

# Copy venv with installed packages
# /opt/vllm is copied as well because vllm was installed with
# `uv pip install --editable` into that directory in the dev stage.
RUN uv python install 3.12
COPY --from=dev /opt/vllm /opt/vllm
COPY --from=dev ${VIRTUAL_ENV} ${VIRTUAL_ENV}

# Copy minimal set of files for testing. May consider separate stage for testing
# if test dependencies start to negatively impact deployment environment/size.
COPY pyproject.toml /workspace/pyproject.toml
COPY container/deps/vllm /workspace/container/deps/vllm

# Add library for KV routing
COPY --from=dev ${VLLM_KV_CAPI_PATH} ${VLLM_KV_CAPI_PATH}

# Copy minimal set of files for deployment/examples
# FIXME: Use a more consolidated path after directory restructure
COPY examples/python_rs/llm/vllm /workspace/examples/python_rs/llm/vllm

WORKDIR /workspace

# FIXME: May want a modification with dynamo banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

CMD []