# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

ARG BASE_IMAGE="nvcr.io/nvidia/pytorch"
ARG BASE_IMAGE_TAG="25.03-py3"
ARG RELEASE_BUILD

# Define general architecture ARGs for supporting both x86 and aarch64 builds.
#   ARCH: Used for package suffixes (e.g., amd64, arm64)
#   ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
#
# Default values are for x86/amd64:
#   --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
#
# For arm64/aarch64, build with:
#   --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
#
# NOTE: There isn't an easy way to define one of these values based on the other value
# without adding if statements everywhere, so just define both as ARGs for now.
ARG ARCH=amd64
ARG ARCH_ALT=x86_64

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base

WORKDIR /opt/nixl

# Add a cache hint that only changes when the nixl commit changes
ARG NIXL_COMMIT
# This line acts as a cache key - it only changes when NIXL_COMMIT changes
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt

# Copy the nixl source
COPY --from=nixl . .

##################################
########## Build Image ###########
##################################
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS build

# Redeclare ARCH and ARCH_ALT so they're available in this build stage
ARG ARCH
ARG ARCH_ALT

USER root

# Install utilities
RUN apt update -y && \
    apt install -y \
    git \
    wget \
    curl \
    nvtop \
    tmux \
    vim \
    ## NIXL dependencies
    meson \
    ninja-build

### UCX EFA Setup ###
RUN rm -rf /opt/hpcx/ucx
RUN rm -rf /usr/local/ucx
RUN cd /usr/local/src && \
    git clone https://github.com/openucx/ucx.git && \
    cd ucx && \
    git checkout v1.19.x && \
    ./autogen.sh && ./configure \
        --prefix=/usr/local/ucx \
        --enable-shared \
        --disable-static \
        --disable-doxygen-doc \
        --enable-optimizations \
        --enable-cma \
        --enable-devel-headers \
        --with-cuda=/usr/local/cuda \
        --with-verbs \
        --with-efa \
        --with-dm \
        --with-gdrcopy=/usr/local \
        --enable-mt && \
    make -j && \
    make -j install-strip && \
    ldconfig

ENV LD_LIBRARY_PATH=/usr/lib:/usr/local/ucx/lib:/usr/local/cuda/compat/lib.real:$LD_LIBRARY_PATH
ENV CPATH=/usr/include:$CPATH
ENV PATH=/usr/bin:$PATH
ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH

SHELL ["/bin/bash", "-c"]

# NIXL SETUP
# Copy nixl source, and use commit hash as cache hint
COPY --from=nixl_base /opt/nixl /opt/nixl
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt
RUN if [ "$ARCH" = "arm64" ]; then \
        cd /opt/nixl && \
        mkdir build && \
        meson setup build/ --prefix=/usr/local/nixl -Dgds_path=/usr/local/cuda/targets/sbsa-linux && \
        cd build/ && \
        ninja && \
        ninja install; \
    else \
        cd /opt/nixl && \
        mkdir build && \
        meson setup build/ --prefix=/usr/local/nixl && \
        cd build/ && \
        ninja && \
        ninja install; \
    fi

ENV NIXL_PREFIX=/usr/local/nixl
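# Optional sanity check (a sketch, not part of the official build): with the
# --prefix used above, UCX installs its `ucx_info` tool under /usr/local/ucx/bin,
# so the compiled-in transports/devices could be inspected in this stage, e.g.:
#   RUN /usr/local/ucx/bin/ucx_info -v && /usr/local/ucx/bin/ucx_info -d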
# nats
RUN wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/v2.10.24/nats-server-v2.10.24-${ARCH}.deb && \
    dpkg -i nats-server-v2.10.24-${ARCH}.deb && rm nats-server-v2.10.24-${ARCH}.deb

# etcd
ENV ETCD_VERSION="v3.5.18"
RUN wget https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
    mkdir -p /usr/local/bin/etcd && \
    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH

ARG HAS_TRTLLM_CONTEXT=0
ARG TENSORRTLLM_PIP_WHEEL="tensorrt-llm"
ARG TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"

COPY --from=trtllm_wheel . /trtllm_wheel/

# TODO: Currently, ABI compatibility issues with TRTLLM wheel and NGC PyTorch prevent us
#       from using the TRTLLM wheel in a uv venv. Once the issues are resolved, we can
#       use uv to install TensorRT-LLM wheel within the uv venv.
RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
    if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
        # Install from local wheel directory in build context
        WHEEL_FILE=$(find /trtllm_wheel -name "*.whl" | head -n 1); \
        if [ -n "$WHEEL_FILE" ]; then \
            pip install "$WHEEL_FILE"; \
        else \
            echo "No wheel file found in /trtllm_wheel directory."; \
            exit 1; \
        fi; \
    else \
        # Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
        pip install --index-url "${TENSORRTLLM_INDEX_URL}" \
            --extra-index-url https://pypi.org/simple \
            "${TENSORRTLLM_PIP_WHEEL}" ; \
    fi

# Install genai-perf for benchmarking
# TODO: Move to published pypi tags
ARG GENAI_PERF_TAG="e67e853413a07a778dd78a55e299be7fba9c9c24"
RUN pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"

# Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    pip install --requirement /tmp/requirements.txt

### MISC UTILITY SETUP ###

# Finish pyright install
RUN pyright --help > /dev/null 2>&1

# Enable Git operations in the /workspace directory
RUN printf "[safe]\n directory=/workspace\n" > /root/.gitconfig

RUN ln -sf /bin/bash /bin/sh

# Rust build/dev dependencies
RUN apt-get update && \
    apt-get install --no-install-recommends -y \
    gdb \
    protobuf-compiler \
    cmake \
    libssl-dev \
    pkg-config \
    libclang-dev

ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
    RUST_VERSION=1.86.0

# Define Rust target based on ARCH_ALT ARG
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu

# Install Rust using RUSTARCH derived from ARCH_ALT
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
    # TODO: Add SHA check back based on RUSTARCH
    chmod +x rustup-init && \
    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
    rm rustup-init && \
    chmod -R a+w $RUSTUP_HOME $CARGO_HOME
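# Sketch for the SHA TODO above (RUSTUP_INIT_SHA256 is a hypothetical build-arg, not
# defined in this file): once per-architecture checksums are pinned again, the
# downloaded installer could be verified before execution with something like:
#   echo "${RUSTUP_INIT_SHA256} *rustup-init" | sha256sum -c -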
ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided.
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the open file descriptor limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}

ENV CARGO_TARGET_DIR=/workspace/target

# Install uv, create virtualenv for general use, and build dynamo wheel
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

### VIRTUAL ENVIRONMENT SETUP ###
RUN mkdir /opt/dynamo && \
    uv venv /opt/dynamo/venv --python 3.12

###################################
####### WHEEL BUILD STAGE #########
###################################

# Redeclare ARCH_ALT ARG so it's available for interpolation in the FROM instruction
ARG ARCH_ALT

FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder

ARG RELEASE_BUILD

ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided.
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the open file descriptor limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}

WORKDIR /workspace

RUN yum update -y \
    && yum install -y llvm-toolset python3.12-devel \
    && yum install -y protobuf-compiler \
    && yum clean all \
    && rm -rf /var/cache/yum

ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/workspace/target \
    VIRTUAL_ENV=/opt/dynamo/venv

COPY --from=build $RUSTUP_HOME $RUSTUP_HOME
COPY --from=build $CARGO_HOME $CARGO_HOME
COPY --from=build /workspace /workspace
COPY --from=build $VIRTUAL_ENV $VIRTUAL_ENV
COPY --from=build /usr/local/nixl /opt/nvidia/nvda_nixl

ENV PATH=$CARGO_HOME/bin:$VIRTUAL_ENV/bin:$PATH

# Copy configuration files
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/
COPY Cargo.toml /workspace/
COPY Cargo.lock /workspace/
COPY rust-toolchain.toml /workspace/
COPY hatch_build.py /workspace/

# Copy source code
COPY lib/ /workspace/lib/
COPY components /workspace/components
COPY launch /workspace/launch
COPY deploy/sdk /workspace/deploy/sdk

# Build Rust crate binaries packaged with the wheel
RUN cargo build --release --locked --features mistralrs,python \
    -p dynamo-run \
    -p llmctl \
    # Multiple crates named "http" are present in the dependency graph, so specify this one by path
    -p file://$PWD/components/http \
    -p metrics

# Build dynamo wheels
RUN uv build --wheel --out-dir /workspace/dist && \
    cd /workspace/lib/bindings/python && \
    uv build --wheel --out-dir /workspace/dist --python 3.12 && \
    if [ "$RELEASE_BUILD" = "true" ]; then \
        uv build --wheel --out-dir /workspace/dist --python 3.11 && \
        uv build --wheel --out-dir /workspace/dist --python 3.10; \
    fi

########################################
########## Development Image ###########
########################################
FROM build AS dev

WORKDIR /workspace

COPY --from=wheel_builder /workspace/dist/ /workspace/dist/
COPY --from=wheel_builder /workspace/target/ /workspace/target/
# Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME
COPY . /workspace

# Build the rest of the crates
# TODO: Figure out Rust build caching to avoid rebuilding, then remove the exclude flags
RUN cargo build --release --locked --workspace \
    --exclude dynamo-run \
    --exclude llmctl \
    --exclude file://$PWD/components/http \
    --exclude metrics
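# One possible direction for the caching TODO above (a sketch only, assuming BuildKit
# cache mounts are acceptable here; the mount targets simply mirror CARGO_HOME and
# CARGO_TARGET_DIR as set in this file and are not part of the current build):
#   RUN --mount=type=cache,target=/usr/local/cargo/registry \
#       --mount=type=cache,target=/workspace/target \
#       cargo build --release --locked --workspace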
# Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \
    mkdir /opt/dynamo/bindings/lib && \
    cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
    cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
    cp -r lib/bindings/c/include /opt/dynamo/bindings/. && \
    cp target/release/dynamo-run /usr/local/bin && \
    cp target/release/http /usr/local/bin && \
    cp target/release/llmctl /usr/local/bin && \
    cp target/release/metrics /usr/local/bin && \
    cp target/release/mock_worker /usr/local/bin

# Install wheels
RUN . /opt/dynamo/venv/bin/activate && \
    uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
    uv pip install /workspace/dist/ai_dynamo*any.whl

# Install dynamo.runtime and dynamo.llm wheels globally in container for tests
# TODO: In future, we may use a virtualenv for everything and remove this.
RUN pip install dist/ai_dynamo_runtime*cp312*.whl && \
    pip install dist/ai_dynamo*any.whl

ENV DYNAMO_HOME=/workspace

# Use UCX for TRTLLM KV Cache Transfer
ENV TRTLLM_USE_UCX_KVCACHE=1

# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

# FIXME: May want a modification with dynamo banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
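# Example invocation (a sketch; the image tag and context paths are placeholders).
# The COPY --from=nixl and COPY --from=trtllm_wheel instructions above expect extra
# named build contexts, which BuildKit supplies via --build-context:
#   DOCKER_BUILDKIT=1 docker build --target dev -t dynamo:dev \
#     --build-context nixl=../nixl \
#     --build-context trtllm_wheel=./trtllm_wheel \
#     --build-arg NIXL_COMMIT=<commit-sha> \
#     --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64 \
#     .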