# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# NOTE FOR dynamo_base AND wheel_builder STAGES:
#
# All changes to the dynamo_base and wheel_builder stages should be replicated across
# the plain Dockerfile and the framework-specific Dockerfile.* images:
# - Dockerfile
# - Dockerfile.vllm
# - Dockerfile.sglang
# - Dockerfile.trtllm
# This duplication was introduced purposely to quickly enable Docker layer caching and
# deduplication. Please ensure these stages stay in sync until the duplication can be
# addressed.
#
# Throughout this file, we make certain paths group-writable because this allows
# both the dynamo user (UID 1000) and Dev Container users (UID != 1000) to work
# properly without needing slow chown -R operations (which can add 2-10 extra
# minutes).
#
# DEVELOPMENT PATHS THAT MUST BE GROUP-WRITABLE (for virtualenv containers):
#   /workspace        - Users create/modify project files
#   /home/dynamo      - Users create config/cache files
#   /opt/dynamo/venv  - TensorRT-LLM uses venv, so entire venv must be writable for pip install
#
# HOW TO ACHIEVE GROUP-WRITABLE PERMISSIONS:
#   1. SHELL + /etc/profile.d - Login shell sources umask 002 globally for all RUN commands (775/664)
#   2. COPY --chmod=775       - Sets permissions on copied children (not destination)
#   3. chmod g+w (no -R)      - Fixes destination dirs only (milliseconds vs minutes)

# This section contains build arguments that are common and shared with
# the plain Dockerfile, so they should NOT have a default. The source of truth is build.sh.
# --- Arguments supplied by build.sh (intentionally left without defaults) ---
ARG BASE_IMAGE
ARG BASE_IMAGE_TAG
ARG PYTHON_VERSION
ARG ENABLE_KVBM
ARG ENABLE_MEDIA_NIXL
ARG CARGO_BUILD_JOBS

# --- Images used as artifact source (PyTorch) and as runtime base ---
ARG PYTORCH_BASE_IMAGE="nvcr.io/nvidia/pytorch"
ARG PYTORCH_BASE_IMAGE_TAG="25.10-py3"
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda-dl-base"
ARG RUNTIME_IMAGE_TAG="25.10-cuda13.0-runtime-ubuntu24.04"

# --- TensorRT-LLM specific configuration ---
ARG HAS_TRTLLM_CONTEXT=0
ARG TENSORRTLLM_PIP_WHEEL="tensorrt-llm"
ARG TENSORRTLLM_INDEX_URL="https://pypi.nvidia.com/"
ARG GITHUB_TRTLLM_COMMIT

# --- SCCACHE configuration ---
ARG USE_SCCACHE
ARG SCCACHE_BUCKET=""
ARG SCCACHE_REGION=""

# --- NIXL configuration (git refs for UCX, NIXL and gdrcopy) ---
ARG NIXL_UCX_REF
ARG NIXL_REF
ARG NIXL_GDRCOPY_REF

# Architecture ARGs so the same file can build for both x86 and aarch64.
#   ARCH:     package suffixes (e.g., amd64, arm64)
#   ARCH_ALT: Rust targets and manylinux suffix (e.g., x86_64, aarch64)
#
# The defaults target x86/amd64:
#   --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
#
# To build for arm64/aarch64 pass:
#   --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
#
# NOTE: Deriving one value from the other would need if-statements everywhere,
# so both are simply exposed as separate ARGs for now.
ARG ARCH=amd64
ARG ARCH_ALT=x86_64

# Copy artifacts from NGC PyTorch image
FROM ${PYTORCH_BASE_IMAGE}:${PYTORCH_BASE_IMAGE_TAG} AS pytorch_base

##################################
########## Base Image ############
##################################

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS dynamo_base

# Global ARGs are not visible inside a stage until they are redeclared.
ARG ARCH
ARG ARCH_ALT

USER root
WORKDIR /opt/dynamo

# Install uv package manager
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Install NATS server
ENV NATS_VERSION="v2.10.28"
RUN --mount=type=cache,target=/var/cache/apt \
    wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/${NATS_VERSION}/nats-server-${NATS_VERSION}-${ARCH}.deb && \
    dpkg -i nats-server-${NATS_VERSION}-${ARCH}.deb && rm nats-server-${NATS_VERSION}-${ARCH}.deb

# Install etcd
ENV ETCD_VERSION="v3.5.21"
RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
    mkdir -p /usr/local/bin/etcd && \
    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH

# Rust Setup
# Rust environment setup
ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
    RUST_VERSION=1.90.0

# Define Rust target based on ARCH_ALT ARG
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu

# Install Rust
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
    chmod +x rustup-init && \
    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
    rm rustup-init && \
    chmod -R a+w $RUSTUP_HOME $CARGO_HOME

##################################
##### Wheel Build Image ##########
##################################

# Redeclare ARCH_ALT ARG so it's available for interpolation in the FROM instruction
ARG ARCH_ALT

FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder

# Redeclare ARGs for this stage
ARG ARCH
ARG ARCH_ALT
ARG CARGO_BUILD_JOBS

WORKDIR /workspace

# Copy CUDA from base stage
COPY --from=dynamo_base /usr/local/cuda /usr/local/cuda
COPY --from=dynamo_base /etc/ld.so.conf.d/hpcx.conf /etc/ld.so.conf.d/hpcx.conf

# Set environment variables first so they can be used in COPY commands
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} \
    RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/opt/dynamo/target \
    PATH=/usr/local/cargo/bin:$PATH

# Copy artifacts from base stage
COPY --from=dynamo_base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=dynamo_base $CARGO_HOME $CARGO_HOME

# Install system dependencies
RUN yum groupinstall -y 'Development Tools' && \
    dnf install -y almalinux-release-synergy && \
    dnf config-manager --set-enabled powertools && \
    dnf install -y \
        # Build tools
        cmake \
        ninja-build \
        clang-devel \
        gcc-c++ \
        flex \
        wget \
        # Kernel module build dependencies
        dkms \
        # Protobuf support
        protobuf-compiler \
        # RDMA/InfiniBand support (required for UCX build with --with-verbs)
        libibverbs \
        libibverbs-devel \
        rdma-core \
        rdma-core-devel \
        libibumad \
        libibumad-devel \
        librdmacm-devel \
        numactl-devel

# Ensure a modern protoc is available (required for --experimental_allow_proto3_optional)
RUN set -eux; \
    PROTOC_VERSION=25.3; \
    case "${ARCH_ALT}" in \
        x86_64) PROTOC_ZIP="protoc-${PROTOC_VERSION}-linux-x86_64.zip" ;; \
        aarch64) PROTOC_ZIP="protoc-${PROTOC_VERSION}-linux-aarch_64.zip" ;; \
        *) echo "Unsupported architecture: ${ARCH_ALT}" >&2; exit 1 ;; \
    esac; \
    wget --tries=3 --waitretry=5 -O /tmp/protoc.zip "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/${PROTOC_ZIP}"; \
    rm -f /usr/local/bin/protoc /usr/bin/protoc; \
    unzip -o /tmp/protoc.zip -d /usr/local bin/protoc include/*; \
    chmod +x /usr/local/bin/protoc; \
    ln -s /usr/local/bin/protoc /usr/bin/protoc; \
    protoc --version

# Point build tools explicitly at the modern protoc
ENV PROTOC=/usr/local/bin/protoc

ENV CUDA_PATH=/usr/local/cuda \
    PATH=/usr/local/cuda/bin:$PATH \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/lib:/usr/local/lib64:$LD_LIBRARY_PATH \
    NVIDIA_DRIVER_CAPABILITIES=video,compute,utility

# Create virtual environment for building wheels
ARG PYTHON_VERSION
ENV VIRTUAL_ENV=/workspace/.venv
# The extras specifier is quoted so the shell never treats [patchelf] as a glob.
RUN uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
    uv pip install --upgrade meson pybind11 patchelf "maturin[patchelf]"

ARG NIXL_UCX_REF
ARG NIXL_REF
ARG NIXL_GDRCOPY_REF

# Build and install gdrcopy
RUN git clone --depth 1 --branch ${NIXL_GDRCOPY_REF} https://github.com/NVIDIA/gdrcopy.git && \
    cd gdrcopy/packages && \
    CUDA=/usr/local/cuda ./build-rpm-packages.sh && \
    rpm -Uvh gdrcopy-kmod-*.el8.noarch.rpm && \
    rpm -Uvh gdrcopy-*.el8.${ARCH_ALT}.rpm && \
    rpm -Uvh gdrcopy-devel-*.el8.noarch.rpm

# Install SCCACHE if requested
ARG USE_SCCACHE
ARG SCCACHE_BUCKET
ARG SCCACHE_REGION
COPY container/use-sccache.sh /tmp/use-sccache.sh
RUN if [ "$USE_SCCACHE" = "true" ]; then \
        /tmp/use-sccache.sh install; \
    fi

# Set SCCACHE environment variables
# (each value is only populated when USE_SCCACHE is non-empty)
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
    SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
    RUSTC_WRAPPER=${USE_SCCACHE:+sccache}

# Build and install UCX
# make is bounded to the number of available CPUs: a bare `make -j` places no
# limit on parallel jobs and can exhaust memory on large builds.
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
        export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
        export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
    fi && \
    cd /usr/local/src && \
    git clone https://github.com/openucx/ucx.git && \
    cd ucx && \
    git checkout $NIXL_UCX_REF && \
    ./autogen.sh && \
    ./contrib/configure-release \
        --prefix=/usr/local/ucx \
        --enable-shared \
        --disable-static \
        --disable-doxygen-doc \
        --enable-optimizations \
        --enable-cma \
        --enable-devel-headers \
        --with-cuda=/usr/local/cuda \
        --with-verbs \
        --with-dm \
        --with-gdrcopy=/usr/local \
        --with-efa \
        --enable-mt && \
    make -j"$(nproc)" && \
    make -j"$(nproc)" install-strip && \
    /tmp/use-sccache.sh show-stats "UCX" && \
    echo "/usr/local/ucx/lib" > /etc/ld.so.conf.d/ucx.conf && \
    echo "/usr/local/ucx/lib/ucx" >> /etc/ld.so.conf.d/ucx.conf && \
    ldconfig

# build and install nixl
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
    export CC=$(which gcc) && \
    export CXX=$(which g++) && \
    source ${VIRTUAL_ENV}/bin/activate && \
    git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl.git" && \
    cd nixl && \
    mkdir build && \
    meson setup build/ --prefix=/opt/nvidia/nvda_nixl --buildtype=release \
        -Dcudapath_lib="/usr/local/cuda/lib64" \
        -Dcudapath_inc="/usr/local/cuda/include" \
        -Ducx_path="/usr/local/ucx" && \
    cd build && \
    ninja && \
    ninja install && \
    /tmp/use-sccache.sh show-stats "NIXL"

ENV NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64 \
    NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins \
    NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}

# Register the NIXL library directories with the dynamic loader
RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
    echo "$NIXL_PLUGIN_DIR" >> /etc/ld.so.conf.d/nixl.conf && \
    ldconfig

# Build the NIXL Python distribution into /opt/dynamo/dist/nixl
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export CC=$(which gcc) && \
    export CXX=$(which g++) && \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
    cd /workspace/nixl && \
    uv build . --out-dir /opt/dynamo/dist/nixl --python $PYTHON_VERSION

# Copy source code (order matters for layer caching)
COPY pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /opt/dynamo/
COPY launch/ /opt/dynamo/launch/
COPY lib/ /opt/dynamo/lib/
COPY components/ /opt/dynamo/components/

# Build dynamo wheels
ARG ENABLE_KVBM
# ENABLE_MEDIA_NIXL is declared globally (before the first FROM); it must be
# redeclared here, otherwise it is empty inside this stage and the media-nixl
# feature branch below can never be taken.
ARG ENABLE_MEDIA_NIXL
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
        export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
        export RUSTC_WRAPPER="sccache"; \
    fi && \
    export CC=$(which gcc) && \
    export CXX=$(which g++) && \
    source ${VIRTUAL_ENV}/bin/activate && \
    cd /opt/dynamo && \
    uv build --wheel --out-dir /opt/dynamo/dist && \
    cd /opt/dynamo/lib/bindings/python && \
    if [ "$ENABLE_MEDIA_NIXL" = "true" ]; then \
        maturin build --release --features dynamo-llm/media-nixl --out /opt/dynamo/dist; \
    else \
        maturin build --release --out /opt/dynamo/dist; \
    fi && \
    if [ "$ENABLE_KVBM" = "true" ]; then \
        cd /opt/dynamo/lib/bindings/kvbm && \
        maturin build --release --out target/wheels && \
        auditwheel repair \
            --exclude libnixl.so \
            --exclude libnixl_build.so \
            --exclude libnixl_common.so \
            --plat manylinux_2_28_${ARCH_ALT} \
            --wheel-dir /opt/dynamo/dist \
            target/wheels/*.whl; \
    fi && \
    /tmp/use-sccache.sh show-stats "Dynamo"

##################################################
########## Framework Builder Stage ##############
##################################################
#
# PURPOSE: Build TensorRT-LLM with root privileges
#
# This stage handles TensorRT-LLM installation which requires:
# - Root access for apt operations (CUDA repos, TensorRT installation)
# - System-level modifications in install_tensorrt.sh
# - Virtual environment population with PyTorch and TensorRT-LLM
#
# The completed venv is then copied to runtime stage with dynamo ownership

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework

ARG ARCH_ALT

COPY --from=dynamo_base /bin/uv /bin/uvx /bin/

# Install minimal dependencies needed for TensorRT-LLM installation
ARG PYTHON_VERSION
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        python${PYTHON_VERSION}-dev \
        python3-pip \
        curl \
        git \
        git-lfs \
        ca-certificates && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Create virtual environment
RUN mkdir -p /opt/dynamo/venv && \
    uv venv /opt/dynamo/venv --python $PYTHON_VERSION

ENV VIRTUAL_ENV=/opt/dynamo/venv \
    PATH="/opt/dynamo/venv/bin:${PATH}"

# Copy pytorch installation from NGC PyTorch
# The *_VER values must match the dist-info directory names present in the
# pytorch_base image, since they are used verbatim in the COPY source paths.
ARG TORCH_VER=2.9.0a0+145a3a7bda.nv25.10
ARG TORCH_TENSORRT_VER=2.9.0a0
ARG TORCHVISION_VER=0.24.0a0+094e7af5
ARG JINJA2_VER=3.1.6
ARG SYMPY_VER=1.14.0
ARG FLASH_ATTN_VER=2.7.4.post1+25.10

COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch-${TORCH_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch-${TORCH_VER}.dist-info
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchgen ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchgen
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision-${TORCHVISION_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision-${TORCHVISION_VER}.dist-info
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision.libs ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision.libs
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/functorch ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/functorch
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/jinja2 ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/jinja2
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/jinja2-${JINJA2_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/jinja2-${JINJA2_VER}.dist-info
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/sympy ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/sympy
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/sympy-${SYMPY_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/sympy-${SYMPY_VER}.dist-info
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/flash_attn
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn_2_cuda.cpython-*-*-linux-gnu.so ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch_tensorrt ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch_tensorrt
COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch_tensorrt-${TORCH_TENSORRT_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch_tensorrt-${TORCH_TENSORRT_VER}.dist-info

# Install TensorRT-LLM and related dependencies
ARG HAS_TRTLLM_CONTEXT
ARG TENSORRTLLM_PIP_WHEEL
ARG TENSORRTLLM_INDEX_URL
ARG GITHUB_TRTLLM_COMMIT

# Copy only wheel files and commit info from trtllm_wheel stage from build_context
COPY --from=trtllm_wheel /*.whl /trtllm_wheel/
COPY --from=trtllm_wheel /*.txt /trtllm_wheel/

RUN uv pip install --no-cache "cuda-python==13.0.2"

# Note: TensorRT needs to be uninstalled before installing the TRTLLM wheel
# because there might be mismatched versions of TensorRT between the NGC PyTorch
# and the TRTLLM wheel.
RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
    # Clean up any existing conflicting CUDA repository configurations and GPG keys
    rm -f /etc/apt/sources.list.d/cuda*.list && \
    rm -f /usr/share/keyrings/cuda-archive-keyring.gpg && \
    rm -f /etc/apt/trusted.gpg.d/cuda*.gpg

RUN if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
        # Download and run install_tensorrt.sh from TensorRT-LLM GitHub before installing the wheel
        curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/${GITHUB_TRTLLM_COMMIT}/docker/common/install_tensorrt.sh" && \
        # Modify the script to use virtual environment pip instead of system pip3
        sed -i 's/pip3 install/uv pip install/g' /tmp/install_tensorrt.sh && \
        bash /tmp/install_tensorrt.sh && \
        # Install from local wheel directory in build context
        WHEEL_FILE="$(find /trtllm_wheel -name "*.whl" | head -n 1)"; \
        if [ -n "$WHEEL_FILE" ]; then \
            uv pip install --no-cache "$WHEEL_FILE" triton==3.5.0; \
        else \
            echo "No wheel file found in /trtllm_wheel directory."; \
            exit 1; \
        fi; \
    else \
        # Download and run install_tensorrt.sh from TensorRT-LLM GitHub before installing the wheel
        # (try the release tag derived from the wheel spec first, then fall back to the pinned commit)
        TRTLLM_VERSION=$(echo "${TENSORRTLLM_PIP_WHEEL}" | sed -E 's/.*==([0-9a-zA-Z.+-]+).*/\1/') && \
        (curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/v${TRTLLM_VERSION}/docker/common/install_tensorrt.sh" || \
         curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/${GITHUB_TRTLLM_COMMIT}/docker/common/install_tensorrt.sh") && \
        # Modify the script to use virtual environment pip instead of system pip3
        sed -i 's/pip3 install/uv pip install/g' /tmp/install_tensorrt.sh && \
        bash /tmp/install_tensorrt.sh && \
        # Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
        # TRTLLM 1.2.0rc2 has issues installing from pypi with uv, installing from direct wheel link works best
        # explicitly installing triton 3.5.0 as trtllm only lists triton as dependency on x86_64 for some reason
        if echo "${TENSORRTLLM_PIP_WHEEL}" | grep -q '^tensorrt-llm=='; then \
            TRTLLM_VERSION=$(echo "${TENSORRTLLM_PIP_WHEEL}" | sed -E 's/tensorrt-llm==([0-9a-zA-Z.+-]+).*/\1/'); \
            PYTHON_TAG="cp$(echo ${PYTHON_VERSION} | tr -d '.')"; \
            DIRECT_URL="https://pypi.nvidia.com/tensorrt-llm/tensorrt_llm-${TRTLLM_VERSION}-${PYTHON_TAG}-${PYTHON_TAG}-linux_${ARCH_ALT}.whl"; \
            uv pip install --no-cache --index-strategy=unsafe-best-match --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${DIRECT_URL}" triton==3.5.0; \
        else \
            uv pip install --no-cache --index-strategy=unsafe-best-match --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" triton==3.5.0; \
        fi; \
    fi

##################################################
########## Runtime Image ########################
##################################################
#
# PURPOSE: Production runtime environment
#
# This stage creates a lightweight production-ready image containing:
# - Pre-compiled TensorRT-LLM and framework dependencies
# - Dynamo runtime libraries and Python packages
# - Essential runtime dependencies and configurations
# - Optimized for inference workloads and deployment
#
# Use this stage when you need:
# - Production deployment of Dynamo with TensorRT-LLM
# - Minimal runtime footprint without build tools
# - Ready-to-run inference server environment
# - Base for custom application containers
#

FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

ARG ARCH_ALT

WORKDIR /workspace

ENV ENV=${ENV:-/etc/shinit_v2}
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# workaround for pickle lib issue
ENV OMPI_MCA_coll_ucc_enable=0

# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from PyTorch base image
COPY --from=pytorch_base /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc
COPY --from=pytorch_base /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++
COPY --from=pytorch_base /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas
COPY --from=pytorch_base /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary
COPY --from=pytorch_base /usr/local/cuda/include/ /usr/local/cuda/include/
COPY --from=pytorch_base /usr/local/cuda/nvvm /usr/local/cuda/nvvm
COPY --from=pytorch_base /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/
COPY --from=pytorch_base /usr/local/cuda/lib64/libcupti* /usr/local/cuda/lib64/
COPY --from=pytorch_base /usr/local/lib/lib* /usr/local/lib/
COPY --from=pytorch_base /usr/local/cuda/bin/cuobjdump /usr/local/cuda/bin/cuobjdump
COPY --from=pytorch_base /usr/local/cuda/bin/nvdisasm /usr/local/cuda/bin/nvdisasm

ENV CUDA_HOME=/usr/local/cuda \
    TRITON_CUPTI_PATH=/usr/local/cuda/include \
    TRITON_CUDACRT_PATH=/usr/local/cuda/include \
    TRITON_CUOBJDUMP_PATH=/usr/local/cuda/bin/cuobjdump \
    TRITON_NVDISASM_PATH=/usr/local/cuda/bin/nvdisasm \
    TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas \
    TRITON_CUDART_PATH=/usr/local/cuda/include

# Copy OpenMPI from PyTorch base image
COPY --from=pytorch_base /opt/hpcx/ompi /opt/hpcx/ompi

# Copy NUMA library from PyTorch base image
COPY --from=pytorch_base /usr/lib/${ARCH_ALT}-linux-gnu/libnuma.so* /usr/lib/${ARCH_ALT}-linux-gnu/

# Copy UCX libraries, libucc.so is needed by pytorch. May not need to copy whole hpcx dir but only /opt/hpcx/ucc/
COPY --from=pytorch_base /opt/hpcx /opt/hpcx
# This is needed to make libucc.so visible so pytorch can use it.
ENV LD_LIBRARY_PATH="/opt/hpcx/ucc/lib:${LD_LIBRARY_PATH}"

# Might not need to copy cusparseLt in the future once it's included in DLFW cuda container
# networkx, packaging, setuptools get overridden by trtllm installation, so not copying them
# pytorch-triton is copied after trtllm installation.
COPY --from=pytorch_base /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/

# Copy nats and etcd from dynamo_base image
COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH

# Copy uv to system /bin
COPY --from=dynamo_base /bin/uv /bin/uvx /bin/

# Create dynamo user with group 0 for OpenShift compatibility
# The build fails on purpose if the new user does not get UID 1000.
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
    && useradd -m -s /bin/bash -g 0 dynamo \
    && [ "$(id -u dynamo)" -eq 1000 ] \
    && mkdir -p /home/dynamo/.cache /opt/dynamo \
    # Non-recursive chown - only the directories themselves, not contents
    && chown dynamo:0 /home/dynamo /home/dynamo/.cache /opt/dynamo /workspace \
    # No chmod needed: umask 002 handles new files, COPY --chmod handles copied content
    # Set umask globally for all subsequent RUN commands (must be done as root before USER dynamo)
    # NOTE: Setting ENV UMASK=002 does NOT work - umask is a shell builtin, not an environment variable
    && mkdir -p /etc/profile.d && echo 'umask 002' > /etc/profile.d/00-umask.sh

# Install Python, build-essential and python3-dev as apt dependencies
# The NVIDIA CUDA apt repo uses "x86_64" for x86 and "sbsa" for every other ARCH_ALT value.
ARG PYTHON_VERSION
RUN if [ "${ARCH_ALT}" = "x86_64" ]; then \
        ARCH_FOR_GPG="${ARCH_ALT}"; \
    else \
        ARCH_FOR_GPG="sbsa"; \
    fi && \
    curl -fsSL \
        "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/${ARCH_FOR_GPG}/cuda-archive-keyring.gpg" \
        -o /usr/share/keyrings/cuda-archive-keyring.gpg && \
    echo "deb [signed-by=/usr/share/keyrings/cuda-archive-keyring.gpg] https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/${ARCH_FOR_GPG} /" \
        | tee /etc/apt/sources.list.d/cuda.repo.list > /dev/null && \
    apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        # Build tools
        build-essential \
        g++ \
        ninja-build \
        git \
        git-lfs \
        # Python runtime - CRITICAL for virtual environment to work
        python${PYTHON_VERSION}-dev \
        python3-pip \
        # jq for polling various endpoints and health checks
        jq \
        # CUDA/ML libraries
        libcudnn9-cuda-13 \
        libnvshmem3-cuda-13 \
        # Network and communication libraries
        libzmq3-dev \
        # RDMA/UCX libraries required to find RDMA devices
        ibverbs-providers \
        ibverbs-utils \
        libibumad3 \
        libibverbs1 \
        libnuma1 \
        librdmacm1 \
        rdma-core \
        # OpenMPI dependencies
        openssh-client \
        openssh-server \
        # System utilities and dependencies
        curl && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Switch to dynamo user
USER dynamo
ENV HOME=/home/dynamo
# This picks up the umask 002 from the /etc/profile.d/00-umask.sh file for subsequent RUN commands
SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"]

ENV DYNAMO_HOME=/workspace
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins

# Copy TensorRT and libgomp.so from framework image
COPY --from=framework /usr/local/tensorrt /usr/local/tensorrt
COPY --from=framework /usr/lib/${ARCH_ALT}-linux-gnu/libgomp.so* /usr/lib/${ARCH_ALT}-linux-gnu/

# Copy pre-built venv with PyTorch and TensorRT-LLM from framework stage
# Pattern: COPY --chmod=775 DIR; chmod g+w DIR done later as root because COPY --chmod only affects DIR/*, not DIR itself
COPY --chmod=775 --chown=dynamo:0 --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV}

# Copy UCX, the NIXL install tree and the NIXL wheels from the wheel_builder stage
# (read-only, no group-write needed). The NIXL lib64 contents are additionally
# copied into ${NIXL_LIB_DIR}, the multiarch path this stage points NIXL_LIB_DIR at.
COPY --chown=dynamo: --from=wheel_builder /usr/local/ucx /usr/local/ucx
COPY --chown=dynamo: --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
COPY --chown=dynamo: --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_LIB_DIR}/
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/

ENV TENSORRT_LIB_DIR=/usr/local/tensorrt/targets/${ARCH_ALT}-linux-gnu/lib
ENV PATH="/usr/local/ucx/bin:${VIRTUAL_ENV}/bin:/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/bin:/usr/local/cuda/nvvm/bin:$PATH"
# Kept on one quoted line: with backslash continuations any leading whitespace
# on a continuation line would end up inside the value.
ENV LD_LIBRARY_PATH="$NIXL_LIB_DIR:$NIXL_PLUGIN_DIR:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:/opt/hpcx/ompi/lib:/usr/lib/${ARCH_ALT}-linux-gnu/nvshmem/13/:$TENSORRT_LIB_DIR:/opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages/torch/lib:/opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages/torch_tensorrt/lib:$LD_LIBRARY_PATH"
ENV OPAL_PREFIX=/opt/hpcx/ompi

COPY --chmod=664 --chown=dynamo:0 ATTRIBUTION* LICENSE /workspace/
COPY --chmod=775 --chown=dynamo:0 benchmarks/ /workspace/benchmarks/

# Install dynamo, NIXL, and dynamo-specific dependencies
# Pattern: COPY --chmod=775 DIR; chmod g+w DIR done later as root because COPY --chmod only affects DIR/*, not DIR itself
ARG ENABLE_KVBM
COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
RUN uv pip install \
        --no-cache \
        /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
        /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
        /opt/dynamo/wheelhouse/nixl/nixl*.whl && \
    if [ "${ENABLE_KVBM}" = "true" ]; then \
        KVBM_WHEEL=$(ls /opt/dynamo/wheelhouse/kvbm*.whl 2>/dev/null | head -1); \
        if [ -z "$KVBM_WHEEL" ]; then \
            echo "ERROR: ENABLE_KVBM is true but no KVBM wheel found in wheelhouse" >&2; \
            exit 1; \
        fi; \
        uv pip install --no-cache "$KVBM_WHEEL"; \
    fi && \
    cd /workspace/benchmarks && \
    UV_GIT_LFS=1 uv pip install --no-cache . && \
    # pip/uv bypasses umask when creating .egg-info files, but chmod -R is fast here (small directory)
    chmod -R g+w /workspace/benchmarks

# Install common and test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
    UV_GIT_LFS=1 uv pip install \
        --no-cache \
        --index-strategy unsafe-best-match \
        --extra-index-url https://download.pytorch.org/whl/cu130 \
        --requirement /tmp/requirements.txt \
        --requirement /tmp/requirements.test.txt \
        cupy-cuda13x

# Copy tests, deploy and components for CI with correct ownership
# Pattern: COPY --chmod=775 DIR; chmod g+w DIR done later as root because COPY --chmod only affects DIR/*, not DIR itself
COPY --chmod=775 --chown=dynamo:0 tests /workspace/tests
COPY --chmod=775 --chown=dynamo:0 examples /workspace/examples
COPY --chmod=775 --chown=dynamo:0 deploy /workspace/deploy
COPY --chmod=775 --chown=dynamo:0 components/ /workspace/components/
COPY --chmod=775 --chown=dynamo:0 recipes/ /workspace/recipes/

# Setup launch banner in common directory accessible to all users
# (lines starting with "# " are stripped from the banner template)
RUN --mount=type=bind,source=./container/launch_message/runtime.txt,target=/opt/dynamo/launch_message.txt \
    sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen

# Setup environment for all users
USER root
# Fix directory permissions: COPY --chmod only affects contents, not the directory itself
RUN chmod g+w ${VIRTUAL_ENV} /workspace /workspace/* /opt/dynamo /opt/dynamo/* && \
    chown dynamo:0 ${VIRTUAL_ENV} /workspace /opt/dynamo/ && \
    chmod 755 /opt/dynamo/.launch_screen && \
    echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
    echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc

USER dynamo

ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []

###########################################################
########## Development (run.sh, runs as root user) ########
###########################################################
#
# PURPOSE: Local development environment for use with run.sh (not Dev Container plug-in)
#
# This stage runs as root and provides:
# - Development tools and utilities for local debugging
# - Support for vscode/cursor development outside the Dev Container plug-in
#
# Use this stage if you need a full-featured development environment with extra tools,
# but do not use it with the Dev Container plug-in.

FROM runtime AS dev

# Don't want ubuntu to be editable, just change uid and gid.
ARG WORKSPACE_DIR=/workspace

# Switch to root for system package installation
USER root

# Install utilities as root
RUN apt-get update -y && \
    apt-get install -y --no-install-recommends \
        # Install utilities
        nvtop \
        wget \
        tmux \
        vim \
        git \
        iproute2 \
        rsync \
        zip \
        unzip \
        htop \
        # Build Dependencies
        autoconf \
        automake \
        cmake \
        libtool \
        meson \
        net-tools \
        pybind11-dev \
        # Rust build dependencies
        clang \
        libclang-dev \
        protobuf-compiler && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Set umask for group-writable files in dev stage (runs as root)
RUN mkdir -p /etc/profile.d && echo 'umask 002' > /etc/profile.d/00-umask.sh
SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"]

# Set workspace directory variable
ENV WORKSPACE_DIR=${WORKSPACE_DIR} \
    DYNAMO_HOME=${WORKSPACE_DIR} \
    RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/workspace/target \
    VIRTUAL_ENV=/opt/dynamo/venv \
    PATH=/usr/local/cargo/bin:$PATH

# Copy rust installation from dynamo_base to avoid duplication efforts
# Pattern: COPY --chmod=775 DIR; chmod g+w DIR because COPY --chmod only affects DIR/*, not DIR itself
COPY --from=dynamo_base --chmod=775 /usr/local/rustup /usr/local/rustup
COPY --from=dynamo_base --chmod=775 /usr/local/cargo /usr/local/cargo
RUN chmod g+w /usr/local/rustup /usr/local/cargo

# Install maturin, for maturin develop
# The extras specifier is quoted so the shell never treats [patchelf] as a glob.
RUN uv pip install --no-cache "maturin[patchelf]"

# Editable install of dynamo
COPY pyproject.toml README.md hatch_build.py /workspace/
RUN uv pip install --no-cache --no-deps -e .

CMD []