# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# ---------------------------------------------------------------------------
# Global build arguments.
# Declared ahead of the first FROM, so a stage must re-declare an ARG
# (`ARG NAME`) before it can read the value.
# ---------------------------------------------------------------------------

# Image used for the `framework` stage (source of CUDA / PyTorch artifacts).
ARG BASE_IMAGE="nvcr.io/nvidia/pytorch"
ARG BASE_IMAGE_TAG="25.06-py3"

# NOTE(review): RELEASE_BUILD and ENABLE_KVBM are not referenced anywhere in
# the visible part of this file - confirm whether they are still needed.
ARG RELEASE_BUILD
ARG ENABLE_KVBM=false

# Image used as the base of the `runtime` stage.
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.9.1-runtime-ubuntu24.04"

# TensorRT-LLM specific configuration
#   HAS_TRTLLM_CONTEXT=1  -> install the wheel found in the trtllm_wheel context
#   otherwise             -> install TENSORRTLLM_PIP_WHEEL, with
#                            TENSORRTLLM_INDEX_URL as an extra index
ARG HAS_TRTLLM_CONTEXT=0
ARG TENSORRTLLM_PIP_WHEEL="tensorrt-llm"
ARG TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"

# Architecture ARGs, so that both x86 and aarch64 builds are supported:
#   ARCH:     package suffixes                  (e.g. amd64,  arm64)
#   ARCH_ALT: Rust targets, manylinux suffix    (e.g. x86_64, aarch64)
#
# The defaults target x86/amd64:
#   --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
#
# For arm64/aarch64, build with:
#   --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
#
# NOTE: There is no easy way to derive one of these values from the other
# without adding if statements everywhere, so both are plain ARGs for now.
ARG ARCH=amd64
ARG ARCH_ALT=x86_64

# Python configuration
ARG PYTHON_VERSION=3.12

# Image that provides prebuilt Dynamo artifacts (nats-server, etcd, UCX, NIXL,
# wheelhouse, Rust toolchain) consumed via COPY --from=dynamo_base.
ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base

# Copy artifacts from NGC PyTorch image
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework

##################################################
########## Runtime Image ########################
##################################################
#
# PURPOSE: Production runtime environment
#
# This stage creates a lightweight production-ready image containing:
# - Pre-compiled TensorRT-LLM and framework dependencies
# - Dynamo runtime libraries and Python packages
# - Essential runtime dependencies and configurations
# - Optimized for inference workloads and deployment
#
# Use this stage when you need:
# - Production deployment of Dynamo with TensorRT-LLM
# - Minimal runtime footprint without build tools
# - Ready-to-run inference server environment
# - Base for custom application containers
#

FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

WORKDIR /workspace
ENV VIRTUAL_ENV=/opt/dynamo/venv

# Re-declare the global ARGs this stage reads.
ARG ARCH_ALT
ARG PYTHON_VERSION

ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins

# Install the compiler toolchain, Python headers/pip, cuDNN, ZeroMQ, RDMA
# libraries, SSH (for OpenMPI) and basic utilities from apt.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        # Build tools (required for JIT kernel compilation)
        build-essential \
        g++ \
        ninja-build \
        # Python runtime - CRITICAL for virtual environment to work
        python${PYTHON_VERSION}-dev \
        python3-pip \
        # CUDA/ML libraries
        libcudnn9-cuda-12 \
        # Network and communication libraries
        libzmq3-dev \
        # RDMA/UCX libraries required to find RDMA devices
        ibverbs-providers \
        ibverbs-utils \
        libibumad3 \
        libibverbs1 \
        libnuma1 \
        librdmacm1 \
        rdma-core \
        # OpenMPI dependencies
        openssh-client \
        openssh-server \
        # System utilities
        ca-certificates \
        curl \
        jq && \
    rm -rf /var/lib/apt/lists/*

# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from framework devel image
COPY --from=framework /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc
COPY --from=framework /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++
COPY --from=framework /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas
COPY --from=framework /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary
COPY --from=framework /usr/local/cuda/include/ /usr/local/cuda/include/
COPY --from=framework /usr/local/cuda/nvvm /usr/local/cuda/nvvm
COPY --from=framework /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/
COPY --from=framework /usr/local/cuda/lib64/libcupti* /usr/local/cuda/lib64/
COPY --from=framework /usr/local/lib/lib* /usr/local/lib/

### COPY NATS & ETCD ###
# Copy nats and etcd from dynamo_base image
COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/

# Copy the UCX install and the NIXL install prefix from the dynamo_base image.
COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx
COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX

# Copy OpenMPI from framework image
COPY --from=framework /opt/hpcx/ompi /opt/hpcx/ompi

# Copy NUMA library from framework image
COPY --from=framework /usr/lib/${ARCH_ALT}-linux-gnu/libnuma.so* /usr/lib/${ARCH_ALT}-linux-gnu/

ENV DYNAMO_HOME=/workspace
# Put NIXL, its plugins, UCX and OpenMPI on the loader path ahead of whatever
# the base image already exported.
ENV LD_LIBRARY_PATH="${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:/opt/hpcx/ompi/lib:${LD_LIBRARY_PATH}"
# ${VIRTUAL_ENV}/bin comes first, so the virtual environment created below is
# effectively "activated" for every later RUN and for the running container.
ENV PATH="${VIRTUAL_ENV}/bin:/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/bin:/usr/local/cuda/nvvm/bin:$PATH"
ENV OPAL_PREFIX=/opt/hpcx/ompi

### VIRTUAL ENVIRONMENT SETUP ###

# With more than one source the destination must be a directory and end in "/".
# NOTE(review): the uv image tag is unpinned (`latest`); consider pinning a
# specific version for reproducible builds.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Create virtual environment.
# VIRTUAL_ENV and PATH were already exported above, so no separate
# "activate" ENV instruction is needed after this step.
RUN mkdir -p /opt/dynamo/venv && \
    uv venv /opt/dynamo/venv --python $PYTHON_VERSION

# Copy pytorch installation from NGC PyTorch.
# The *_VER values must match the dist-info directory names in the framework
# image. SETUPTOOLS_VER, NETWORKX_VER, PACKAGING_VER and MPMATH_VER are
# declared but not used by any COPY below.
ARG TORCH_VER=2.8.0a0+5228986c39.nv25.6
ARG TORCHVISION_VER=0.22.0a0+95f10a4e
ARG SETUPTOOLS_VER=78.1.1
ARG PYTORCH_TRITON_VER=3.3.0+git96316ce52.nvinternal
ARG JINJA2_VER=3.1.6
ARG NETWORKX_VER=3.5
ARG SYMPY_VER=1.14.0
ARG PACKAGING_VER=23.2
ARG FLASH_ATTN_VER=2.7.4.post1
ARG MPMATH_VER=1.3.0

COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch \
     ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch-${TORCH_VER}.dist-info \
     ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch-${TORCH_VER}.dist-info
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchgen \
     ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchgen
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision \
     ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision-${TORCHVISION_VER}.dist-info \
     ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision-${TORCHVISION_VER}.dist-info
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision.libs \
     ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision.libs
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/functorch \
     ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/functorch
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/jinja2 \
     ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/jinja2
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/jinja2-${JINJA2_VER}.dist-info \
     ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/jinja2-${JINJA2_VER}.dist-info
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/sympy \
     ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/sympy
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/sympy-${SYMPY_VER}.dist-info \
     ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/sympy-${SYMPY_VER}.dist-info
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn \
     ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/flash_attn
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info \
     ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn_2_cuda.cpython-*-*-linux-gnu.so \
     ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/triton \
     ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/triton
COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info \
     ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info

# Install TensorRT-LLM and related dependencies
ARG HAS_TRTLLM_CONTEXT
ARG TENSORRTLLM_PIP_WHEEL
ARG TENSORRTLLM_INDEX_URL

# Copy only wheel files from trtllm_wheel stage from build_context
COPY --from=trtllm_wheel /*.whl /trtllm_wheel/

# Note: TensorRT needs to be uninstalled before installing the TRTLLM wheel
# because there might be mismatched versions of TensorRT between the NGC PyTorch
# and the TRTLLM wheel.
# NOTE(review): the install step that follows only empties
# /etc/pip/constraint.txt (when present) and does not visibly uninstall
# TensorRT - confirm whether the note above is still current.
# Install TensorRT-LLM into the virtual environment.
#   1. Empty /etc/pip/constraint.txt when it exists (presumably so pins shipped
#      with a base image cannot constrain this install - TODO confirm).
#   2. HAS_TRTLLM_CONTEXT=1: install the first wheel found under /trtllm_wheel
#      and fail the build when none is present.
#      Otherwise: install ${TENSORRTLLM_PIP_WHEEL}, using
#      ${TENSORRTLLM_INDEX_URL} as an extra index so dependencies can still be
#      resolved from PyPI.
RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
    if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
        WHEEL_FILE=$(find /trtllm_wheel -name "*.whl" | head -n 1); \
        if [ -n "$WHEEL_FILE" ]; then \
            uv pip install "$WHEEL_FILE"; \
        else \
            echo "No wheel file found in /trtllm_wheel directory."; \
            exit 1; \
        fi; \
    else \
        uv pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}"; \
    fi

# Install dynamo, NIXL, and dynamo-specific dependencies
COPY benchmarks/ /opt/dynamo/benchmarks/
COPY --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
# The CPython wheel tag is derived from PYTHON_VERSION (3.12 -> cp312) instead
# of being hard-coded, so the runtime wheel that is picked always matches the
# interpreter the virtual environment was created with.
RUN PY_TAG="cp${PYTHON_VERSION%%.*}${PYTHON_VERSION#*.}" && \
    uv pip install \
        /opt/dynamo/wheelhouse/ai_dynamo_runtime*"${PY_TAG}"*.whl \
        /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
        /opt/dynamo/wheelhouse/nixl/nixl*.whl \
        /opt/dynamo/benchmarks && \
    rm -rf /opt/dynamo/benchmarks

# Install common and test dependencies.
# The requirement files are bind-mounted for this step only, so they are not
# written into an image layer.
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
    uv pip install --requirement /tmp/requirements.txt --requirement /tmp/requirements.test.txt

# Copy UCX libraries, libucc.so is needed by pytorch. May not need to copy whole hpcx dir but only /opt/hpcx/ucc/
COPY --from=framework /opt/hpcx /opt/hpcx
# This is needed to make libucc.so visible so pytorch can use it.
ENV LD_LIBRARY_PATH="/opt/hpcx/ucc/lib:${LD_LIBRARY_PATH}"

# Might not need to copy cusparseLt in the future once it's included in DLFW cuda container
# networkx, packaging, setuptools get overridden by trtllm installation, so not copying them
# pytorch-triton is copied after trtllm installation.
# cuSPARSELt shared libraries, taken from the framework image.
COPY --from=framework /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/

# Sources shipped inside the image for CI: tests, examples, benchmarks,
# deployment assets and components.
COPY tests /workspace/tests
COPY examples /workspace/examples
COPY benchmarks /workspace/benchmarks
COPY deploy /workspace/deploy
COPY components/ /workspace/components/

# Attribution and license files
COPY ATTRIBUTION* LICENSE /workspace/

# Launch banner: drop the lines that start with "# " from launch_message.txt,
# store the result as ~/.launch_screen, then make interactive bash shells print
# it and activate the virtual environment.
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    { \
        echo "cat ~/.launch_screen"; \
        echo "source $VIRTUAL_ENV/bin/activate"; \
    } >> ~/.bashrc

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []

###########################################################
########## Development (run.sh, runs as root user) ########
###########################################################
#
# PURPOSE: Local development environment for use with run.sh (not Dev Container plug-in)
#
# This stage runs as root and provides:
# - Development tools and utilities for local debugging
# - Support for vscode/cursor development outside the Dev Container plug-in
#
# Use this stage if you need a full-featured development environment with extra tools,
# but do not use it with the Dev Container plug-in.

FROM runtime AS dev

# Don't want ubuntu to be editable, just change uid and gid.
# NOTE(review): no uid/gid change is visible in this stage - confirm whether
# the remark above still applies.
# Root of the source checkout inside the container.
ARG WORKSPACE_DIR=/workspace

# Install utilities as root.
# DEBIAN_FRONTEND is set inline (as in the runtime stage) so package
# configuration can never block the build on an interactive prompt.
RUN apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        # Install utilities
        nvtop \
        wget \
        tmux \
        vim \
        git \
        iproute2 \
        rsync \
        zip \
        unzip \
        htop \
        # Build Dependencies
        autoconf \
        automake \
        cmake \
        libtool \
        meson \
        net-tools \
        pybind11-dev \
        # Rust build dependencies
        clang \
        libclang-dev \
        protobuf-compiler && \
    rm -rf /var/lib/apt/lists/*

# /usr/local/bin and ${VIRTUAL_ENV} are inherited from the parent stage
# (`FROM runtime`). They are deliberately NOT re-copied with
# `COPY --from=runtime`: that would only duplicate their content in new layers
# without changing the resulting filesystem.

# Set workspace directory variable and point the Rust toolchain at the
# locations populated by the COPY instructions below.
ENV WORKSPACE_DIR=${WORKSPACE_DIR} \
    DYNAMO_HOME=${WORKSPACE_DIR} \
    RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/workspace/target \
    VIRTUAL_ENV=/opt/dynamo/venv \
    PATH=/usr/local/cargo/bin:$PATH

# Rust toolchain (rustup + cargo) from the dynamo_base image.
COPY --from=dynamo_base /usr/local/rustup /usr/local/rustup
COPY --from=dynamo_base /usr/local/cargo /usr/local/cargo

# so we can use maturin develop
# The requirement is quoted so the shell cannot treat "[patchelf]" as a glob
# character class.
RUN uv pip install "maturin[patchelf]"

# Make sure to sync this with the one specified on README.md.
# This is a generic PYTHONPATH which works for all the frameworks, so some paths may not be relevant for this particular framework.
ENV PYTHONPATH=${WORKSPACE_DIR}:${WORKSPACE_DIR}/components/metrics/src:${WORKSPACE_DIR}/components/frontend/src:${WORKSPACE_DIR}/components/planner/src:${WORKSPACE_DIR}/components/backends/mocker/src:${WORKSPACE_DIR}/components/backends/trtllm/src:${WORKSPACE_DIR}/components/backends/vllm/src:${WORKSPACE_DIR}/components/backends/sglang/src:${WORKSPACE_DIR}/components/backends/llama_cpp/src

CMD []