# syntax=docker/dockerfile:1.10.0
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# NOTE FOR dynamo_base AND wheel_builder STAGES:
#
# All changes to the dynamo_base and wheel_builder stages should be replicated across
# the plain Dockerfile and every framework-specific Dockerfile.* image:
#   - Dockerfile
#   - Dockerfile.vllm
#   - Dockerfile.sglang
#   - Dockerfile.trtllm
# This duplication was introduced purposely to quickly enable Docker layer caching and
# deduplication. Please ensure these stages stay in sync until the duplication can be
# addressed.
#
# Throughout this file, we make certain paths group-writable because this allows
# both the dynamo user (UID 1000) and Dev Container users (UID != 1000) to work
# properly without needing slow chown -R operations (which can add 2-10 extra
# minutes).
#
# DEVELOPMENT PATHS THAT MUST BE GROUP-WRITABLE (for non-virtualenv containers):
#   /workspace          - Users create/modify project files
#   /home/dynamo        - Users create config/cache files
#   /home/dynamo/.local - SGLang uses $HOME/.local/lib/python3.10/site-packages for pip install
#
# HOW TO ACHIEVE GROUP-WRITABLE PERMISSIONS:
#   1. SHELL + /etc/profile.d  - Login shell sources umask 002 globally for all RUN commands (775/664)
#   2. COPY --chmod=775        - Sets permissions on copied children (not destination)
#   3. chmod g+w (no -R)       - Fixes destination dirs only (milliseconds vs minutes)

# This section contains build arguments that are common and shared with
# the plain Dockerfile, so they should NOT have a default. The source of truth is from build.sh.
# Global build arguments. They are declared before the first FROM, so a stage
# that needs one of them in its own instructions must redeclare it with a bare
# `ARG NAME` (as dynamo_base does below for ARCH / ARCH_ALT).
ARG BASE_IMAGE
ARG BASE_IMAGE_TAG
ARG FRAMEWORK_IMAGE
ARG FRAMEWORK_IMAGE_TAG
ARG PYTHON_VERSION
ARG ENABLE_KVBM
ARG ENABLE_MEDIA_NIXL
ARG CARGO_BUILD_JOBS
ARG CUDA_VERSION
# ARCH is used in download URLs (amd64-style naming); ARCH_ALT is used for the
# manylinux image name and the Rust target triple (x86_64-style naming).
ARG ARCH=amd64
ARG ARCH_ALT=x86_64

# sccache configuration - inherit from base build
ARG USE_SCCACHE
ARG SCCACHE_BUCKET=""
ARG SCCACHE_REGION=""

# NIXL configuration
ARG NIXL_UCX_REF
ARG NIXL_REF
ARG NIXL_GDRCOPY_REF

##################################
########## Base Image ############
##################################
#
# Shared base stage: installs uv, the NATS server, etcd and the Rust toolchain.
# Later stages copy /usr/local/cuda, the Rust toolchain, nats-server and etcd
# out of this stage rather than building on top of it.

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS dynamo_base

# Redeclare the global ARGs needed by instructions in this stage
ARG ARCH
ARG ARCH_ALT

USER root
WORKDIR /opt/dynamo

# Install uv package manager
# NOTE(review): `latest` is a floating tag, so rebuilds may pick up a different
# uv release - consider pinning a version in all synced Dockerfiles.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Install NATS server (the .deb is downloaded, installed and removed in one layer)
# NOTE(review): the apt cache mount does not appear to be used by wget/dpkg - confirm it is needed.
ENV NATS_VERSION="v2.10.28"
RUN --mount=type=cache,target=/var/cache/apt \
    wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/${NATS_VERSION}/nats-server-${NATS_VERSION}-${ARCH}.deb && \
    dpkg -i nats-server-${NATS_VERSION}-${ARCH}.deb && rm nats-server-${NATS_VERSION}-${ARCH}.deb

# Install etcd: the release tarball is unpacked into the /usr/local/bin/etcd
# directory (top-level archive folder stripped), which is then added to PATH.
ENV ETCD_VERSION="v3.5.21"
RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
    mkdir -p /usr/local/bin/etcd && \
    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH

# Rust Setup
# Rust environment setup
ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
    RUST_VERSION=1.90.0

# Define Rust target based on ARCH_ALT ARG
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu

# Install Rust via a pinned rustup-init (1.28.1), then make the toolchain
# directories writable by all users so later stages/users can use them.
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
    chmod +x rustup-init && \
    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
    rm rustup-init && \
    chmod -R a+w $RUSTUP_HOME $CARGO_HOME
##################################
##### Wheel Build Image ##########
##################################
#
# Builds, on a manylinux_2_28 base, everything that ends up in the wheelhouse:
# gdrcopy, UCX, NIXL (library + Python wheel) and the Dynamo wheels.
# The runtime stage copies /usr/local/ucx, /opt/nvidia/nvda_nixl and
# /opt/dynamo/dist out of this stage.
#
# NOTE: per the header of this file, changes to this stage should be replicated
# in the sibling Dockerfiles that duplicate it.

# Redeclare ARCH_ALT ARG so it's available for interpolation in the FROM instruction
ARG ARCH_ALT

FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder

# Redeclare ARGs for this stage
ARG ARCH
ARG ARCH_ALT
ARG CARGO_BUILD_JOBS

WORKDIR /workspace

# Copy CUDA from base stage
COPY --from=dynamo_base /usr/local/cuda /usr/local/cuda
COPY --from=dynamo_base /etc/ld.so.conf.d/hpcx.conf /etc/ld.so.conf.d/hpcx.conf

# Set environment variables first so they can be used in COPY commands
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} \
    RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/opt/dynamo/target \
    PATH=/usr/local/cargo/bin:$PATH

# Copy artifacts from base stage
COPY --from=dynamo_base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=dynamo_base $CARGO_HOME $CARGO_HOME

# Install system dependencies
RUN yum groupinstall -y 'Development Tools' && \
    dnf install -y almalinux-release-synergy && \
    dnf config-manager --set-enabled powertools && \
    dnf install -y \
        # Build tools
        cmake \
        ninja-build \
        clang-devel \
        gcc-c++ \
        flex \
        wget \
        # Kernel module build dependencies
        dkms \
        # Protobuf support
        protobuf-compiler \
        # RDMA/InfiniBand support (required for UCX build with --with-verbs)
        libibverbs \
        libibverbs-devel \
        rdma-core \
        rdma-core-devel \
        libibumad \
        libibumad-devel \
        librdmacm-devel \
        numactl-devel

# Ensure a modern protoc is available (required for --experimental_allow_proto3_optional)
# The release zip is selected by ARCH_ALT; any pre-existing protoc binaries are
# removed first so that /usr/bin/protoc can be symlinked to the new one.
# 'include/*' is quoted so the pattern reaches unzip instead of being expanded
# by the shell against the working directory.
RUN set -eux; \
    PROTOC_VERSION=25.3; \
    case "${ARCH_ALT}" in \
        x86_64) PROTOC_ZIP="protoc-${PROTOC_VERSION}-linux-x86_64.zip" ;; \
        aarch64) PROTOC_ZIP="protoc-${PROTOC_VERSION}-linux-aarch_64.zip" ;; \
        *) echo "Unsupported architecture: ${ARCH_ALT}" >&2; exit 1 ;; \
    esac; \
    wget --tries=3 --waitretry=5 -O /tmp/protoc.zip "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/${PROTOC_ZIP}"; \
    rm -f /usr/local/bin/protoc /usr/bin/protoc; \
    unzip -o /tmp/protoc.zip -d /usr/local bin/protoc 'include/*'; \
    chmod +x /usr/local/bin/protoc; \
    ln -s /usr/local/bin/protoc /usr/bin/protoc; \
    protoc --version

# Point build tools explicitly at the modern protoc
ENV PROTOC=/usr/local/bin/protoc

ENV CUDA_PATH=/usr/local/cuda \
    PATH=/usr/local/cuda/bin:$PATH \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/lib:/usr/local/lib64:$LD_LIBRARY_PATH \
    NVIDIA_DRIVER_CAPABILITIES=video,compute,utility

# Create virtual environment for building wheels
# The maturin extra is quoted: an unquoted `maturin[patchelf]` is a shell glob
# pattern and could be rewritten by pathname expansion.
ARG PYTHON_VERSION
ENV VIRTUAL_ENV=/workspace/.venv
RUN uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
    uv pip install --upgrade meson pybind11 patchelf "maturin[patchelf]"

ARG NIXL_UCX_REF
ARG NIXL_REF
ARG NIXL_GDRCOPY_REF

# Build and install gdrcopy
RUN git clone --depth 1 --branch ${NIXL_GDRCOPY_REF} https://github.com/NVIDIA/gdrcopy.git && \
    cd gdrcopy/packages && \
    CUDA=/usr/local/cuda ./build-rpm-packages.sh && \
    rpm -Uvh gdrcopy-kmod-*.el8.noarch.rpm && \
    rpm -Uvh gdrcopy-*.el8.${ARCH_ALT}.rpm && \
    rpm -Uvh gdrcopy-devel-*.el8.noarch.rpm

# Install SCCACHE if requested
ARG USE_SCCACHE
ARG SCCACHE_BUCKET
ARG SCCACHE_REGION
COPY container/use-sccache.sh /tmp/use-sccache.sh
RUN if [ "$USE_SCCACHE" = "true" ]; then \
        /tmp/use-sccache.sh install; \
    fi

# Set SCCACHE environment variables
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
    SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
    RUSTC_WRAPPER=${USE_SCCACHE:+sccache}

# Build and install UCX
# AWS credentials are exposed only to this RUN through BuildKit secret mounts.
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
        export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
        export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
    fi && \
    cd /usr/local/src && \
    git clone https://github.com/openucx/ucx.git && \
    cd ucx && \
    git checkout $NIXL_UCX_REF && \
    ./autogen.sh && \
    ./contrib/configure-release \
        --prefix=/usr/local/ucx \
        --enable-shared \
        --disable-static \
        --disable-doxygen-doc \
        --enable-optimizations \
        --enable-cma \
        --enable-devel-headers \
        --with-cuda=/usr/local/cuda \
        --with-verbs \
        --with-dm \
        --with-gdrcopy=/usr/local \
        --with-efa \
        --enable-mt && \
    make -j && \
    make -j install-strip && \
    /tmp/use-sccache.sh show-stats "UCX" && \
    echo "/usr/local/ucx/lib" > /etc/ld.so.conf.d/ucx.conf && \
    echo "/usr/local/ucx/lib/ucx" >> /etc/ld.so.conf.d/ucx.conf && \
    ldconfig

# build and install nixl
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
    export CC=$(which gcc) && \
    export CXX=$(which g++) && \
    source ${VIRTUAL_ENV}/bin/activate && \
    git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl.git" && \
    cd nixl && \
    mkdir build && \
    meson setup build/ --prefix=/opt/nvidia/nvda_nixl --buildtype=release \
        -Dcudapath_lib="/usr/local/cuda/lib64" \
        -Dcudapath_inc="/usr/local/cuda/include" \
        -Ducx_path="/usr/local/ucx" && \
    cd build && \
    ninja && \
    ninja install && \
    /tmp/use-sccache.sh show-stats "NIXL"

ENV NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64 \
    NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins \
    NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}

# Register the NIXL library directories with the dynamic linker
RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
    echo "$NIXL_PLUGIN_DIR" >> /etc/ld.so.conf.d/nixl.conf && \
    ldconfig

# Build the nixl Python package into /opt/dynamo/dist/nixl
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
    export CC=$(which gcc) && \
    export CXX=$(which g++) && \
    cd /workspace/nixl && \
    uv build . --out-dir /opt/dynamo/dist/nixl --python $PYTHON_VERSION

# Copy source code (order matters for layer caching)
COPY pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /opt/dynamo/
COPY launch/ /opt/dynamo/launch/
COPY lib/ /opt/dynamo/lib/
COPY components/ /opt/dynamo/components/

# Build dynamo wheels
# Both feature switches must be redeclared here: an ARG declared before the
# first FROM is not visible inside a stage, so without `ARG ENABLE_MEDIA_NIXL`
# the "$ENABLE_MEDIA_NIXL" test below would always see an empty string and the
# media-nixl feature could never be enabled.
ARG ENABLE_KVBM
ARG ENABLE_MEDIA_NIXL
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
        export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
        export RUSTC_WRAPPER="sccache"; \
    fi && \
    export CC=$(which gcc) && \
    export CXX=$(which g++) && \
    source ${VIRTUAL_ENV}/bin/activate && \
    cd /opt/dynamo && \
    uv build --wheel --out-dir /opt/dynamo/dist && \
    cd /opt/dynamo/lib/bindings/python && \
    if [ "$ENABLE_MEDIA_NIXL" = "true" ]; then \
        maturin build --release --features dynamo-llm/media-nixl --out /opt/dynamo/dist; \
    else \
        maturin build --release --out /opt/dynamo/dist; \
    fi && \
    if [ "$ENABLE_KVBM" = "true" ]; then \
        cd /opt/dynamo/lib/bindings/kvbm && \
        maturin build --release --out target/wheels && \
        auditwheel repair \
            --exclude libnixl.so \
            --exclude libnixl_build.so \
            --exclude libnixl_common.so \
            --plat manylinux_2_28_${ARCH_ALT} \
            --wheel-dir /opt/dynamo/dist \
            target/wheels/*.whl; \
    fi && \
    /tmp/use-sccache.sh show-stats "Dynamo"

########################################################
########## Framework Development Image ################
########################################################
#
# PURPOSE: Framework development and SGLang/DeepEP/NVSHMEM compilation
#
# This stage builds and compiles framework dependencies including:
# - SGLang inference engine with CUDA support
# - DeepEP and NVSHMEM
# - All necessary build tools and compilation dependencies
# - Framework-level Python packages and extensions
#
# Use this
# stage when you need to:
# - Build SGLang from source with custom modifications
# - Develop or debug framework-level components
# - Create custom builds with specific optimization flags
#
#FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu24.04 AS framework
FROM ${FRAMEWORK_IMAGE}:${FRAMEWORK_IMAGE_TAG} AS framework

# ---- Build arguments -------------------------------------------------------
# Pinned component versions / commits
ARG SGLANG_COMMIT=0.5.6
ARG SGL_KERNEL_VERSION=0.3.16.post5
ARG DEEPEP_COMMIT=9af0e0d0e74f3577af1979c9b9e1ac2cad0104ee
ARG DEEPEP_GB_COMMIT=1b14ad661c7640137fcfe93cccb2694ede1220b0
ARG GDRCOPY_COMMIT=v2.4.4
ARG NVSHMEM_VERSION=3.3.9
# Build behaviour switches
ARG BUILD_TYPE=all
ARG GRACE_BLACKWELL=false
ARG CMAKE_BUILD_PARALLEL_LEVEL=2
# Globals redeclared so they are visible inside this stage
ARG ARCH
ARG ARCH_ALT
ARG PYTHON_VERSION
ARG CUDA_VERSION
ARG CARGO_BUILD_JOBS
ARG USE_SCCACHE
ARG SCCACHE_BUCKET
ARG SCCACHE_REGION

# ---- Stage-wide environment --------------------------------------------------
ENV DEBIAN_FRONTEND=noninteractive \
    TZ=America/Los_Angeles \
    CUDA_HOME=/usr/local/cuda \
    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/ \
    NVSHMEM_DIR=/sgl-workspace/nvshmem/install \
    PATH="${PATH}:/usr/local/nvidia/bin" \
    LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/nvidia/lib:/usr/local/nvidia/lib64" \
    LANG=en_US.UTF-8 \
    LANGUAGE=en_US:en \
    LC_ALL=en_US.UTF-8

# One layer for: deadsnakes PPA, Python ${PYTHON_VERSION}, all OS packages,
# python alternatives and the en_US.UTF-8 locale.
# DEBIAN_FRONTEND=noninteractive is already exported by the ENV above, so it is
# not repeated in front of the individual apt-get calls.
RUN apt-get update && \
    apt-get install -y --no-install-recommends software-properties-common && \
    add-apt-repository ppa:deadsnakes/ppa -y && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        # Python interpreter selected by PYTHON_VERSION
        python${PYTHON_VERSION}-dev \
        python${PYTHON_VERSION}-venv \
        python${PYTHON_VERSION}-distutils \
        python3-pip \
        # Compilers and build tooling
        build-essential \
        cmake \
        ninja-build \
        ccache \
        patchelf \
        git \
        git-lfs \
        # Base system pieces
        tzdata \
        locales \
        ca-certificates \
        dkms \
        kmod \
        # CLI helpers
        wget \
        curl \
        jq \
        unzip \
        # Networking
        netcat-openbsd \
        # TLS headers and pkg-config
        libssl-dev \
        pkg-config \
        # MPI / NUMA
        libopenmpi-dev \
        libnuma1 \
        libnuma-dev \
        numactl \
        # RDMA / InfiniBand stack
        libibverbs-dev \
        libibverbs1 \
        libibumad3 \
        librdmacm1 \
        libnl-3-200 \
        libnl-route-3-200 \
        libnl-route-3-dev \
        libnl-3-dev \
        ibverbs-providers \
        infiniband-diags \
        perftest \
        # C/C++ development libraries
        libgoogle-glog-dev \
        libgtest-dev \
        libjsoncpp-dev \
        libunwind-dev \
        libboost-all-dev \
        libgrpc-dev \
        libgrpc++-dev \
        libprotobuf-dev \
        protobuf-compiler \
        protobuf-compiler-grpc \
        pybind11-dev \
        libhiredis-dev \
        libcurl4-openssl-dev \
        libczmq4 \
        libczmq-dev \
        libfabric-dev \
        # Debian packaging tools
        devscripts \
        debhelper \
        fakeroot \
        check \
        libsubunit0 \
        libsubunit-dev && \
    # Make python3 and python resolve to the requested interpreter
    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 && \
    update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} && \
    update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1 && \
    update-alternatives --set python /usr/bin/python${PYTHON_VERSION} && \
    # Generate the locale referenced by LANG/LC_ALL
    locale-gen en_US.UTF-8 && \
    # Drop apt metadata in the same layer
    rm -rf /var/lib/apt/lists/* && \
    apt-get clean

# sccache helper script; the install step only runs when USE_SCCACHE is "true"
COPY container/use-sccache.sh /tmp/use-sccache.sh
RUN [ "$USE_SCCACHE" != "true" ] || /tmp/use-sccache.sh install

# sccache settings: each value expands to an empty string when USE_SCCACHE is
# unset or empty.
# NOTE(review): a non-empty value such as the literal "false" presumably still
# populates these - confirm build.sh never passes USE_SCCACHE=false.
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
    SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
    SCCACHE_S3_KEY_PREFIX=${USE_SCCACHE:+${ARCH}}

WORKDIR /sgl-workspace

# GDRCopy: build the .deb packages from the pinned tag and install them
RUN git clone --depth 1 --branch ${GDRCOPY_COMMIT} https://github.com/NVIDIA/gdrcopy.git && \
    cd gdrcopy/packages && \
    export CUDA=${CUDA_HOME} && \
    ./build-deb-packages.sh && \
    dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb

# DeepEP IBGDA fix: provide an unversioned libmlx5.so symlink next to libmlx5.so.1
RUN MLX5_LIBDIR="/usr/lib/$(uname -m)-linux-gnu" && \
    ln -sf "${MLX5_LIBDIR}/libmlx5.so.1" "${MLX5_LIBDIR}/libmlx5.so"

# Create the dynamo user (primary group 0 for OpenShift compatibility).
# The build fails if the new user does not get UID 1000.
RUN ( userdel -r ubuntu > /dev/null 2>&1 || true ) && \
    useradd -m -s /bin/bash -g 0 dynamo && \
    test "$(id -u dynamo)" -eq 1000 && \
    mkdir -p /workspace /home/dynamo/.cache /opt/dynamo && \
    # chown without -R: only the listed directories change owner, not their contents
    chown dynamo:0 /sgl-workspace /workspace /home/dynamo /home/dynamo/.cache /opt/dynamo && \
    # No chmod here: umask 002 covers newly created files and COPY --chmod covers copied content.
    # The umask is installed via /etc/profile.d (as root, before USER dynamo) because umask is a
    # shell builtin - an ENV UMASK=002 would have no effect.
    mkdir -p /etc/profile.d && \
    echo 'umask 002' > /etc/profile.d/00-umask.sh

USER dynamo
ENV HOME=/home/dynamo

# Login shell (-l) so that every following RUN sources /etc/profile.d/00-umask.sh (umask 002)
SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"]

# Install SGLang (requires CUDA 12.8.1 or 12.9.1). Note that the system-wide site-packages
# directory is not writable here, so it gets installed to ~/.local/lib/python/site-packages.
# Pin the packaging toolchain, clone the SGLang release tag, pick the PyTorch
# wheel index that matches CUDA_VERSION, then install sgl-kernel, SGLang
# (editable), a pinned NCCL and the flashinfer cubins.
RUN python3 -m pip install --no-cache-dir --ignore-installed pip==25.3 setuptools==80.9.0 wheel==0.45.1 html5lib==1.1 six==1.17.0 && \
    git clone --depth 1 --branch v${SGLANG_COMMIT} https://github.com/sgl-project/sglang.git && \
    cd sglang && \
    if [ "$CUDA_VERSION" = "12.8.1" ]; then \
        CUINDEX=128; \
    elif [ "$CUDA_VERSION" = "12.9.1" ]; then \
        CUINDEX=129; \
    else \
        echo "Error: Unsupported CUDA version for sglang: $CUDA_VERSION (requires 12.8.1 or 12.9.1)" && exit 1; \
    fi && \
    python3 -m pip install --no-cache-dir sgl-kernel==${SGL_KERNEL_VERSION} && \
    python3 -m pip install --no-cache-dir -e "python[${BUILD_TYPE}]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} && \
    python3 -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.6 --force-reinstall --no-deps && \
    FLASHINFER_LOGGING_LEVEL=warning python3 -m flashinfer --download-cubin

# Fetch and unpack the NVSHMEM sources, then check out DeepEP at a pinned commit.
# GRACE_BLACKWELL=true selects the fzyzcjy fork (GB200); otherwise upstream deepseek-ai is used.
# The CPU timeout constant in DeepEP's configs.cuh is raised from 100 to 1000 seconds.
RUN --mount=type=cache,target=/var/cache/curl,uid=1000,gid=0 \
    NVSHMEM_TARBALL="nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz" && \
    curl --retry 3 --retry-delay 2 -fsSL -o "/var/cache/curl/${NVSHMEM_TARBALL}" "https://developer.download.nvidia.com/compute/redist/nvshmem/${NVSHMEM_VERSION}/source/${NVSHMEM_TARBALL}" && \
    tar -xf "/var/cache/curl/${NVSHMEM_TARBALL}" && \
    mv nvshmem_src nvshmem && \
    rm -f "/var/cache/curl/${NVSHMEM_TARBALL}" && \
    if [ "$GRACE_BLACKWELL" = true ]; then \
        DEEPEP_REPO="https://github.com/fzyzcjy/DeepEP.git"; \
        DEEPEP_REF="${DEEPEP_GB_COMMIT}"; \
    else \
        DEEPEP_REPO="https://github.com/deepseek-ai/DeepEP.git"; \
        DEEPEP_REF="${DEEPEP_COMMIT}"; \
    fi && \
    git clone --depth 1 "${DEEPEP_REPO}" && \
    cd DeepEP && \
    git fetch --depth 1 origin "${DEEPEP_REF}" && \
    git checkout "${DEEPEP_REF}" && \
    sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh

# NVSHMEM, native library only (-DNVSHMEM_BUILD_PYTHON_LIB=OFF), installed into NVSHMEM_DIR.
# The NVSHMEM_* feature switches are passed only to the configure step.
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} \
    && export CC=$(which gcc) CXX=$(which g++) CUDA_CXX=$(which nvcc) \
    && cd /sgl-workspace/nvshmem \
    && CUDA_ARCH="90" \
    && if [ "$GRACE_BLACKWELL" = true ]; then CUDA_ARCH="90;100;120"; fi \
    && NVSHMEM_SHMEM_SUPPORT=0 \
       NVSHMEM_UCX_SUPPORT=0 \
       NVSHMEM_USE_NCCL=0 \
       NVSHMEM_MPI_SUPPORT=0 \
       NVSHMEM_IBGDA_SUPPORT=1 \
       NVSHMEM_PMIX_SUPPORT=0 \
       NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
       NVSHMEM_USE_GDRCOPY=1 \
       cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}" -DNVSHMEM_BUILD_PYTHON_LIB=OFF \
    && cmake --build build --target install -j${CMAKE_BUILD_PARALLEL_LEVEL} \
    && /tmp/use-sccache.sh show-stats "NVSHMEM"

# nvshmem4py wheel, built as its own step for the configured PYTHON_VERSION (CUDA 12 target).
# The tree is reconfigured with -DNVSHMEM_BUILD_PYTHON_LIB=ON so the nvshmem4py
# subdirectory and its wheel target exist.
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} \
    && export CC=$(which gcc) CXX=$(which g++) CUDA_CXX=$(which nvcc) \
    && cd /sgl-workspace/nvshmem \
    && CUDA_ARCH="90" \
    && if [ "$GRACE_BLACKWELL" = true ]; then CUDA_ARCH="90;100;120"; fi \
    && NVSHMEM_SHMEM_SUPPORT=0 \
       NVSHMEM_UCX_SUPPORT=0 \
       NVSHMEM_USE_NCCL=0 \
       NVSHMEM_MPI_SUPPORT=0 \
       NVSHMEM_IBGDA_SUPPORT=1 \
       NVSHMEM_PMIX_SUPPORT=0 \
       NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
       NVSHMEM_USE_GDRCOPY=1 \
       cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}" -DNVSHMEM_BUILD_PYTHON_LIB=ON \
    && cmake --build build --target build_nvshmem4py_wheel_cu12_${PYTHON_VERSION} -j${CMAKE_BUILD_PARALLEL_LEVEL} \
    && /tmp/use-sccache.sh show-stats "NVSHMEM4PY"

# DeepEP: installed from the checked-out sources against the NVSHMEM install above
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} \
    && cd /sgl-workspace/DeepEP \
    && NVSHMEM_DIR=${NVSHMEM_DIR} TORCH_CUDA_ARCH_LIST="9.0;10.0" pip install --no-build-isolation .

# Reuse the Rust toolchain already installed in dynamo_base instead of installing it again.
# Pattern: COPY --chmod=775 DIR, then RUN chmod g+w DIR, because COPY --chmod only
# affects DIR/* and not DIR itself.
COPY --from=dynamo_base --chown=dynamo:0 --chmod=775 /usr/local/rustup /usr/local/rustup
COPY --from=dynamo_base --chown=dynamo:0 --chmod=775 /usr/local/cargo /usr/local/cargo
RUN chmod g+w /usr/local/rustup /usr/local/cargo

ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/workspace/target \
    PATH=/usr/local/cargo/bin:$PATH \
    CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}

# Pinned Python build helpers
RUN python3 -m pip install --no-cache-dir \
        mooncake-transfer-engine==0.3.6.post1 \
        scikit-build-core==0.11.6 \
        setuptools-rust==1.12.0

##################################################
########## Runtime Image ########################
##################################################
#
# PURPOSE: Production runtime environment
#
# This stage creates a production-ready image containing:
# - Pre-compiled SGLang, DeepEP, and NVSHMEM components
# - Dynamo runtime libraries and Python packages
# - Essential runtime dependencies and configurations
# - Optimized for inference workloads and deployment
#
# Use this stage when you need:
# - Production deployment of Dynamo with SGLang + DeepEP
# - Minimal runtime footprint without build tools
# - Ready-to-run inference server
#   environment
#
# This stage extends the framework stage with NATS/etcd (from dynamo_base),
# UCX/NIXL and the Dynamo wheels (from wheel_builder), the shared Python
# requirements, and the repository directories needed for CI. It finishes as
# the non-root dynamo user.

FROM framework AS runtime

WORKDIR /workspace

ARG ARCH
ARG ARCH_ALT
ARG PYTHON_VERSION

ENV DYNAMO_HOME=/opt/dynamo
ENV NVSHMEM_DIR=/sgl-workspace/nvshmem/install

# NIXL is built under lib64 in wheel_builder; here it is additionally exposed
# under the multiarch-style lib/${ARCH_ALT}-linux-gnu directory (see COPY below).
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=${NIXL_PREFIX}/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=${NIXL_LIB_DIR}/plugins

# Continuation lines must stay at column 0 so no whitespace ends up in the value
ENV LD_LIBRARY_PATH=\
${NVSHMEM_DIR}/lib:\
${NIXL_LIB_DIR}:\
${NIXL_PLUGIN_DIR}:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
/usr/local/nvidia/lib64:\
${LD_LIBRARY_PATH}

# Copy NATS and ETCD from dynamo_base, and UCX/NIXL from wheel_builder
COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
COPY --from=wheel_builder /usr/local/ucx /usr/local/ucx
COPY --chown=dynamo: --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
COPY --chown=dynamo: --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_LIB_DIR}/
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/

ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:${HOME}/.local/bin:$PATH

# Install Dynamo wheels built in the wheel_builder stage
# Pattern: COPY --chmod=775 DIR; chmod g+w DIR is done later as root because
# COPY --chmod only affects DIR/*, not DIR itself.
COPY --chmod=775 --chown=dynamo:0 benchmarks/ /opt/dynamo/benchmarks/
COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
# The temporary benchmarks copy is removed in the same layer after installation.
RUN python3 -m pip install --no-cache-dir \
        /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
        /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
        /opt/dynamo/wheelhouse/nixl/nixl*.whl \
    && cd /opt/dynamo/benchmarks \
    && python3 -m pip install --no-cache-dir . \
    && cd - \
    && rm -rf /opt/dynamo/benchmarks

# Install common and test dependencies
# The requirement files are bind-mounted for this RUN only and are not copied into the image.
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
    python3 -m pip install \
        --no-cache-dir \
        --requirement /tmp/requirements.txt \
        --requirement /tmp/requirements.test.txt

## Copy attribution files and license with correct ownership
COPY --chmod=664 --chown=dynamo:0 ATTRIBUTION* LICENSE /workspace/

# Copy tests, examples, benchmarks, deploy, components and recipes for CI with correct ownership
# Pattern: COPY --chmod=775 DIR; chmod g+w DIR is done later as root because
# COPY --chmod only affects DIR/*, not DIR itself.
COPY --chmod=775 --chown=dynamo:0 tests /workspace/tests
COPY --chmod=775 --chown=dynamo:0 examples /workspace/examples
COPY --chmod=775 --chown=dynamo:0 benchmarks /workspace/benchmarks
COPY --chmod=775 --chown=dynamo:0 deploy /workspace/deploy
COPY --chmod=775 --chown=dynamo:0 components/ /workspace/components/
COPY --chmod=775 --chown=dynamo:0 recipes/ /workspace/recipes/

# Setup launch banner in common directory accessible to all users
# Lines starting with "# " are stripped from the banner template.
RUN --mount=type=bind,source=./container/launch_message/runtime.txt,target=/opt/dynamo/launch_message.txt \
    sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen

# Setup environment for all users
USER root
# Fix directory permissions: COPY --chmod only affects contents, not the directory itself.
# The venv activation line written to /etc/bash.bashrc is guarded with a file test:
# /opt/dynamo/venv is only created by the dev stage, so an unconditional `source`
# would print an error in every interactive shell of the runtime image.
RUN chmod g+w /workspace /workspace/* /opt/dynamo /opt/dynamo/* && \
    chown dynamo:0 /workspace /opt/dynamo/ && \
    chmod 755 /opt/dynamo/.launch_screen && \
    echo 'if [ -f /opt/dynamo/venv/bin/activate ]; then source /opt/dynamo/venv/bin/activate; fi' >> /etc/bash.bashrc && \
    echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc

USER dynamo

# Record the source commit in the image environment. Declared late so that a
# changing commit SHA does not invalidate the layers above.
ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
###########################################################
########## Development (run.sh, runs as root user) ########
###########################################################
#
# PURPOSE: Local development environment for use with run.sh (not Dev Container plug-in)
#
# What this stage adds on top of runtime:
# - Runs as root
# - Development tools and utilities for local debugging
# - Support for vscode/cursor development outside the Dev Container plug-in
#
# Pick this stage for a full-featured development environment with extra tools;
# do not use it together with the Dev Container plug-in.

FROM runtime AS dev

ARG PYTHON_VERSION
ARG WORKSPACE_DIR=/sgl-workspace/dynamo

# NOTE: In the framework/runtime stages SGLang lives in the system Python (no virtualenv), which
# matches the upstream SGLang Dockerfile: https://github.com/sgl-project/sglang/blob/main/docker/Dockerfile
# The dev stage adds a lightweight venv created with --system-site-packages so that maturin develop
# has a venv to work in while every system-installed package (sglang, torch, deepep, etc.) stays visible.
# uv binaries, then the dev venv (sees system site-packages, see note above)
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir -p /opt/dynamo/venv \
    && uv venv /opt/dynamo/venv --python $PYTHON_VERSION --system-site-packages

ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="/opt/dynamo/venv/bin:${PATH}"

USER root
# The venv needs no permission fix-up: it was created under the umask 002 configured earlier

# Development tooling from apt, followed by nsight-systems-cli from NVIDIA's devtools repository.
# NOTE(review): the devtools repo path (ubuntu2004/amd64) and key URL (ubuntu1804/x86_64) are
# hard-coded - confirm they are appropriate for the base image release and for arm64 builds.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        # Monitoring / debugging
        nvtop \
        htop \
        gdb \
        # Network and system helpers
        wget \
        iproute2 \
        net-tools \
        openssh-client \
        rsync \
        lsof \
        # Archives and file listing
        zip \
        tree \
        # Editors and build tooling
        vim \
        tmux \
        git \
        git-lfs \
        autoconf \
        automake \
        cmake \
        libtool \
        meson \
        bear \
        ccache \
        less \
        # C/C++ language support
        clang \
        libclang-dev \
        # Shell / productivity
        zsh \
        silversearcher-ag \
        cloc \
        locales \
        # sudo for dev stage
        sudo \
        # Needed to import the NVIDIA repository key
        gnupg \
    && echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64 /" | tee /etc/apt/sources.list.d/nvidia-devtools.list \
    && apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub \
    && apt-get update -y \
    && apt-get install -y nsight-systems-cli \
    && rm -rf /var/lib/apt/lists/*

# clang-format (static binary) and clangd (release zip unpacked into /usr/local)
# NOTE(review): the clang-format asset is the linux-amd64 build - confirm behaviour on arm64.
RUN CLANGD_VERSION=18.1.3 && \
    curl --retry 3 --retry-delay 2 -LSso /usr/local/bin/clang-format https://github.com/muttleyxd/clang-tools-static-binaries/releases/download/master-32d3ac78/clang-format-16_linux-amd64 && \
    chmod +x /usr/local/bin/clang-format && \
    curl --retry 3 --retry-delay 2 -L "https://github.com/clangd/clangd/releases/download/${CLANGD_VERSION}/clangd-linux-${CLANGD_VERSION}.zip" -o clangd.zip && \
    unzip clangd.zip && \
    cp -r "clangd_${CLANGD_VERSION}"/bin/* /usr/local/bin/ && \
    cp -r "clangd_${CLANGD_VERSION}"/lib/* /usr/local/lib/ && \
    rm -rf "clangd_${CLANGD_VERSION}" clangd.zip

# Editable (-e) install of dynamo from the project metadata copied into the working directory
COPY --chmod=664 pyproject.toml README.md hatch_build.py /workspace/
RUN python3 -m pip install --no-deps -e .
# Install Python development packages
# "maturin[patchelf]" is quoted because the square brackets form a shell glob
# pattern; unquoted, the argument could be rewritten by pathname expansion if a
# matching file name exists in the working directory.
RUN python3 -m pip install --no-cache-dir \
        "maturin[patchelf]" \
        pytest \
        black \
        isort \
        icdiff \
        scikit_build_core \
        uv \
        pre-commit \
        pandas \
        matplotlib \
        tabulate

# Same entrypoint as the runtime stage, with no default arguments
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []