# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Multi-stage build for the SGLang flavour of the image.
#
# Stages defined in this file:
#   base          - CUDA devel image + UCX, NIXL, NATS, etcd, Python venv, SGLang, Rust toolchain
#   local-dev     - `base` adjusted for the non-root `ubuntu` user (devcontainer use)
#   wheel_builder - manylinux image that builds the Rust workspace and the Python wheels
#   ci_minimum    - `base` + built artifacts + the full source tree
#   dev           - `ci_minimum` with the NVIDIA entrypoint
#   runtime       - CUDA runtime image with only the pieces needed to run
#
# Build arguments declared before the first FROM are global: they may be used in
# FROM lines, and must be redeclared (bare `ARG NAME`) inside a stage to be
# visible to that stage's instructions.

ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# FIXME: NCCL will hang with 25.03, so use 25.01 for now
# Please check https://github.com/ai-dynamo/dynamo/pull/1065
# for details and reproducer to manually test if the image
# can be updated to later versions.
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
ARG RELEASE_BUILD
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"

# Define general architecture ARGs for supporting both x86 and aarch64 builds.
#   ARCH: Used for package suffixes (e.g., amd64, arm64)
#   ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
#
# Default values are for x86/amd64:
#   --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
#
# For arm64/aarch64, build with:
#   --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
#
# NOTE: There isn't an easy way to define one of these values based on the other value
# without adding if statements everywhere, so just define both as ARGs for now.
ARG ARCH=amd64
ARG ARCH_ALT=x86_64

# Make sure to update the dependency version in pyproject.toml when updating this
ARG SGLANG_VERSION="0.4.9.post6"

##################################
########## Base Image ############
##################################

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base

# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT

# Git refs checked out for the UCX and NIXL source builds below
ARG NIXL_UCX_REF=v1.19.x
ARG NIXL_REF=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4

# NIXL_SRC_DIR is the source checkout; NIXL_PREFIX is the meson install prefix
ENV NIXL_SRC_DIR=/opt/nixl
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
ENV LD_LIBRARY_PATH=$NIXL_LIB_DIR:$NIXL_PLUGIN_DIR:$LD_LIBRARY_PATH

USER root
# NOTE(review): PYTHON_VERSION is declared here, but the `uv venv` calls in this
# file hard-code `--python 3.12` and the wheel globs hard-code `cp312` -- confirm
# whether this ARG is still meant to be honoured.
ARG PYTHON_VERSION=3.12

RUN apt-get update -y && \
    apt-get install -y \
    # NIXL build dependencies
    cmake \
    meson \
    ninja-build \
    pybind11-dev \
    # These headers are missing with the hpcx installer, required
    # by UCX to find RDMA devices
    libibverbs-dev rdma-core ibverbs-utils libibumad-dev \
    libnuma-dev librdmacm-dev ibverbs-providers \
    # Rust build dependencies
    clang \
    libclang-dev \
    git \
    # Install utilities
    nvtop \
    tmux \
    vim \
    autoconf \
    libtool \
    net-tools

WORKDIR /workspace

### UCX EFA Setup ###
# Remove any pre-existing UCX trees, then build UCX from source at $NIXL_UCX_REF.
# `make -j"$(nproc)"` bounds the job count to the number of available CPUs; a bare
# `make -j` places no limit on parallel jobs and can exhaust memory on the builder.
RUN rm -rf /opt/hpcx/ucx && \
    rm -rf /usr/local/ucx && \
    echo "Building UCX with reference $NIXL_UCX_REF" && \
    cd /usr/local/src && \
    git clone https://github.com/openucx/ucx.git && \
    cd ucx && \
    git checkout $NIXL_UCX_REF && \
    ./autogen.sh && ./configure \
        --prefix=/usr/local/ucx \
        --enable-shared \
        --disable-static \
        --disable-doxygen-doc \
        --enable-optimizations \
        --enable-cma \
        --enable-devel-headers \
        --with-cuda=/usr/local/cuda \
        --with-verbs \
        --with-efa \
        --with-dm \
        --with-gdrcopy=/usr/local \
        --enable-mt && \
    make -j"$(nproc)" && \
    make -j"$(nproc)" install-strip && \
    ldconfig

ENV LD_LIBRARY_PATH=\
/usr/lib:/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
$LD_LIBRARY_PATH
ENV CPATH=/usr/include:$CPATH
ENV PATH=/usr/bin:$PATH
ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH

# All following RUN instructions in this stage (and stages built FROM it) use bash
SHELL ["/bin/bash", "-c"]

WORKDIR /workspace

### NIXL SETUP ###
# Clone nixl source
# TEMP: disable gds backend for arm64
RUN git clone "https://github.com/ai-dynamo/nixl.git" ${NIXL_SRC_DIR} && \
    cd ${NIXL_SRC_DIR} && \
    git checkout ${NIXL_REF} && \
    if [ "$ARCH" = "arm64" ]; then \
        nixl_build_args="-Ddisable_gds_backend=true"; \
    else \
        nixl_build_args=""; \
    fi && \
    mkdir build && \
    meson setup build/ --buildtype=release --prefix=$NIXL_PREFIX $nixl_build_args && \
    cd build/ && \
    ninja && \
    ninja install;

### NATS & ETCD SETUP ###
# nats
RUN wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/v2.10.28/nats-server-v2.10.28-${ARCH}.deb && \
    dpkg -i nats-server-v2.10.28-${ARCH}.deb && rm nats-server-v2.10.28-${ARCH}.deb

# etcd
ENV ETCD_VERSION="v3.5.21"
RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
    mkdir -p /usr/local/bin/etcd && \
    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH

### VIRTUAL ENVIRONMENT SETUP ###

# Install uv and create virtualenv
# NOTE(review): the `latest` tag is unpinned, so rebuilds may pick up a different
# uv release -- consider pinning to a specific version.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/dynamo && \
    uv venv /opt/dynamo/venv --python 3.12

# Activate virtual environment
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Install NIXL Python module
# TODO: Move gds_path selection based on arch into NIXL build
# TEMP: disable gds backend for arm64
RUN if [ "$ARCH" = "arm64" ]; then \
        cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl \
        --config-settings=setup-args="-Ddisable_gds_backend=true"; \
    else \
        cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl; \
    fi && \
    # Install the wheel
    # TODO: Move NIXL wheel install to the wheel_builder stage
    uv pip install /workspace/wheels/nixl/*.whl

# Install sglang
#TODO: Built wheel should become an artifact which can be cached and reused in subsequent builds
ARG SGLANG_VERSION
RUN --mount=type=cache,target=/root/.cache/uv \
    cd /opt && \
    git clone https://github.com/sgl-project/sglang.git && \
    cd sglang && \
    git checkout v${SGLANG_VERSION} && \
    # Install in editable mode for development
    uv pip install -e "python[all]"

# Set env var that allows for forceful shutdown of inflight requests in SGL's TokenizerManager
ENV SGL_FORCE_SHUTDOWN=1

# Common dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt

# Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt

# ### MISC UTILITY SETUP ###

# Finish pyright install
RUN pyright --help > /dev/null 2>&1

# Enable Git operations in the /workspace directory
RUN printf "[safe]\n directory=/workspace\n" > /root/.gitconfig

### BUILDS ###

# Rust build/dev dependencies
# Uses apt-get (as elsewhere in this file): the `apt` front-end does not provide
# a stable command-line interface for scripted use.
RUN apt-get update -y && \
    apt-get install --no-install-recommends -y \
    build-essential \
    protobuf-compiler \
    cmake \
    libssl-dev \
    pkg-config

ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
    RUST_VERSION=1.87.0

# Define Rust target based on ARCH_ALT ARG
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu

# Install Rust using RUSTARCH derived from ARCH_ALT
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
    # TODO: Add SHA check back based on RUSTARCH
    chmod +x rustup-init && \
    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
    rm rustup-init && \
    chmod -R a+w $RUSTUP_HOME $CARGO_HOME

ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}

#######################################
########## Local Development ##########
#######################################

FROM base AS local-dev

# https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user
# Will use the default ubuntu user, but give sudo access
# Needed so files permissions aren't set to root ownership when writing from inside container

# Don't want ubuntu to be editable, just change uid and gid. User ubuntu is hardcoded in .devcontainer
ENV USERNAME=ubuntu
ARG USER_UID=1000
ARG USER_GID=1000

RUN apt-get update && apt-get install -y sudo gnupg2 gnupg1 \
    && echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
    && chmod 0440 /etc/sudoers.d/$USERNAME \
    && mkdir -p /home/$USERNAME \
    && chown -R $USERNAME:$USERNAME /home/$USERNAME \
    && rm -rf /var/lib/apt/lists/* \
    && chsh -s /bin/bash $USERNAME

# This is a slow operation (~40s on my cpu)
# Much better than chown -R $USERNAME:$USERNAME /opt/dynamo/venv (~10min on my cpu)
COPY --from=base --chown=$USER_UID:$USER_GID /opt/dynamo/venv/ /opt/dynamo/venv/
RUN chown $USERNAME:$USERNAME /opt/dynamo/venv
COPY --from=base --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin

USER $USERNAME
ENV HOME=/home/$USERNAME
ENV PYTHONPATH=/workspace/dynamo/components/planner/src:/workspace/examples/sglang:$PYTHONPATH
WORKDIR $HOME

# https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history
RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.commandhistory/.bash_history" \
    && mkdir -p $HOME/.commandhistory \
    && touch $HOME/.commandhistory/.bash_history \
    && echo "$SNIPPET" >> "$HOME/.bashrc"

RUN mkdir -p /home/$USERNAME/.cache/

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

##################################
##### Wheel Build Image ##########
##################################

# Redeclare ARCH_ALT ARG so it's available for interpolation in the FROM instruction
ARG ARCH_ALT

FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder

ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG RELEASE_BUILD

# Keep in sync with the base image.
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl

WORKDIR /workspace

RUN yum update -y \
    && yum install -y llvm-toolset \
    && yum install -y python3.12-devel \
    && yum install -y protobuf-compiler \
    && yum clean all \
    && rm -rf /var/cache/yum

ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/workspace/target \
    VIRTUAL_ENV=/opt/dynamo/venv

# Reuse the Rust toolchain, NIXL install, workspace and venv prepared in `base`
COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=base $CARGO_HOME $CARGO_HOME
COPY --from=base $NIXL_PREFIX $NIXL_PREFIX
COPY --from=base /workspace /workspace
COPY --from=base $VIRTUAL_ENV $VIRTUAL_ENV
ENV PATH=$CARGO_HOME/bin:$VIRTUAL_ENV/bin:$PATH

# Copy configuration files
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/
COPY Cargo.toml /workspace/
COPY Cargo.lock /workspace/
COPY rust-toolchain.toml /workspace/

# Copy source code
COPY lib/ /workspace/lib/
COPY components /workspace/components
COPY launch /workspace/launch

RUN cargo build \
    --release \
    --locked \
    --features dynamo-llm/block-manager \
    --workspace

# Build dynamo wheel
# The `maturin[patchelf]` requirement is quoted so the shell cannot treat the
# brackets as a glob pattern (matching the other extras installs in this file).
RUN uv build --wheel --out-dir /workspace/dist && \
    cd /workspace/lib/bindings/python && \
    uv pip install "maturin[patchelf]" && \
    maturin build --release --features block-manager --out /workspace/dist && \
    if [ "$RELEASE_BUILD" = "true" ]; then \
        uv run --python 3.11 maturin build --release --features block-manager --out /workspace/dist && \
        uv run --python 3.10 maturin build --release --features block-manager --out /workspace/dist; \
    fi

#######################################
########## CI Minimum Image ###########
#######################################

FROM base AS ci_minimum

ENV DYNAMO_HOME=/workspace
ENV CARGO_TARGET_DIR=/workspace/target

WORKDIR /workspace

COPY --from=wheel_builder /workspace /workspace
COPY --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
# Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME

# Copy rest of the code
COPY . /workspace

# Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \
    mkdir /opt/dynamo/bindings/lib && \
    cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
    cp target/release/metrics /usr/local/bin

RUN uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
    uv pip install /workspace/dist/ai_dynamo*any.whl

# Copy launch banner
# Lines starting with "# " are stripped from the message before it is installed
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

ENV PYTHONPATH=/workspace/dynamo/components/planner/src:/workspace/examples/sglang/utils:$PYTHONPATH

########################################
########## Development Image ###########
########################################

FROM ci_minimum AS dev

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

CMD []

####################################
########## Runtime Image ###########
####################################

FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

WORKDIR /workspace
ENV DYNAMO_HOME=/workspace
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

### COPY NATS & ETCD ###
# Copy nats and etcd from base image
COPY --from=base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=base /usr/local/bin/etcd/ /usr/local/bin/etcd/
ENV PATH=/usr/local/bin/etcd/:$PATH

# Copy UCX from base image as plugin for NIXL
# Copy the NIXL install prefix ($NIXL_PREFIX) from wheel_builder image
ARG ARCH_ALT
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
COPY --from=base /usr/local/ucx /usr/local/ucx
COPY --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
ENV LD_LIBRARY_PATH=\
$NIXL_LIB_DIR:\
$NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
$LD_LIBRARY_PATH

# Setup the python environment
# libnuma-dev is a required dependency for sglang integration with NIXL
# NOTE(review): the `latest` tag is unpinned, so rebuilds may pick up a different
# uv release -- consider pinning to a specific version.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    build-essential python3-dev libnuma-dev \
    # jq and curl for polling various endpoints and health checks
    curl \
    jq \
    # For debugging
    vim \
    # Libraries required by UCX to find RDMA devices
    libibverbs1 rdma-core ibverbs-utils libibumad3 \
    libnuma1 librdmacm1 ibverbs-providers && \
    rm -rf /var/lib/apt/lists/* && \
    uv venv $VIRTUAL_ENV --python 3.12 && \
    echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc

# Install the wheels and symlink executables to /usr/local/bin so dynamo components can use them
# Copy metrics binary from the ci_minimum image (which received it from wheel_builder's
# /workspace); it is not part of the ai-dynamo wheel
# Dynamo components currently do not have the VIRTUAL_ENV in their PATH, so we need to symlink the executables
COPY --from=ci_minimum /workspace/target/release/metrics /usr/local/bin/metrics
COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
COPY --from=base /workspace/wheels/nixl/*.whl wheelhouse/
RUN uv pip install "ai-dynamo[sglang]" --pre --find-links wheelhouse

# Copy launch banner
# Lines starting with "# " are stripped from the message before it is installed
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

# Once UX refactor is merged, we can remove these files
# Python components will have been pip installed and packaged in wheel
COPY components/ /workspace/components/

# Copy benchmarks, examples, and tests for CI
# TODO: Remove this once we have a functional CI image built on top of the runtime image
COPY tests /workspace/tests
COPY benchmarks /workspace/benchmarks
COPY examples /workspace/examples
RUN uv pip install /workspace/benchmarks

# Copy attribution files
COPY ATTRIBUTION* LICENSE /workspace/

ENV PYTHONPATH=/workspace/examples/sglang/utils:$PYTHONPATH

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []