# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Note: This Dockerfile will be deprecated in favor of Dockerfile.sglang-wideep soon.
# Please build the container with that Dockerfile instead.

ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# TODO OPS-612: NCCL will hang with 25.03, so use 25.01 for now
# Please check https://github.com/ai-dynamo/dynamo/pull/1065
# for details and reproducer to manually test if the image
# can be updated to later versions.
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
ARG RELEASE_BUILD
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"

# Make sure to update the dependency version in pyproject.toml when updating this
ARG SGLANG_VERSION="0.5.3.post2"

# Define general architecture ARGs for supporting both x86 and aarch64 builds.
#   ARCH: Used for package suffixes (e.g., amd64, arm64)
#   ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
#
# Default values are for x86/amd64:
#   --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
#
# For arm64/aarch64, build with:
#   --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
#
# NOTE: There isn't an easy way to define one of these values based on the other value
# without adding if statements everywhere, so just define both as ARGs for now.
ARG ARCH=amd64
ARG ARCH_ALT=x86_64

# Python configuration
ARG PYTHON_VERSION=3.12

# Use dynamo base image (see /container/Dockerfile for more details).
# Later stages copy NATS, etcd, UCX, NIXL, the Rust toolchain and the dynamo
# wheelhouse out of this stage.
ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base

########################################################
########## Framework Development Image ################
########################################################
#
# PURPOSE: Framework development and SGLang compilation
#
# This stage builds and compiles framework dependencies including:
# - SGLang inference engine with CUDA support
# - All necessary build tools and compilation dependencies
# - Framework-level Python packages and extensions
#
# Use this stage when you need to:
# - Build SGLang from source with custom modifications
# - Develop or debug framework-level components
# - Create custom builds with specific optimization flags
#

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework

# Global ARGs are only visible in FROM lines; redeclare the ones this stage uses.
ARG PYTHON_VERSION

RUN apt-get update -y \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        # Python runtime - CRITICAL for virtual environment to work
        python${PYTHON_VERSION}-dev \
        build-essential \
        git \
        git-lfs \
        # SGLang build dependencies
        cmake \
        ibverbs-providers \
        ibverbs-utils \
        libibumad-dev \
        libibverbs-dev \
        libnuma-dev \
        librdmacm-dev \
        rdma-core \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

### VIRTUAL ENVIRONMENT SETUP ###

COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Create virtual environment
RUN mkdir -p /opt/dynamo/venv && \
    uv venv /opt/dynamo/venv --python $PYTHON_VERSION

# Activate virtual environment
ENV VIRTUAL_ENV=/opt/dynamo/venv \
    PATH="/opt/dynamo/venv/bin:${PATH}"

# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT

WORKDIR /workspace

# Install SGLang and related dependencies.
# The checkout lives in /opt/sglang because the runtime stage copies that
# directory together with the venv (the install is editable, so the venv
# points back at the source tree).
ARG SGLANG_VERSION
RUN --mount=type=cache,target=/root/.cache/uv \
    cd /opt && \
    git clone https://github.com/sgl-project/sglang.git && \
    cd sglang && \
    git checkout v${SGLANG_VERSION} && \
    # Install in editable mode for development
    uv pip install -e "python[all]"

# Set env var that allows for forceful shutdown of inflight requests in SGL's TokenizerManager
ENV SGL_FORCE_SHUTDOWN=1

##################################################
########## Runtime Image ########################
##################################################
#
# PURPOSE: Production runtime environment
#
# This stage creates a lightweight production-ready image containing:
# - Pre-compiled SGLang and framework dependencies
# - Dynamo runtime libraries and Python packages
# - Essential runtime dependencies and configurations
# - Optimized for inference workloads and deployment
#
# Use this stage when you need:
# - Production deployment of Dynamo with SGLang
# - Minimal runtime footprint without build tools
# - Ready-to-run inference server environment
# - Base for custom application containers
#

FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

WORKDIR /workspace
ENV DYNAMO_HOME=/opt/dynamo
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# This stage starts from a different base than the framework stage, so ENV
# values set there are not inherited (COPY --from only transfers files).
# Set env var that allows for forceful shutdown of inflight requests in SGL's TokenizerManager
ENV SGL_FORCE_SHUTDOWN=1

ARG ARCH_ALT
ARG PYTHON_VERSION

ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins

# Install Python, build-essential and python3-dev as apt dependencies
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        # Python runtime - CRITICAL for virtual environment to work
        python${PYTHON_VERSION}-dev \
        build-essential \
        # jq and curl for polling various endpoints and health checks
        jq \
        git \
        git-lfs \
        curl \
        # Libraries required by UCX to find RDMA devices
        libibverbs1 rdma-core ibverbs-utils libibumad3 \
        libnuma1 librdmacm1 ibverbs-providers \
        # JIT Kernel Compilation, flashinfer
        ninja-build \
        g++ \
        # prometheus dependencies
        ca-certificates && \
    rm -rf /var/lib/apt/lists/*

# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from framework devel image
COPY --from=framework /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc
COPY --from=framework /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++
COPY --from=framework /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas
COPY --from=framework /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary
COPY --from=framework /usr/local/cuda/include/ /usr/local/cuda/include/
COPY --from=framework /usr/local/cuda/nvvm /usr/local/cuda/nvvm
COPY --from=framework /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/

### COPY NATS & ETCD ###
# Copy nats and etcd from dynamo_base image
COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH

# Copy UCX (used as a plugin for NIXL) and the NIXL install prefix from the dynamo_base image
COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx
COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX

# Copies sglang repo (editable install)
COPY --from=framework /opt/sglang /opt/sglang

# Make the NIXL and UCX shared libraries resolvable ahead of whatever the base image provides
ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}

### VIRTUAL ENVIRONMENT SETUP ###

# Copy uv and entire virtual environment from framework container
COPY --from=framework /bin/uv /bin/uvx /bin/
COPY --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV}

# Install dynamo, NIXL, and dynamo-specific dependencies
COPY --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
# The benchmarks sources are only needed while this step runs, so they are
# bind-mounted instead of COPY'd: deleting a COPY'd directory in a later RUN
# does not remove it from the earlier layer. `rw` lets the build backend write
# scratch files into the mount; those writes are discarded after the step.
# NOTE: the `cp312` wheel glob is tied to the default PYTHON_VERSION (3.12);
# update it together with that ARG.
RUN --mount=type=bind,source=./benchmarks,target=/opt/dynamo/benchmarks,rw \
    uv pip install \
        /opt/dynamo/wheelhouse/ai_dynamo_runtime*cp312*.whl \
        /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
        /opt/dynamo/wheelhouse/nixl/nixl*.whl \
    && cd /opt/dynamo/benchmarks \
    && UV_GIT_LFS=1 uv pip install --no-cache . \
    && cd -

# Install common and test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
    UV_GIT_LFS=1 uv pip install \
        --no-cache \
        --requirement /tmp/requirements.txt \
        --requirement /tmp/requirements.test.txt

# Copy launch banner: strip lines starting with "# " and print the rest on every interactive bash start
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

# Copy tests, benchmarks, deploy and components for CI
COPY tests /workspace/tests
COPY benchmarks /workspace/benchmarks
COPY examples /workspace/examples
COPY deploy /workspace/deploy
COPY components/ /workspace/components/

# Copy attribution files
COPY ATTRIBUTION* LICENSE /workspace/

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []

###########################################################
########## Development (run.sh, runs as root user) ########
###########################################################
#
# PURPOSE: Local development environment for use with run.sh (not Dev Container plug-in)
#
# This stage runs as root and provides:
# - Development tools and utilities for local debugging
# - Support for vscode/cursor development outside the Dev Container plug-in
#
# Use this stage if you need a full-featured development environment with extra tools,
# but do not use it with the Dev Container plug-in.

FROM runtime AS dev

# Root of the source checkout inside the container; exported below as
# WORKSPACE_DIR and reused as DYNAMO_HOME.
ARG WORKSPACE_DIR=/workspace

# Install utilities as root
RUN apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        # Install utilities
        nvtop \
        wget \
        tmux \
        vim \
        git \
        openssh-client \
        iproute2 \
        rsync \
        zip \
        unzip \
        htop \
        # Build Dependencies
        autoconf \
        automake \
        cmake \
        libtool \
        meson \
        net-tools \
        pybind11-dev \
        # Rust build dependencies
        clang \
        libclang-dev \
        protobuf-compiler && \
    rm -rf /var/lib/apt/lists/*

# Set workspace directory variable
ENV WORKSPACE_DIR=${WORKSPACE_DIR} \
    DYNAMO_HOME=${WORKSPACE_DIR} \
    RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/workspace/target \
    VIRTUAL_ENV=/opt/dynamo/venv \
    PATH=/usr/local/cargo/bin:$PATH

# Rust toolchain comes prebuilt from the dynamo_base image
COPY --from=dynamo_base /usr/local/rustup /usr/local/rustup
COPY --from=dynamo_base /usr/local/cargo /usr/local/cargo

# Install maturin, for maturin develop
# (quoted so the shell never treats the brackets as a glob pattern)
RUN uv pip install "maturin[patchelf]"

# Editable install of dynamo
COPY pyproject.toml README.md hatch_build.py /workspace/
RUN uv pip install --no-deps -e .

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []