Unverified Commit 4816d639 authored by Keiven C's avatar Keiven C Committed by GitHub
Browse files

refactor: remove "dev" stage from Dockerfile.* and centralize them into Dockerfile.dev (#5050)


Signed-off-by: default avatarKeiven Chang <keivenchang@users.noreply.github.com>
Co-authored-by: default avatarKeiven Chang <keivenchang@users.noreply.github.com>
parent a936ba5a
...@@ -69,6 +69,7 @@ core: ...@@ -69,6 +69,7 @@ core:
- 'container/build.sh' - 'container/build.sh'
- 'container/Dockerfile' - 'container/Dockerfile'
- '.dockerignore' - '.dockerignore'
- 'container/dev/**'
- 'container/deps/*' - 'container/deps/*'
- '.cargo/config.toml' - '.cargo/config.toml'
- 'lib/**' - 'lib/**'
......
...@@ -65,7 +65,7 @@ jobs: ...@@ -65,7 +65,7 @@ jobs:
- name: Define Image Tag - name: Define Image Tag
id: define_image_tag id: define_image_tag
run: | run: |
echo "image_tag=dynamo:latest" >> $GITHUB_OUTPUT echo "image_tag=dynamo:latest-dev" >> $GITHUB_OUTPUT
- name: Build image - name: Build image
env: env:
GITHUB_TOKEN: ${{ secrets.CI_TOKEN }} GITHUB_TOKEN: ${{ secrets.CI_TOKEN }}
......
...@@ -438,14 +438,19 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \ ...@@ -438,14 +438,19 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
FROM base AS runtime FROM base AS runtime
ARG ARCH_ALT ARG ARCH_ALT
ARG PYTHON_VERSION
# Create dynamo user with group 0 for OpenShift compatibility # Create dynamo user with group 0 for OpenShift compatibility
RUN userdel -r ubuntu > /dev/null 2>&1 || true \ RUN userdel -r ubuntu > /dev/null 2>&1 || true \
&& useradd -m -s /bin/bash -g 0 dynamo \ && useradd -m -s /bin/bash -g 0 dynamo \
&& [ `id -u dynamo` -eq 1000 ] \ && [ `id -u dynamo` -eq 1000 ] \
&& mkdir -p /home/dynamo/.cache \ && mkdir -p /home/dynamo/.cache /opt/dynamo \
&& chown -R dynamo: /opt/dynamo /home/dynamo /workspace \ # Non-recursive chown - only the directories themselves, not contents
&& chmod -R g+w /opt/dynamo /home/dynamo/.cache /workspace && chown dynamo:0 /home/dynamo /home/dynamo/.cache /opt/dynamo /workspace \
# No chmod needed: umask 002 handles new files, COPY --chmod handles copied content
# Set umask globally for all subsequent RUN commands (must be done as root before USER dynamo)
# NOTE: Setting ENV UMASK=002 does NOT work - umask is a shell builtin, not an environment variable
&& mkdir -p /etc/profile.d && echo 'umask 002' > /etc/profile.d/00-umask.sh
# NIXL environment variables # NIXL environment variables
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \ ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \
...@@ -472,55 +477,30 @@ RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/loca ...@@ -472,55 +477,30 @@ RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/loca
COPY --chown=dynamo: --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR COPY --chown=dynamo: --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/ COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
############################################## # Install Python for framework=none runtime (cuda-dl-base doesn't include Python)
########## Dev entrypoint image ############## # This is needed to create venv and install dynamo packages
############################################## ARG PYTHON_VERSION
RUN apt-get update && \
FROM runtime AS dev DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
python${PYTHON_VERSION}-dev \
# Application environment variables python${PYTHON_VERSION}-venv && \
RUN apt-get update -y \ apt-get clean && \
&& apt-get install -y --no-install-recommends \ rm -rf /var/lib/apt/lists/* && \
# required for AIC perf files ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3
git \
git-lfs \
# rust build packages
clang \
libclang-dev \
protobuf-compiler \
pkg-config \
# sudo for dev stage
sudo \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* \
# Add sudo privileges to dynamo user
&& echo "dynamo ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/dynamo \
&& chmod 0440 /etc/sudoers.d/dynamo
# Switch to dynamo user # Switch to dynamo user and create virtual environment
USER dynamo USER dynamo
ENV HOME=/home/dynamo \ ENV HOME=/home/dynamo
DYNAMO_HOME=/opt/dynamo \
CARGO_TARGET_DIR=/opt/dynamo/target
ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:/usr/local/cuda/compat/lib.real:${LD_LIBRARY_PATH}
# Create and activate virtual environment # Create and activate virtual environment
ARG PYTHON_VERSION # Use login shell to pick up umask 002 from /etc/profile.d/00-umask.sh for group-writable files
RUN uv venv /opt/dynamo/venv --python $PYTHON_VERSION SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"]
RUN uv venv /opt/dynamo/venv --python ${PYTHON_VERSION}
ENV VIRTUAL_ENV=/opt/dynamo/venv \ ENV VIRTUAL_ENV=/opt/dynamo/venv \
PATH="/opt/dynamo/venv/bin:${PATH}" PATH="/opt/dynamo/venv/bin:${PATH}"
# Install common and test dependencies # Install dynamo wheels (runtime packages only, no test dependencies)
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
--mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
UV_GIT_LFS=1 uv pip install \
--no-cache \
--requirement /tmp/requirements.txt \
--requirement /tmp/requirements.test.txt
COPY --chown=dynamo: ./ /workspace/
ARG ENABLE_KVBM ARG ENABLE_KVBM
RUN uv pip install \ RUN uv pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \ /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
...@@ -533,25 +513,11 @@ RUN uv pip install \ ...@@ -533,25 +513,11 @@ RUN uv pip install \
exit 1; \ exit 1; \
fi; \ fi; \
uv pip install "$KVBM_WHEEL"; \ uv pip install "$KVBM_WHEEL"; \
fi && \ fi
cd /workspace/benchmarks && \
UV_GIT_LFS=1 uv pip install --no-cache .
# Setup launch banner in common directory accessible to all users
RUN --mount=type=bind,source=./container/launch_message/runtime.txt,target=/opt/dynamo/launch_message.txt \
sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen
# Setup environment for all users
USER root
RUN chmod 755 /opt/dynamo/.launch_screen && \
echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
ARG DYNAMO_COMMIT_SHA ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
USER dynamo
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD [] CMD []
...@@ -651,4 +617,4 @@ RUN chmod 755 /opt/dynamo/.launch_screen && \ ...@@ -651,4 +617,4 @@ RUN chmod 755 /opt/dynamo/.launch_screen && \
USER dynamo USER dynamo
ENTRYPOINT ["/epp"] ENTRYPOINT ["/epp"]
CMD ["/bin/bash"] CMD ["/bin/bash"]
\ No newline at end of file
# syntax=docker/dockerfile:1.10.0
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# This Dockerfile creates a local development environment for Dev Container plug-in use.
# It takes a BASE image (typically the dev target) and adds local-dev specific configurations
# including additional developer utilities and tools.
#
# Usage:
# - Dev Container IDE Extension: Use directly with VS Code/Cursor Dev Container extension
# - Command line: run.sh --image <local-dev-image> --mount-workspace ...
# where the ubuntu user inside the container is mapped to your local user login
ARG DEV_BASE=""
FROM ${DEV_BASE} AS local-dev
# Switch to root for package installation (dev stage ends as dynamo user)
USER root
# Reset SHELL to non-login bash (the dev stage uses a login shell), but keep
# `-o pipefail`: without it, a failure on the left-hand side of a pipe in a later
# RUN (e.g. a download piped into another tool) is masked by the exit status of
# the last command in the pipe and the build continues with bad data.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# Install developer utilities. Some of these may already exist in the base image, but to
# ensure consistency across all dev images every required dev tool is listed explicitly.
#
# The package index is refreshed and removed inside the same RUN: lists deleted by a
# later instruction would still be stored in this layer and would not shrink the image.
# DEBIAN_FRONTEND is set inline (not via ENV) so it does not leak into the runtime env.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        # Development utilities
        curl wget git vim nano less \
        # System utilities
        htop nvtop tmux screen \
        # Network utilities
        net-tools iproute2 iputils-ping \
        # Archive utilities
        zip unzip rsync \
        # Build tools
        build-essential cmake autoconf automake libtool \
        # Debug and analysis tools
        gdb valgrind strace ltrace \
        # Text processing
        jq yq grep sed \
        # File utilities
        tree fd-find ripgrep \
        # Shell utilities
        zsh fish bash-completion \
        # User management
        sudo gnupg2 gnupg1 \
    && rm -rf /var/lib/apt/lists/*
# Install awk separately with fault tolerance.
# awk is a virtual package with multiple implementations (gawk, mawk, original-awk).
# Separated because TensorRT-LLM builds failed on awk package conflicts.
# This prevents main package installation failures due to awk availability issues.
# This RUN refreshes the package index itself instead of relying on lists left behind
# by a previous layer, and removes them again before the layer is committed.
RUN apt-get update \
    && (DEBIAN_FRONTEND=noninteractive apt-get install -y gawk \
        || DEBIAN_FRONTEND=noninteractive apt-get install -y mawk \
        || DEBIAN_FRONTEND=noninteractive apt-get install -y original-awk \
        || echo "Warning: Could not install any awk implementation") \
    && rm -rf /var/lib/apt/lists/* \
    && (which awk && echo "awk successfully installed: $(which awk)" || echo "awk not available")
# The dynamo account already exists in the base image; this stage does not create a new
# user, it only remaps that account's UID/GID (see the usermod/groupmod step below).
# USERNAME is an ENV (not an ARG) so it is also visible inside the running container.
ENV USERNAME=dynamo
# Target UID/GID for the dynamo user. No defaults are provided, so both must be supplied
# with --build-arg.
ARG USER_UID
ARG USER_GID
# Workspace path inside the container; exported as an ENV further down.
ARG WORKSPACE_DIR=/workspace
# Architecture path component of the NVIDIA devtools apt repository URL used below.
ARG ARCH=amd64
# Add NVIDIA devtools repository and install development tools.
# The signing key is downloaded to a file first and then de-armored, rather than piped
# straight into gpg, so a failed download aborts the build regardless of the SHELL's
# pipefail setting. The keyring directory is created explicitly in case the base image
# does not ship it, and the apt lists are removed in the same layer that fetched them.
RUN mkdir -p /etc/apt/keyrings \
    && wget -qO /tmp/nvidia-devtools.pub "https://developer.download.nvidia.com/devtools/repos/ubuntu2404/${ARCH}/nvidia.pub" \
    && gpg --dearmor -o /etc/apt/keyrings/nvidia-devtools.gpg /tmp/nvidia-devtools.pub \
    && rm -f /tmp/nvidia-devtools.pub \
    && echo "deb [signed-by=/etc/apt/keyrings/nvidia-devtools.gpg] https://developer.download.nvidia.com/devtools/repos/ubuntu2404/${ARCH} /" > /etc/apt/sources.list.d/nvidia-devtools.list \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y nsight-systems-2025.5.1 \
    && rm -rf /var/lib/apt/lists/*
# https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user
# Configure user with sudo access for Dev Container workflows and remap its UID/GID.
# USER_UID and USER_GID have no defaults; fail early with a clear message when either
# is missing instead of letting groupmod/usermod fail on an empty argument.
RUN if [ -z "${USER_UID}" ] || [ -z "${USER_GID}" ]; then \
        echo "ERROR: USER_UID and USER_GID build args must both be set" >&2; \
        exit 1; \
    fi \
    && echo "$USERNAME ALL=(root) NOPASSWD:ALL" > "/etc/sudoers.d/$USERNAME" \
    && chmod 0440 "/etc/sudoers.d/$USERNAME" \
    && mkdir -p "/home/$USERNAME" \
    # Handle GID conflicts: if target GID exists and it's not our group, remove it
    # (best effort: a failed lookup or groupdel is ignored via `|| true`)
    && (getent group "$USER_GID" | grep -v "^$USERNAME:" && groupdel "$(getent group "$USER_GID" | cut -d: -f1)" || true) \
    # Create group if it doesn't exist, otherwise modify existing group
    && (getent group "$USERNAME" > /dev/null 2>&1 && groupmod -g "$USER_GID" "$USERNAME" || groupadd -g "$USER_GID" "$USERNAME") \
    # Primary group becomes USER_GID; group 0 is kept as a supplementary group
    && usermod -u "$USER_UID" -g "$USER_GID" -G 0 "$USERNAME" \
    # Non-recursive: only the home directory itself is re-owned here
    && chown "$USERNAME:$USER_GID" "/home/$USERNAME" \
    && chsh -s /bin/bash "$USERNAME"
# Clean up package lists at the end.
# NOTE(review): this removes the lists from the final filesystem view only. Any lists
# written by an earlier RUN remain stored in that earlier layer, so this step does not
# reduce the image size; cleanup has to happen in the same RUN that ran `apt-get update`.
RUN rm -rf /var/lib/apt/lists/*
# Runtime environment for the local-dev target.
#   WORKSPACE_DIR     workspace directory, exported so it is visible in the container
#   HOME              home directory of the remapped user
#   DYNAMO_HOME       main project directory (the workspace mount point)
#   CARGO_TARGET_DIR  build artifacts kept under the workspace for persistence
ENV WORKSPACE_DIR=${WORKSPACE_DIR} \
    HOME=/home/${USERNAME} \
    DYNAMO_HOME=${WORKSPACE_DIR} \
    CARGO_TARGET_DIR=${WORKSPACE_DIR}/target
# Put the cargo binaries on PATH for rust tool access.
# NOTE: CARGO_HOME and RUSTUP_HOME are already inherited from dev stage (Dockerfile.sglang|trtllm|vllm)
ENV PATH=${CARGO_HOME}/bin:${PATH}
# From here on everything runs as the dynamo user, starting in its home directory.
# (The dev stage has umask 002, so files should already be group-writable.)
USER $USERNAME
WORKDIR $HOME
# Persist bash history in a dedicated, group-writable directory:
# https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history
# $HOME is expanded at build time, so the line appended to .bashrc contains the literal path.
RUN mkdir -p "$HOME/.commandhistory" \
    && chmod g+w "$HOME/.commandhistory" \
    && touch "$HOME/.commandhistory/.bash_history" \
    && echo "export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.commandhistory/.bash_history" >> "$HOME/.bashrc"
# Make sure the user's cache directory exists and is group-writable.
RUN mkdir -p "/home/$USERNAME/.cache/" \
    && chmod g+w "/home/$USERNAME/.cache/"
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
...@@ -561,120 +561,6 @@ USER dynamo ...@@ -561,120 +561,6 @@ USER dynamo
ARG DYNAMO_COMMIT_SHA ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=${DYNAMO_COMMIT_SHA} ENV DYNAMO_COMMIT_SHA=${DYNAMO_COMMIT_SHA}
###########################################################
########## Development (run.sh, runs as root user) ########
###########################################################
#
# PURPOSE: Local development environment for use with run.sh (not Dev Container plug-in)
#
# This stage runs as root and provides:
# - Development tools and utilities for local debugging
# - Support for vscode/cursor development outside the Dev Container plug-in
#
# Use this stage if you need a full-featured development environment with extra tools,
# but do not use it with the Dev Container plug-in.
FROM runtime AS dev
# Workspace location inside the container; exported as an ENV later in this stage.
ARG WORKSPACE_DIR=/workspace
# System package installation requires root.
USER root
# General utilities, build dependencies and Rust build dependencies.
# Packages are listed alphabetically, one per line, to keep future diffs small.
RUN apt-get update -y && \
    apt-get install -y --no-install-recommends \
        autoconf \
        automake \
        clang \
        cmake \
        git \
        htop \
        iproute2 \
        libclang-dev \
        libtool \
        meson \
        net-tools \
        nvtop \
        openssh-client \
        pkg-config \
        protobuf-compiler \
        pybind11-dev \
        rsync \
        tmux \
        unzip \
        vim \
        wget \
        zip && \
    rm -rf /var/lib/apt/lists/*
# Group-writable files in the dev stage (runs as root): drop a umask 002 snippet into
# /etc/profile.d and switch RUN to a login shell so that the snippet is sourced.
RUN mkdir -p /etc/profile.d \
    && printf 'umask 002\n' > /etc/profile.d/00-umask.sh
SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"]
# Workspace and Rust toolchain locations; cargo binaries go first on PATH.
ENV WORKSPACE_DIR=${WORKSPACE_DIR} \
    DYNAMO_HOME=${WORKSPACE_DIR} \
    RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/workspace/target \
    PATH=/usr/local/cargo/bin:${PATH}
# Reuse the rust installation from dynamo_base to avoid duplication efforts.
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> because COPY --chmod only affects <path>/*, not <path>
COPY --chmod=775 --from=dynamo_base /usr/local/rustup /usr/local/rustup
COPY --chmod=775 --from=dynamo_base /usr/local/cargo /usr/local/cargo
RUN chmod g+w /usr/local/rustup /usr/local/cargo
# ARCH_ALT selects the multiarch library directory (<ARCH_ALT>-linux-gnu) used below.
# ARCH is declared but not referenced in the visible part of this stage.
ARG ARCH
ARG ARCH_ALT
# Copy UCX and NIXL libraries for dev stage compilation
# The upstream SGLang runtime image doesn't include NIXL, but cargo build needs to link against
# -lnixl, -lnixl_build, and -lnixl_common. Runtime stage doesn't need this since it uses pre-built
# wheels, but dev stage needs it for maturin develop and cargo build from source.
COPY --from=wheel_builder /usr/local/ucx /usr/local/ucx
COPY --from=wheel_builder /opt/nvidia/nvda_nixl /opt/nvidia/nvda_nixl
# The contents of lib64 are additionally copied into the <ARCH_ALT>-linux-gnu directory,
# which is the path NIXL_LIB_DIR points at.
COPY --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. /opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/
# Set NIXL environment variables for compilation
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \
NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins
# Update LD_LIBRARY_PATH to include NIXL libraries for the linker.
# This is a separate ENV instruction because NIXL_LIB_DIR / NIXL_PLUGIN_DIR must already be
# set when they are expanded. The continuation lines start at column 0 on purpose: leading
# whitespace would become part of the value.
ENV LD_LIBRARY_PATH=\
${NIXL_LIB_DIR}:\
${NIXL_PLUGIN_DIR}:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
${LD_LIBRARY_PATH}
# Create virtual environment for maturin develop (required by maturin develop command)
# Use --system-site-packages to inherit sglang, torch, etc. from upstream
RUN mkdir -p /opt/dynamo/venv && \
    python3 -m venv --system-site-packages /opt/dynamo/venv
ENV VIRTUAL_ENV=/opt/dynamo/venv \
    PATH="/opt/dynamo/venv/bin:${PATH}"
# Copy all packages from runtime stage system site-packages into venv
# This includes ai-dynamo-runtime, kubernetes, and all other dependencies
# Use --no-preserve=mode so copied files inherit umask 002 (group-writable)
# NOTE(review): the python3.12 paths are hard-coded and must match the interpreter
# version shipped by the upstream image.
RUN cp -r --no-preserve=mode /usr/local/lib/python3.12/dist-packages/* \
    /opt/dynamo/venv/lib/python3.12/site-packages/
# Install maturin and uv into venv for development.
# --ignore-installed does not uninstall the upstream maturin; it installs a fresh copy
# into the venv regardless of what is already present.
# The requirement is quoted so the shell cannot treat "[patchelf]" as a glob pattern,
# and --no-cache-dir keeps pip's download cache out of the image layer.
COPY --from=dynamo_base /bin/uv /opt/dynamo/venv/bin/uv
RUN pip install --no-cache-dir --ignore-installed "maturin[patchelf]"
# Editable install of dynamo components.
# NOTE(review): `-e .` resolves against the current working directory, presumably
# /workspace inherited from an earlier stage; confirm the inherited WORKDIR.
COPY --chown=dynamo:0 --chmod=664 pyproject.toml README.md hatch_build.py /workspace/
RUN pip install --no-cache-dir --no-deps -e .
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD [] CMD []
...@@ -827,82 +827,3 @@ ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA ...@@ -827,82 +827,3 @@ ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD [] CMD []
###########################################################
########## Development (run.sh, runs as root user) ########
###########################################################
#
# PURPOSE: Local development environment for use with run.sh (not Dev Container plug-in)
#
# This stage runs as root and provides:
# - Development tools and utilities for local debugging
# - Support for vscode/cursor development outside the Dev Container plug-in
#
# Use this stage if you need a full-featured development environment with extra tools,
# but do not use it with the Dev Container plug-in.
FROM runtime AS dev
# Workspace location inside the container; exported as an ENV later in this stage.
ARG WORKSPACE_DIR=/workspace
# System package installation requires root.
USER root
# General utilities, build dependencies and Rust build dependencies.
# Packages are listed alphabetically, one per line, to keep future diffs small.
RUN apt-get update -y && \
    apt-get install -y --no-install-recommends \
        autoconf \
        automake \
        clang \
        cmake \
        git \
        htop \
        iproute2 \
        libclang-dev \
        libtool \
        meson \
        net-tools \
        nvtop \
        pkg-config \
        protobuf-compiler \
        pybind11-dev \
        rsync \
        tmux \
        unzip \
        vim \
        wget \
        zip && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Group-writable files in the dev stage (runs as root): drop a umask 002 snippet into
# /etc/profile.d and switch RUN to a login shell so that the snippet is sourced.
RUN mkdir -p /etc/profile.d \
    && printf 'umask 002\n' > /etc/profile.d/00-umask.sh
SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"]
# Workspace, Rust toolchain and virtualenv locations; cargo binaries go first on PATH.
ENV WORKSPACE_DIR=${WORKSPACE_DIR} \
    DYNAMO_HOME=${WORKSPACE_DIR} \
    RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/workspace/target \
    VIRTUAL_ENV=/opt/dynamo/venv \
    PATH=/usr/local/cargo/bin:${PATH}
# Reuse the rust installation from dynamo_base to avoid duplication efforts.
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> because COPY --chmod only affects <path>/*, not <path>
COPY --chmod=775 --from=dynamo_base /usr/local/rustup /usr/local/rustup
COPY --chmod=775 --from=dynamo_base /usr/local/cargo /usr/local/cargo
RUN chmod g+w /usr/local/rustup /usr/local/cargo
# Install maturin, for maturin develop.
# The requirement is quoted so the shell cannot treat "[patchelf]" as a glob pattern.
RUN uv pip install --no-cache "maturin[patchelf]"
# Editable install of dynamo.
# NOTE(review): `-e .` resolves against the current working directory, presumably
# /workspace inherited from an earlier stage; confirm the inherited WORKDIR.
COPY --chown=dynamo:0 --chmod=664 pyproject.toml README.md hatch_build.py /workspace/
RUN uv pip install --no-cache --no-deps -e .
CMD []
...@@ -806,80 +806,3 @@ ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA ...@@ -806,80 +806,3 @@ ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD [] CMD []
###########################################################
########## Development (run.sh, runs as root user) ########
###########################################################
#
# PURPOSE: Local development environment for use with run.sh (not Dev Container plug-in)
#
# This stage runs as root and provides:
# - Development tools and utilities for local debugging
# - Support for vscode/cursor development outside the Dev Container plug-in
#
# Use this stage if you need a full-featured development environment with extra tools,
# but do not use it with the Dev Container plug-in.
FROM runtime AS dev
# Workspace location inside the container; exported as an ENV later in this stage.
ARG WORKSPACE_DIR=/workspace
# System package installation requires root.
USER root
# General utilities, build dependencies and Rust build dependencies.
# Packages are listed alphabetically, one per line, to keep future diffs small.
RUN apt-get update -y && \
    apt-get install -y --no-install-recommends \
        autoconf \
        automake \
        clang \
        cmake \
        git \
        htop \
        iproute2 \
        libclang-dev \
        libtool \
        meson \
        net-tools \
        nvtop \
        openssh-client \
        pkg-config \
        protobuf-compiler \
        pybind11-dev \
        rsync \
        tmux \
        unzip \
        vim \
        wget \
        zip && \
    rm -rf /var/lib/apt/lists/*
# Group-writable files in the dev stage (runs as root): drop a umask 002 snippet into
# /etc/profile.d and switch RUN to a login shell so that the snippet is sourced.
RUN mkdir -p /etc/profile.d \
    && printf 'umask 002\n' > /etc/profile.d/00-umask.sh
SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"]
# Workspace, Rust toolchain and virtualenv locations; cargo binaries go first on PATH.
ENV WORKSPACE_DIR=${WORKSPACE_DIR} \
    DYNAMO_HOME=${WORKSPACE_DIR} \
    RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/workspace/target \
    VIRTUAL_ENV=/opt/dynamo/venv \
    PATH=/usr/local/cargo/bin:${PATH}
# Reuse the rust installation from dynamo_base to avoid duplication efforts.
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> because COPY --chmod only affects <path>/*, not <path>
COPY --chmod=775 --from=dynamo_base /usr/local/rustup /usr/local/rustup
COPY --chmod=775 --from=dynamo_base /usr/local/cargo /usr/local/cargo
RUN chmod g+w /usr/local/rustup /usr/local/cargo
# Install maturin (for maturin develop), then do an editable install of dynamo.
# The maturin requirement is quoted so the shell cannot treat "[patchelf]" as a glob
# pattern, and --no-cache keeps uv's download cache out of the image layer.
# NOTE(review): `-e .` resolves against the current working directory, presumably
# /workspace inherited from an earlier stage; confirm the inherited WORKDIR.
COPY --chown=dynamo:0 --chmod=664 pyproject.toml README.md hatch_build.py /workspace/
RUN uv pip install --no-cache "maturin[patchelf]" && \
    uv pip install --no-cache --no-deps -e .
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
...@@ -54,9 +54,22 @@ Below is a summary of the general file structure for the framework Dockerfile st ...@@ -54,9 +54,22 @@ Below is a summary of the general file structure for the framework Dockerfile st
| /opt/dynamo/venv/ | COPY from framework | | /opt/dynamo/venv/ | COPY from framework |
| /opt/vllm/ | COPY from framework | | /opt/vllm/ | COPY from framework |
| /workspace/{tests,examples,deploy}/ |COPY from build context | | /workspace/{tests,examples,deploy}/ |COPY from build context |
| **STAGE: dev** | **FROM runtime** | | **STAGE: dev** | **FROM runtime (via dev/Dockerfile.dev)** |
| /usr/local/rustup/ | COPY from dynamo_base | | /usr/bin/, /usr/lib/, etc. | COPY from dynamo_tools (dev utilities, git, sudo, etc.) |
| /usr/local/cargo/ | COPY from dynamo_base | | /usr/local/rustup/ | COPY from dynamo_tools |
| /usr/local/cargo/ | COPY from dynamo_tools |
| /usr/local/bin/maturin | COPY from dynamo_tools |
| /opt/dynamo/venv/ | For SGLang: created with --system-site-packages, includes uv and maturin |
| /workspace/ | Full source code copied from build context with editable install |
| **💡 Recommendation** | **Use --mount-workspace with run.sh** for live editing (bind mount overrides baked-in code) |
| PATH | Includes /opt/dynamo/venv/bin:/usr/local/cargo/bin |
| umask 002 | Login shell sources /etc/profile.d/00-umask.sh for group-writable files |
| **STAGE: local-dev** | **FROM dev (via dev/Dockerfile.dev)** |
| /home/dynamo/.rustup/ | COPY from /usr/local/rustup (user-writable) |
| USER | dynamo (UID/GID remapped to match host user) |
| **💡 Recommendation** | **Use --mount-workspace with run.sh** for live editing (bind mount overrides baked-in code) |
| RUSTUP_HOME | /home/dynamo/.rustup |
| CARGO_HOME | /home/dynamo/.cargo |
</details> </details>
### Why Containerization? ### Why Containerization?
...@@ -85,12 +98,12 @@ The `build.sh` and `run.sh` scripts are convenience wrappers that simplify commo ...@@ -85,12 +98,12 @@ The `build.sh` and `run.sh` scripts are convenience wrappers that simplify commo
| **Usage** | Benchmarking inference and deployments, non-root | Development, compilation, testing locally | Legacy workflows, root user, use with caution | | **Usage** | Benchmarking inference and deployments, non-root | Development, compilation, testing locally | Legacy workflows, root user, use with caution |
| **User** | dynamo (UID 1000) | dynamo (UID=host user) with sudo | root (UID 0, use with caution) | | **User** | dynamo (UID 1000) | dynamo (UID=host user) with sudo | root (UID 0, use with caution) |
| **Home Directory** | `/home/dynamo` | `/home/dynamo` | `/root` | | **Home Directory** | `/home/dynamo` | `/home/dynamo` | `/root` |
| **Working Directory** | `/workspace` (in-container or mounted) | `/workspace` (must be mounted w/ `--mount-workspace`) | `/workspace` (must be mounted w/ `--mount-workspace`) | | **Working Directory** | `/workspace` (in-container or mounted) | `/workspace` (baked-in, optionally mounted w/ `--mount-workspace`) | `/workspace` (baked-in, optionally mounted w/ `--mount-workspace`) |
| **Rust Toolchain** | None (uses pre-built wheels) | System install (`/usr/local/rustup`, `/usr/local/cargo`) | System install (`/usr/local/rustup`, `/usr/local/cargo`) | | **Rust Toolchain** | None (uses pre-built wheels) | System install (`/usr/local/rustup`, `/usr/local/cargo`) | System install (`/usr/local/rustup`, `/usr/local/cargo`) |
| **Cargo Target** | None | `/workspace/target` | `/workspace/target` | | **Cargo Target** | None | `/workspace/target` | `/workspace/target` |
| **Python Env** | venv (`/opt/dynamo/venv`) for vllm/trtllm, system site-packages for sglang | venv (`/opt/dynamo/venv`) for vllm/trtllm, system site-packages for sglang | venv (`/opt/dynamo/venv`) for vllm/trtllm, system site-packages for sglang | | **Python Env** | venv (`/opt/dynamo/venv`) for vllm/trtllm, system site-packages for sglang | venv (`/opt/dynamo/venv`) for all frameworks (with --system-site-packages for sglang) | venv (`/opt/dynamo/venv`) for all frameworks (with --system-site-packages for sglang) |
**Note (SGLang)**: SGLang runtime uses system site-packages, but the `dev` image creates `/opt/dynamo/venv` (and `local-dev` inherits it from `dev`) for build tooling like `maturin`. **Note (SGLang)**: SGLang runtime uses system site-packages, but the `dev` and `local-dev` images create `/opt/dynamo/venv` with `--system-site-packages` for build tooling like `maturin` and `uv`.
## Usage Guidelines ## Usage Guidelines
...@@ -133,17 +146,31 @@ The `build.sh` script is responsible for building Docker images for different AI ...@@ -133,17 +146,31 @@ The `build.sh` script is responsible for building Docker images for different AI
**Key Features:** **Key Features:**
- **Framework Support**: vLLM (default when --framework not specified), TensorRT-LLM, SGLang, or NONE - **Framework Support**: vLLM (default when --framework not specified), TensorRT-LLM, SGLang, or NONE
- **Multi-stage Builds**: Build process with base images - **Multi-stage Builds**: Build process with base images
- **Development Targets**: Supports `dev` target and `local-dev` target - **Development Targets**: Supports `dev`, `runtime`, and `local-dev` targets via `build.sh`.
- **Build Caching**: Docker layer caching and sccache support - **Build Caching**: Docker layer caching and sccache support
- **GPU Optimization**: CUDA, EFA, and NIXL support - **GPU Optimization**: CUDA, EFA, and NIXL support
**How `dev` / `local-dev` builds work:**
- `dev` and `local-dev` targets are defined in `container/dev/Dockerfile.dev`.
- The framework Dockerfiles (`Dockerfile.vllm`, `Dockerfile.trtllm`, `Dockerfile.sglang`, `Dockerfile`) define shared stages used by `Dockerfile.dev` (e.g. `runtime`, `dynamo_base`, `wheel_builder`).
- To build a single coherent Dockerfile, `build.sh` generates a temporary Dockerfile that is a literal concatenation of:
- the selected framework Dockerfile, then
- `container/dev/Dockerfile.dev`
`build.sh` then continues building normally using the temp Dockerfile path.
**Requirements and debugging:**
- By default the temp Dockerfile is deleted at the end of `build.sh`. To keep it for inspection, set `KEEP_DEV_DOCKERFILE_TEMP=1`.
> **💡 Tip**: The `dev` and `local-dev` images have source code baked in, but **using `--mount-workspace` with `run.sh` is recommended for development** to bind mount your local workspace for live editing.
**Common Usage Examples:** **Common Usage Examples:**
```bash ```bash
# Build vLLM dev image called dynamo:latest-vllm (default). This runs as root and is fine to use for inferencing/benchmarking, etc. # Build vLLM dev image called dynamo:latest-vllm (default). This runs as root and is for development.
./build.sh ./build.sh
# Build both development and local-dev images (integrated into build.sh). While the dev image runs as root, the local-dev image will run as dynamo user with UID/GID matched to your host user, which is useful when mounting partitions. It will also contain development tools. # Build a local-dev image. The local-dev image will run as `dynamo` with UID/GID matched to your host user,
# which is useful when mounting partitions for development.
./build.sh --framework vllm --target local-dev ./build.sh --framework vllm --target local-dev
# Build TensorRT-LLM development image called dynamo:latest-trtllm # Build TensorRT-LLM development image called dynamo:latest-trtllm
...@@ -328,10 +355,11 @@ See Docker documentation for custom network creation and management. ...@@ -328,10 +355,11 @@ See Docker documentation for custom network creation and management.
### Development Workflow ### Development Workflow
```bash ```bash
# 1. Build local-dev image (creates both dynamo:latest-vllm and dynamo:latest-vllm-local-dev) # 1. Build local-dev image (builds runtime, then dev as intermediate, then local-dev as final image)
./build.sh --framework vllm --target local-dev ./build.sh --framework vllm --target local-dev
# 2. Run development container using the local-dev image # 2. Run development container using the local-dev image
# RECOMMENDED: --mount-workspace for live editing in dev and local-dev images
./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/dynamo/.cache -it ./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/dynamo/.cache -it
# 3. Inside container, run inference (requires both frontend and backend) # 3. Inside container, run inference (requires both frontend and backend)
......
...@@ -583,65 +583,6 @@ BUILD_ARGS+=" --build-arg NIXL_LIBFABRIC_REF=${NIXL_LIBFABRIC_REF} " ...@@ -583,65 +583,6 @@ BUILD_ARGS+=" --build-arg NIXL_LIBFABRIC_REF=${NIXL_LIBFABRIC_REF} "
# Add EFA_VERSION as a build argument # Add EFA_VERSION as a build argument
BUILD_ARGS+=" --build-arg EFA_VERSION=${EFA_VERSION} " BUILD_ARGS+=" --build-arg EFA_VERSION=${EFA_VERSION} "
# Build the local-dev image on top of an existing dev image.
#
# Arguments:
#   $1  dev_base_image  image passed to the Dockerfile as DEV_BASE
#   $2  tags            one or more "--tag <name>" options as a single string
#   $3  success_msg     message printed after a successful build
#   $4  header_title    accepted for interface compatibility; not used in this function
#
# Reads:  CUSTOM_UID, CUSTOM_GID, SOURCE_DIR, ARCH, RUN_PREFIX
# Sets:   USER_UID, USER_GID, DOCKERFILE_LOCAL_DEV (and the helper variables below)
# Exits with status 1 when Dockerfile.local_dev is missing or the docker build fails.
build_local_dev_with_header() {
    local dev_base_image="$1"
    local tags="$2"
    local success_msg="$3"
    local header_title="$4"

    # Get user info right before using it
    USER_UID=${CUSTOM_UID:-$(id -u)}
    USER_GID=${CUSTOM_GID:-$(id -g)}

    # Set up dockerfile path
    DOCKERFILE_LOCAL_DEV="${SOURCE_DIR}/Dockerfile.local_dev"
    if [[ ! -f "$DOCKERFILE_LOCAL_DEV" ]]; then
        echo "ERROR: Dockerfile.local_dev not found at: $DOCKERFILE_LOCAL_DEV"
        exit 1
    fi

    echo ""
    echo "Now building new local-dev image from: $dev_base_image"
    echo "User 'dynamo' will have UID: $USER_UID, GID: $USER_GID"

    # Show the docker command being executed if not in dry-run mode
    if [ -z "$RUN_PREFIX" ]; then
        set -x
    fi

    # $RUN_PREFIX and $tags are intentionally unquoted: each must word-split into
    # separate arguments (e.g. "--tag a --tag b").
    $RUN_PREFIX docker build \
        --build-arg DEV_BASE="$dev_base_image" \
        --build-arg USER_UID="$USER_UID" \
        --build-arg USER_GID="$USER_GID" \
        --build-arg ARCH="$ARCH" \
        --file "$DOCKERFILE_LOCAL_DEV" \
        $tags \
        "$SOURCE_DIR" || {
        { set +x; } 2>/dev/null
        echo "ERROR: Failed to build local_dev image"
        exit 1
    }
    { set +x; } 2>/dev/null
    echo "$success_msg"

    # Show usage instructions
    echo ""
    echo "To run the local-dev image as the local user ($USER_UID/$USER_GID):"
    # Extract the first tag from the tags string (the full version tag, not the latest tag)
    last_tag=$(echo "$tags" | grep -o -- '--tag [^ ]*' | head -1 | cut -d' ' -f2)
    # Calculate relative path to run.sh from current working directory
    # Get the directory where build.sh is located
    build_dir="$(dirname "${BASH_SOURCE[0]}")"
    # Get the absolute path to run.sh (in the same directory as build.sh)
    run_abs_path="$(realpath "$build_dir/run.sh")"
    # Calculate relative path from current PWD to run.sh.
    # The paths are passed as arguments instead of being pasted into the Python source,
    # so a quote or backslash in either path cannot break (or inject into) the snippet.
    run_path="$(python3 -c 'import os, sys; print(os.path.relpath(sys.argv[1], sys.argv[2]))' "$run_abs_path" "$PWD")"
    echo " $run_path --image $last_tag --mount-workspace ..."
}
# Function to build AWS EFA images from base runtime or dev images # Function to build AWS EFA images from base runtime or dev images
build_aws_with_header() { build_aws_with_header() {
local base_image="$1" local base_image="$1"
...@@ -683,14 +624,6 @@ build_aws_with_header() { ...@@ -683,14 +624,6 @@ build_aws_with_header() {
} }
# Handle local-dev target: the main build is pointed at the "dev" stage, and
# LOCAL_DEV_BUILD records that a local-dev image was requested.
if [[ $TARGET == "local-dev" ]]; then
LOCAL_DEV_BUILD=true
TARGET_STR="--target dev"
fi
# BUILD DEV IMAGE
BUILD_ARGS+=" --build-arg BASE_IMAGE=$BASE_IMAGE --build-arg BASE_IMAGE_TAG=$BASE_IMAGE_TAG" BUILD_ARGS+=" --build-arg BASE_IMAGE=$BASE_IMAGE --build-arg BASE_IMAGE_TAG=$BASE_IMAGE_TAG"
if [ -n "${GITHUB_TOKEN}" ]; then if [ -n "${GITHUB_TOKEN}" ]; then
...@@ -946,11 +879,73 @@ if [[ "$PLATFORM" == *"linux/arm64"* && "${FRAMEWORK}" == "SGLANG" ]]; then ...@@ -946,11 +879,73 @@ if [[ "$PLATFORM" == *"linux/arm64"* && "${FRAMEWORK}" == "SGLANG" ]]; then
# Add arguments required for sglang blackwell build # Add arguments required for sglang blackwell build
BUILD_ARGS+=" --build-arg GRACE_BLACKWELL=true --build-arg BUILD_TYPE=blackwell_aarch64" BUILD_ARGS+=" --build-arg GRACE_BLACKWELL=true --build-arg BUILD_TYPE=blackwell_aarch64"
fi fi
# Dev/local-dev targets: build from a concatenated Dockerfile:
#   <framework Dockerfile> + container/dev/Dockerfile.dev
# An empty TARGET is treated the same as "dev".
if [[ -z "${TARGET:-}" || "${TARGET:-}" == "dev" || "${TARGET:-}" == "local-dev" ]]; then
    # Concatenate the framework Dockerfile ($1) and ${SOURCE_DIR}/dev/Dockerfile.dev into a
    # temp file and print that file's path on stdout.
    #
    # Returns non-zero (with nothing on stdout) when an input is missing or the temp file
    # cannot be written. This function is invoked inside $(...): diagnostics are redirected
    # to stderr so they are never captured as the path, and the caller must check the exit
    # status because an `exit` in here would only end the command-substitution subshell.
    _gen_dev_dockerfile_temp() {
        local fw_df dev_df out
        fw_df="$1"
        dev_df="${SOURCE_DIR}/dev/Dockerfile.dev"
        if [[ ! -f "${fw_df}" ]]; then
            error "ERROR:" "Framework Dockerfile not found: ${fw_df}" >&2
            return 1
        fi
        if [[ ! -f "${dev_df}" ]]; then
            error "ERROR:" "Dev Dockerfile not found: ${dev_df}" >&2
            return 1
        fi
        out="$(mktemp -t dynamo-dev-combined.XXXXXX.Dockerfile)" || return 1
        # The newline between the two files keeps the framework Dockerfile's last instruction
        # from being glued onto the first line of Dockerfile.dev when it lacks a trailing newline.
        if ! { cat "${fw_df}" && printf '\n' && cat "${dev_df}" && printf '\n'; } > "${out}"; then
            rm -f "${out}"
            error "ERROR:" "Failed to write temp Dockerfile: ${out}" >&2
            return 1
        fi
        printf '%s\n' "${out}"
    }

    # Stop here if generation failed instead of continuing with an empty/invalid path.
    DOCKERFILE="$(_gen_dev_dockerfile_temp "${DOCKERFILE}")" || exit 1
    if [[ -z "${DOCKERFILE}" ]]; then
        echo "ERROR: Failed to generate the combined dev Dockerfile" >&2
        exit 1
    fi

    # Ensure we clean up the temp Dockerfile (opt-out with KEEP_DEV_DOCKERFILE_TEMP=1 for debugging).
    if [[ "${KEEP_DEV_DOCKERFILE_TEMP:-}" != "1" ]]; then
        trap 'rm -f "${DOCKERFILE}" 2>/dev/null || true' EXIT
    fi

    # Dockerfile.dev expects a lowercase framework string.
    BUILD_ARGS+=" --build-arg FRAMEWORK=${FRAMEWORK,,} "

    # Preserve historical tagging behavior for dev/local-dev (build.sh used to delegate out):
    # strip the "--tag " prefix and any existing -runtime/-local-dev/-dev suffix, then
    # append the suffix matching the requested target.
    base="${TAG#--tag }"
    base="${base%-runtime}"
    base="${base%-local-dev}"
    base="${base%-dev}"
    if [[ -z "${TARGET:-}" || "${TARGET}" == "dev" ]]; then
        TAG="--tag ${base}-dev"
    else
        TAG="--tag ${base}-local-dev"
        # Default UID/GID behavior: current user if not specified.
        if [[ -z "${CUSTOM_UID:-}" ]]; then
            CUSTOM_UID="$(id -u)"
        fi
        if [[ -z "${CUSTOM_GID:-}" ]]; then
            CUSTOM_GID="$(id -g)"
        fi
        BUILD_ARGS+=" --build-arg USER_UID=${CUSTOM_UID} --build-arg USER_GID=${CUSTOM_GID} "
    fi
fi
LATEST_TAG="" LATEST_TAG=""
if [ -z "${NO_TAG_LATEST}" ]; then if [ -z "${NO_TAG_LATEST}" ]; then
LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}" if [[ -z "${TARGET:-}" || "${TARGET}" == "dev" ]]; then
if [ -n "${TARGET}" ] && [ "${TARGET}" != "local-dev" ]; then LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}"
LATEST_TAG="${LATEST_TAG}-${TARGET}" elif [[ "${TARGET}" == "local-dev" ]]; then
LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}-local-dev"
else
LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}"
if [ -n "${TARGET}" ] && [ "${TARGET}" != "local-dev" ]; then
LATEST_TAG="${LATEST_TAG}-${TARGET}"
fi
fi fi
fi fi
...@@ -1053,46 +1048,4 @@ if [[ "${MAKE_EFA:-}" == "true" ]]; then ...@@ -1053,46 +1048,4 @@ if [[ "${MAKE_EFA:-}" == "true" ]]; then
build_aws_with_header "$BASE_IMAGE_FOR_EFA" "$AWS_TAGS" "$EFA_STAGE" "Successfully built ${EFA_STAGE} image" build_aws_with_header "$BASE_IMAGE_FOR_EFA" "$AWS_TAGS" "$EFA_STAGE" "Successfully built ${EFA_STAGE} image"
fi fi
# Handle local-dev build: pick the base image and the tag list, then hand off to
# build_local_dev_with_header.
if [[ "${LOCAL_DEV_BUILD:-}" == "true" ]]; then
    if [[ "${MAKE_EFA:-}" == "true" ]]; then
        # With EFA: local-dev-aws is built on top of the first dev-aws tag
        DEV_AWS_IMAGE=$(echo "$AWS_TAGS" | grep -o -- '--tag [^ ]*' | head -1 | cut -d' ' -f2)
        LOCAL_DEV_AWS_TAGS=""
        if [[ -n "$TAG" ]]; then
            # Drop the first "--tag " to get the bare image name
            TAG_NAME="${TAG/--tag /}"
            LOCAL_DEV_AWS_TAGS+=" --tag ${TAG_NAME}-local-dev-aws"
        fi
        if [[ -n "$LATEST_TAG" ]]; then
            LATEST_TAG_NAME="${LATEST_TAG/--tag /}"
            LOCAL_DEV_AWS_TAGS+=" --tag ${LATEST_TAG_NAME}-local-dev-aws"
        fi
        build_local_dev_with_header "$DEV_AWS_IMAGE" "$LOCAL_DEV_AWS_TAGS" "Successfully built local-dev-aws image" "Building Local-Dev-AWS Image"
    else
        # Without EFA: regular local-dev is built on top of the dev image
        if [[ -n "$TAG" ]]; then
            DEV_IMAGE="${TAG/--tag /}"
        else
            DEV_IMAGE="dynamo:latest-${FRAMEWORK,,}"
        fi
        LOCAL_DEV_TAGS=""
        if [[ -n "$TAG" ]]; then
            TAG_NAME="${TAG/--tag /}"
            LOCAL_DEV_TAGS+=" --tag ${TAG_NAME}-local-dev"
        fi
        if [[ -n "$LATEST_TAG" ]]; then
            LATEST_TAG_NAME="${LATEST_TAG/--tag /}"
            LOCAL_DEV_TAGS+=" --tag ${LATEST_TAG_NAME}-local-dev"
        fi
        # The first tag in the list is the one reported in the success message
        FIRST_TAG=$(echo "$LOCAL_DEV_TAGS" | grep -o -- '--tag [^ ]*' | head -1 | cut -d' ' -f2)
        build_local_dev_with_header "$DEV_IMAGE" "$LOCAL_DEV_TAGS" "Successfully built $FIRST_TAG" "Building Local-Dev Image"
    fi
fi
{ set +x; } 2>/dev/null { set +x; } 2>/dev/null
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Framework-specific environment variables and paths
# Only add paths that exist to avoid cluttering environment
# Prepend directory $1 to LD_LIBRARY_PATH.
# When LD_LIBRARY_PATH is unset or empty, no trailing ':' is left behind: the dynamic
# loader treats an empty path element as the current working directory.
_dynamo_ld_prepend() {
    export LD_LIBRARY_PATH="$1${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
}

# TensorRT-LLM specific variables
if [ -d /usr/local/tensorrt/targets ]; then
    export TENSORRT_LIB_DIR=/usr/local/tensorrt/targets/$(uname -m)-linux-gnu/lib
    [ -d "$TENSORRT_LIB_DIR" ] && _dynamo_ld_prepend "$TENSORRT_LIB_DIR"
fi

# HPC-X Open MPI
if [ -d /opt/hpcx/ompi ]; then
    export OPAL_PREFIX=/opt/hpcx/ompi
    export OMPI_MCA_coll_ucc_enable=0
    export PATH="/opt/hpcx/ompi/bin:${PATH}"
    _dynamo_ld_prepend /opt/hpcx/ompi/lib
fi
[ -d /opt/hpcx/ucc/lib ] && _dynamo_ld_prepend /opt/hpcx/ucc/lib

# Keep an already-set ENV; otherwise point it at /etc/shinit_v2 when that file exists
[ -f /etc/shinit_v2 ] && export ENV="${ENV:-/etc/shinit_v2}"

# UCX and CUDA tool directories
[ -d /usr/local/ucx/bin ] && export PATH="/usr/local/ucx/bin:${PATH}"
[ -d /usr/local/cuda/bin ] && export PATH="/usr/local/cuda/bin:${PATH}"
[ -d /usr/local/cuda/nvvm/bin ] && export PATH="/usr/local/cuda/nvvm/bin:${PATH}"

# vLLM nvshmem
[ -d /opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib ] && \
    _dynamo_ld_prepend /opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib

# System nvshmem (TRT-LLM)
# `uname -m` already reports the aarch64 / x86_64 spelling used in the multiarch directory name.
ARCH_ALT=$(uname -m)
[ -d "/usr/lib/${ARCH_ALT}-linux-gnu/nvshmem/13" ] && \
    _dynamo_ld_prepend "/usr/lib/${ARCH_ALT}-linux-gnu/nvshmem/13"

# PyTorch libraries (TRT-LLM); PYTHON_VERSION defaults to 3.12 when not provided
PYTHON_VERSION=${PYTHON_VERSION:-3.12}
[ -d "/opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages/torch/lib" ] && \
    _dynamo_ld_prepend "/opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages/torch/lib"
[ -d "/opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages/torch_tensorrt/lib" ] && \
    _dynamo_ld_prepend "/opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages/torch_tensorrt/lib"

# Do not leak the helper into the user's shell (this file is sourced as a profile script).
unset -f _dynamo_ld_prepend
# syntax=docker/dockerfile:1.10.0
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Unified development image with two targets:
# - dev: Root-based development for use with run.sh
# - local-dev: Non-root development with UID/GID remapping for Dev Container plugin
#
# IMPORTANT (concat model):
# This Dockerfile is intended to be used via the temp concatenated Dockerfile flow in
# `container/build.sh` (which prepends the selected framework Dockerfile):
# - container/Dockerfile
# - container/Dockerfile.vllm
# - container/Dockerfile.trtllm
# - container/Dockerfile.sglang
#
# The concatenated file provides the stages this Dockerfile depends on:
# - `dynamo_base` (framework base stage; used for cached tool binaries like maturin)
# - `wheel_builder` (framework wheel_builder stage; used for cached Rust/Cargo and SGLang NIXL deps)
#
# Dependency graph (concat flow):
#
# container/build.sh concatenates:
# [framework Dockerfile] + [this file]
#
# Framework Dockerfile (examples: Dockerfile.vllm / Dockerfile.trtllm / Dockerfile.sglang)
# defines these stages (names matter; this file refers to them by name):
#
# dynamo_base (FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG})
# ├─ wheel_builder (FROM quay.io/pypa/manylinux_2_28_*)
# ├─ framework (builds framework install + /opt/dynamo/venv, etc.)
# └─ runtime (FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG}; copies from dynamo_base/wheel_builder/framework)
# └─ dev (root dev image; adds dev-time linking config and pulls in tooling from dynamo_tools)
# └─ local-dev (non-root dev image with UID/GID remapping)
#
# Side stage used by `dev`:
#
# dynamo_tools (FROM runtime; installs extra developer utilities that `dev` copies in)
#
# Both targets share:
# - Developer utilities and tools from the `dynamo_tools` stage
# - Rust toolchain + maturin for editable installs (from concatenated framework stages)
# - NIXL dependencies for SGLang (from concatenated framework wheel_builder stage)
#
# Note on build args:
# - `ARCH` / `ARCH_ALT` are declared in the prepended framework Dockerfile; we re-declare them only
# in stages where they are used (Docker requires ARG re-declare per-stage).
# ======================================================================
# STAGE: dynamo_tools for developers
# ======================================================================
# Why this is a separate stage (not merged into `dev`):
# - `dev` is built FROM the framework `runtime` image. Installing lots of tooling with apt in that stage is slow and
# makes rebuilds expensive when iterating on later dev layers.
# - Keeping tooling installation in `dynamo_tools` lets Docker cache the tools layer independently; `dev` can then
# pull those binaries/configs in via COPY.
FROM runtime AS dynamo_tools
ARG ARCH
ARG ARCH_ALT
ENV DEBIAN_FRONTEND=noninteractive
ENV PATH=/usr/local/bin:${PATH}
USER root
SHELL ["/bin/bash", "-c"]
# NOTE: We intentionally disable the NVIDIA CUDA apt repo for this stage.
# The upstream runtime images may ship CUDA apt sources that occasionally go out of sync (mirror updates),
# causing apt-get update to fail with "File has unexpected size ... Mirror sync in progress".
# This stage only installs generic developer tools that are available from Ubuntu repos, so CUDA repos are unnecessary.
#
# `apt-get update` is retried up to 5 times with a linear backoff to ride out transient metadata issues.
# If every attempt fails, the build stops with an explicit error instead of falling through to
# `apt-get install` with missing package lists.
RUN set -eux; \
    if [ -d /etc/apt/sources.list.d ]; then \
        mkdir -p /tmp/apt-disabled; \
        for f in /etc/apt/sources.list.d/*.list; do \
            [ -e "$f" ] || continue; \
            if grep -q "developer.download.nvidia.com/compute/cuda/repos" "$f"; then \
                mv "$f" "/tmp/apt-disabled/$(basename "$f")"; \
            fi; \
        done; \
    fi; \
    apt_ready=0; \
    for i in 1 2 3 4 5; do \
        if apt-get update -y; then apt_ready=1; break; fi; \
        rm -rf /var/lib/apt/lists/*; \
        sleep $((i * 5)); \
    done; \
    if [ "${apt_ready}" -ne 1 ]; then \
        echo "ERROR: apt-get update failed after 5 attempts" >&2; \
        exit 1; \
    fi; \
    apt-get install -y --no-install-recommends \
        # Core CLI utilities
        ca-certificates \
        curl \
        wget \
        git \
        git-lfs \
        less \
        grep \
        sed \
        # Editors / shells
        vim \
        nano \
        htop \
        tmux \
        screen \
        zsh \
        fish \
        bash-completion \
        # Networking / transfers
        net-tools \
        openssh-client \
        iproute2 \
        iputils-ping \
        zip \
        unzip \
        rsync \
        # Build toolchain
        build-essential \
        cmake \
        autoconf \
        automake \
        libtool \
        meson \
        ninja-build \
        pybind11-dev \
        pkg-config \
        protobuf-compiler \
        # Debugging / tracing
        gdb \
        valgrind \
        strace \
        ltrace \
        # JSON/YAML + filesystem helpers
        jq \
        yq \
        tree \
        fd-find \
        ripgrep \
        # Privilege escalation + crypto tooling
        sudo \
        gnupg2 \
        gnupg1 \
        # GPU / perf helpers
        nvtop \
        # Python
        python3 \
        python3-pip \
        python3-venv \
        # Native deps for Python/Rust wheels
        patchelf \
        clang \
        libclang-dev && \
    rm -rf /var/lib/apt/lists/* && \
    # Initialize Git LFS (this stage runs as root, so this applies to root's git configuration)
    git lfs install
# awk comes in several flavours (gawk, mawk, original-awk) behind one virtual package.
# Try them in that order and keep the first one that installs; if none does, only warn.
# The final step just reports whether an `awk` command ended up on PATH.
RUN apt-get update && \
    { awk_installed=no; \
      for awk_pkg in gawk mawk original-awk; do \
          if apt-get install -y --no-install-recommends "${awk_pkg}"; then \
              awk_installed=yes; \
              break; \
          fi; \
      done; \
      [ "${awk_installed}" = yes ] || echo "Warning: Could not install any awk implementation"; } && \
    rm -rf /var/lib/apt/lists/* && \
    if command -v awk >/dev/null 2>&1; then \
        echo "awk available: $(command -v awk)"; \
    else \
        echo "awk not available"; \
    fi
# Add NVIDIA devtools repository and install development tools (nsight-systems).
# - `set -o pipefail`: this stage's SHELL is plain `bash -c`, so without it a failed key
#   download would be hidden by the pipe into gpg.
# - /etc/apt/keyrings is created explicitly in case the base image does not ship it.
RUN set -o pipefail && \
    mkdir -p /etc/apt/keyrings && \
    wget -qO - "https://developer.download.nvidia.com/devtools/repos/ubuntu2404/${ARCH}/nvidia.pub" | \
        gpg --dearmor -o /etc/apt/keyrings/nvidia-devtools.gpg && \
    echo "deb [signed-by=/etc/apt/keyrings/nvidia-devtools.gpg] https://developer.download.nvidia.com/devtools/repos/ubuntu2404/${ARCH} /" | \
        tee /etc/apt/sources.list.d/nvidia-devtools.list && \
    apt-get update && \
    apt-get install -y --no-install-recommends nsight-systems-2025.5.1 && \
    rm -rf /var/lib/apt/lists/*
# ======================================================================
# TARGET: dev (root-based development)
# ======================================================================
FROM runtime AS dev
# Redeclare ARGs for use in this stage
ARG FRAMEWORK
USER root
# Redeclare build args for use in this stage
ARG PYTHON_VERSION
# Guarantee that /usr/bin/python3 exists in the dev image.
# When it is missing, symlink it to the first interpreter found, in this order:
#   1. /opt/dynamo/venv/bin/python3   (vLLM/TRTLLM runtimes may only ship Python inside the venv)
#   2. /opt/dynamo/venv/bin/python
#   3. `python3` on PATH
#   4. `python` on PATH
# SGLang runtime images typically already provide /usr/bin/python3, as does the framework=none runtime.
# The build fails if no interpreter can be found.
RUN if [ ! -e /usr/bin/python3 ]; then \
        py_src=""; \
        for candidate in /opt/dynamo/venv/bin/python3 /opt/dynamo/venv/bin/python; do \
            if [ -x "${candidate}" ]; then \
                py_src="${candidate}"; \
                break; \
            fi; \
        done; \
        if [ -z "${py_src}" ]; then \
            py_src="$(command -v python3 2>/dev/null || command -v python 2>/dev/null || true)"; \
        fi; \
        if [ -z "${py_src}" ]; then \
            echo "ERROR: Could not find Python to symlink to /usr/bin/python3" >&2; \
            exit 1; \
        fi; \
        ln -s "${py_src}" /usr/bin/python3; \
    fi
# Bring UCX / NIXL / libfabric / gdrcopy into the dev image for building from source.
# cargo build links against -lnixl, -lnixl_build and -lnixl_common; the runtime stage gets by with
# pre-built wheels, but `maturin develop` / `cargo build` in the dev stage need the libraries.
# - SGLang: the upstream lmsysorg/sglang runtime image does not include them, so they are copied from
#   the wheel_builder stage (only when both the UCX and NIXL directories are present there).
# - vllm/trtllm/none: NIXL/UCX are already present in runtime, so this step does nothing.
ARG ARCH_ALT
RUN --mount=from=wheel_builder,target=/wheel_builder \
    if [ "${FRAMEWORK}" = "sglang" ] && \
       [ -d /wheel_builder/usr/local/ucx ] && \
       [ -d /wheel_builder/opt/nvidia/nvda_nixl ]; then \
        nixl_root=/opt/nvidia/nvda_nixl; \
        mkdir -p /opt/nvidia /usr/include /usr/lib64 /etc/ld.so.conf.d; \
        cp -r /wheel_builder/opt/nvidia/nvda_nixl /opt/nvidia/; \
        cp -r /wheel_builder/usr/local/ucx /usr/local/; \
        cp -r /wheel_builder/usr/local/libfabric /usr/local/; \
        cp /wheel_builder/usr/include/gdrapi.h /usr/include/; \
        cp /wheel_builder/usr/lib64/libgdrapi.so* /usr/lib64/; \
        echo "/usr/lib64" >> /etc/ld.so.conf.d/gdrcopy.conf; \
        # Mirror lib64 into the ARCH-qualified lib/${ARCH_ALT}-linux-gnu path that SGLang expects.
        if [ -d "${nixl_root}/lib64" ]; then \
            mkdir -p "${nixl_root}/lib/${ARCH_ALT}-linux-gnu"; \
            cp -r "${nixl_root}/lib64/." "${nixl_root}/lib/${ARCH_ALT}-linux-gnu/"; \
        fi; \
    fi
# All frameworks use the same path pattern: /opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu
# For vllm/trtllm/none: This resets the same values already set in runtime (no harm)
# For sglang: This sets them for the first time (required)
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \
NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins
# Set universal CUDA development environment variables (all frameworks)
# vLLM: Dockerfile.vllm line 533, 597
# TRT-LLM: Dockerfile.trtllm lines 600-606
ENV CUDA_HOME=/usr/local/cuda \
CPATH=/usr/local/cuda/include \
CUDA_DEVICE_ORDER=PCI_BUS_ID \
TRITON_CUPTI_PATH=/usr/local/cuda/include \
TRITON_CUDACRT_PATH=/usr/local/cuda/include \
TRITON_CUOBJDUMP_PATH=/usr/local/cuda/bin/cuobjdump \
TRITON_NVDISASM_PATH=/usr/local/cuda/bin/nvdisasm \
TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas \
TRITON_CUDART_PATH=/usr/local/cuda/include \
NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
# Base LD_LIBRARY_PATH with universal paths (all frameworks have these)
# Framework-specific paths are conditionally added in /etc/profile.d/50-framework-paths.sh
ARG PYTHON_VERSION
ENV LD_LIBRARY_PATH=\
${NIXL_LIB_DIR}:\
${NIXL_PLUGIN_DIR}:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
/usr/local/cuda/compat/lib.real:\
${LD_LIBRARY_PATH}
# Copy shell profile script for framework-specific environment variables
# This script conditionally adds PATH/LD_LIBRARY_PATH entries based on what exists
COPY --chmod=755 container/dev/50-framework-paths.sh /etc/profile.d/50-framework-paths.sh
# Set umask for group-writable files in dev stage (runs as root)
RUN mkdir -p /etc/profile.d && echo 'umask 002' > /etc/profile.d/00-umask.sh
SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"]
# Developer tools are installed in the dynamo_tools layer and copied into the runtime-based dev image.
# This keeps dev builds fast and avoids apt-get in runtime-derived stages.
#
# IMPORTANT: Do not clobber runtime /usr/bin/python3 (SGLang depends on system python3 being present).
# We stash the pre-tools python3 (which may be a real binary or a symlink we created earlier for vLLM/TRTLLM)
# and restore it after copying toolchains from dynamo_tools.
RUN if [ -e /usr/bin/python3 ]; then cp -a /usr/bin/python3 /tmp/python3.pretools; fi
COPY --from=dynamo_tools /usr/bin/ /usr/bin/
COPY --from=dynamo_tools /usr/sbin/ /usr/sbin/
COPY --from=dynamo_tools /usr/lib/ /usr/lib/
COPY --from=dynamo_tools /lib/ /lib/
COPY --from=dynamo_tools /usr/share/ /usr/share/
COPY --from=dynamo_tools /etc/alternatives/ /etc/alternatives/
COPY --from=dynamo_tools /etc/bash_completion.d/ /etc/bash_completion.d/
COPY --from=dynamo_tools /etc/sudoers /etc/sudoers
COPY --from=dynamo_tools /etc/sudoers.d/ /etc/sudoers.d/
COPY --from=dynamo_tools /opt/nvidia/ /opt/nvidia/
# Restore the pre-tools python3 (keeps SGLang system python intact and avoids venv symlink loops).
RUN if [ -e /tmp/python3.pretools ]; then cp -af /tmp/python3.pretools /usr/bin/python3; fi
ARG WORKSPACE_DIR=/workspace
# Dev environment variables (aligned with framework dev stages)
# Framework-specific PATH additions are handled in /etc/profile.d/50-framework-paths.sh
ENV WORKSPACE_DIR=${WORKSPACE_DIR} \
DYNAMO_HOME=${WORKSPACE_DIR} \
RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
CARGO_TARGET_DIR=/workspace/target \
VIRTUAL_ENV=/opt/dynamo/venv \
PATH=/opt/dynamo/venv/bin:/usr/local/cargo/bin:$PATH
# Copy Rust/Cargo/Maturin from the concatenated framework stages.
# - Rust/Cargo: from `wheel_builder` (already installed there)
# - maturin: from `wheel_builder` venv (installed there via uv pip)
COPY --from=wheel_builder --chown=dynamo:0 --chmod=775 /usr/local/rustup /usr/local/rustup
COPY --from=wheel_builder --chown=dynamo:0 --chmod=775 /usr/local/cargo /usr/local/cargo
COPY --from=wheel_builder --chown=dynamo:0 --chmod=775 /workspace/.venv/bin/maturin /usr/local/bin/maturin
# Provide an `uv` binary for SGLang venv creation below.
# NOTE(review): the source image is tagged `latest`, so the uv version can change between
# builds; consider pinning a version tag or digest for reproducibility.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /tmp/uv-binary
# Create venv for SGLang (vLLM/TensorRT-LLM/framework=none already have /opt/dynamo/venv from runtime)
# - SGLang: Use --system-site-packages to inherit runtime packages, then copy the system
#   dist-packages into the venv
# - framework=none: Runtime normally already has the venv; it is only created here if missing
# Note: umask 002 from login shell ensures files are group-writable
#
# The SGLang steps form a single `&&` chain so that a failure in any step (venv creation,
# package copy, uv install, maturin install) fails the build instead of being ignored.
RUN if [ "${FRAMEWORK}" = "sglang" ]; then \
        mkdir -p /opt/dynamo/venv && \
        python3 -m venv --system-site-packages /opt/dynamo/venv && \
        # Copy all packages from runtime stage system site-packages into venv
        # This includes ai-dynamo-runtime, kubernetes, and all other dependencies
        # Use --no-preserve=mode so copied files inherit umask 002 (group-writable)
        cp -r --no-preserve=mode /usr/local/lib/python3.12/dist-packages/* \
            /opt/dynamo/venv/lib/python3.12/site-packages/ && \
        # Ensure `uv` is available on PATH for subsequent `uv pip ...` steps.
        cp /tmp/uv-binary /opt/dynamo/venv/bin/uv && \
        chmod +x /opt/dynamo/venv/bin/uv && \
        # Install maturin (with patchelf) so we can build/repair wheels when needed.
        # The requirement is quoted so the shell never treats [patchelf] as a glob.
        pip install --no-cache-dir --ignore-installed "maturin[patchelf]"; \
    elif [ "${FRAMEWORK}" = "none" ] && [ ! -d /opt/dynamo/venv ]; then \
        mkdir -p /opt/dynamo && \
        python3 -m venv /opt/dynamo/venv; \
    fi
# Initialize Git LFS for the dynamo user (required for requirements with lfs=true)
RUN git lfs install
# Install common and test dependencies (matches main Dockerfile dev stage)
# This installs pytest-benchmark and other test dependencies required for CI
# Also installs cupy-cuda13x with a special index strategy (carried over from Dockerfile.trtllm);
# note that the install below is not conditional on FRAMEWORK, so it applies to every framework.
# SGLang specific: Reinstall pytest to ensure venv has pytest executable with correct shebang
# The requirements files are bind-mounted from the build context for this step only.
ARG FRAMEWORK
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
--mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
UV_GIT_LFS=1 uv pip install \
--no-cache \
--index-strategy unsafe-best-match \
--extra-index-url https://download.pytorch.org/whl/cu130 \
--requirement /tmp/requirements.txt \
--requirement /tmp/requirements.test.txt \
cupy-cuda13x && \
if [ "${FRAMEWORK}" = "sglang" ]; then \
uv pip install --force-reinstall --no-deps pytest; \
fi
# Copy entire workspace (old design - simpler for CI)
# .dockerignore filters out unwanted files (.git, build artifacts, etc.)
WORKDIR ${WORKSPACE_DIR}
COPY --chmod=775 --chown=dynamo:0 ./ ${WORKSPACE_DIR}/
RUN chmod g+w ${WORKSPACE_DIR}
# Install benchmarks package (includes prefix_data_generator, tabulate, etc.)
RUN cd ${WORKSPACE_DIR}/benchmarks && \
UV_GIT_LFS=1 uv pip install --no-cache .
# Install maturin and create editable install entry points.
#
# Why the `uv` check:
# - This dev stage uses `uv` earlier (requirements + benchmarks). For SGLang, we also install an `uv` binary into
#   /opt/dynamo/venv/bin and put that venv on PATH, so `uv` is expected to be available here in normal builds.
# - The `command -v uv` guard is defensive: if `uv` is ever missing from PATH, we fall back to
#   `python3 -m pip` so the editable install can still proceed (instead of failing mid-layer with a confusing error).
#
# Only the final cache chmod is best-effort. It is wrapped in its own `{ ...; }` group so that its
# `|| true` cannot mask a failed maturin/editable install earlier in the chain.
RUN if [ ! -f pyproject.toml ]; then \
        echo "ERROR: pyproject.toml not found in ${WORKSPACE_DIR}; expected to build from the Dynamo repo root." >&2; \
        exit 1; \
    fi && \
    if command -v uv >/dev/null 2>&1; then \
        uv pip install --no-cache "maturin[patchelf]" && uv pip install --no-deps -e . ; \
    else \
        python3 -m pip install --no-cache-dir "maturin[patchelf]" && python3 -m pip install --no-deps -e . ; \
    fi && \
    { chmod -R g+w /root/.cache /home/dynamo/.cache 2>/dev/null || true; }
# Set commit SHA for tests (passed via build.sh as --build-arg)
ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
# ======================================================================
# TARGET: local-dev (non-root development with UID/GID remapping)
# ======================================================================
FROM dev AS local-dev
ENV USERNAME=dynamo
ARG USER_UID
ARG USER_GID
# Copy rustup home into a writable per-user location so sanity_check passes.
# (dev target already has rustup/cargo/maturin from concatenated wheel_builder/dynamo_base)
# Ownership is set during the copy with COPY --chown, so no recursive chown layer is needed
# (see the "never chown -R in local-dev" memo in this stage).
COPY --from=dev --chown=dynamo:0 /usr/local/rustup /home/dynamo/.rustup
# Put rustup state under the user's home (writable) while still using /usr/local/cargo/bin shims.
ENV RUSTUP_HOME=/home/${USERNAME}/.rustup
ENV CARGO_HOME=/home/${USERNAME}/.cargo
ENV PATH=/usr/local/cargo/bin:/usr/local/bin:${CARGO_HOME}/bin:${PATH}
# https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user
# Configure user with sudo access for Dev Container workflows
#
# 🚨 PERFORMANCE / PERMISSIONS MEMO (DO NOT VIOLATE)
# NEVER use `chown -R` or `chmod -R` in local-dev images.
# - It can take minutes on large mounts (and makes devcontainers feel "hung")
# - It is unnecessary: permissioning should be done via COPY --chmod/--chown and a few targeted, non-recursive ops.
# If you think you need recursion here, stop and redesign the permissions flow.
#
# USER_UID / USER_GID have no defaults in this stage, so they are checked up front: with empty values
# the group/user commands below would fail with confusing argument errors.
RUN if [ -z "${USER_UID}" ] || [ -z "${USER_GID}" ]; then \
        echo "ERROR: USER_UID and USER_GID build args are required for the local-dev target" >&2; \
        exit 1; \
    fi \
    && mkdir -p /etc/sudoers.d \
    && echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
    && chmod 0440 /etc/sudoers.d/$USERNAME \
    && mkdir -p /home/$USERNAME \
    # Handle GID conflicts: if target GID exists and it's not our group, remove it
    && (getent group $USER_GID | grep -v "^$USERNAME:" && groupdel $(getent group $USER_GID | cut -d: -f1) || true) \
    # Create group if it doesn't exist, otherwise modify existing group
    && (getent group $USERNAME > /dev/null 2>&1 && groupmod -g $USER_GID $USERNAME || groupadd -g $USER_GID $USERNAME) \
    # Remap the user to the requested UID/GID and keep group 0 as a supplementary group
    && usermod -u $USER_UID -g $USER_GID -G 0 $USERNAME \
    # Non-recursive on purpose (see memo above)
    && chown $USERNAME:$USER_GID /home/$USERNAME \
    && chsh -s /bin/bash $USERNAME
# Set workspace directory variable
ENV WORKSPACE_DIR=${WORKSPACE_DIR}
# Development environment variables for the local-dev target
# Path configuration notes:
# - DYNAMO_HOME: Main project directory (workspace mount point)
# - CARGO_TARGET_DIR: Build artifacts in workspace/target for persistence
# - PATH: Includes cargo binaries for rust tool access
ENV HOME=/home/$USERNAME
ENV DYNAMO_HOME=${WORKSPACE_DIR}
ENV CARGO_TARGET_DIR=${WORKSPACE_DIR}/target
ENV PATH=${CARGO_HOME}/bin:$PATH
# Switch to dynamo user (dev stage has umask 002, so files should already be group-writable)
USER $USERNAME
WORKDIR $HOME
# Create user-level cargo/rustup state dirs as the target user (avoids root-owned caches).
RUN mkdir -p "${CARGO_HOME}" "${RUSTUP_HOME}"
# Ensure Python user site-packages exists and is writable (important for non-venv frameworks like SGLang).
RUN python3 -c 'import os, site; p = site.getusersitepackages(); os.makedirs(p, exist_ok=True); print(p)'
# Persist bash history (see https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history):
# create a group-writable $HOME/.commandhistory holding the history file, and make every
# shell append to it by exporting PROMPT_COMMAND/HISTFILE from ~/.bashrc.
RUN hist_dir="${HOME}/.commandhistory" \
    && mkdir -p "${hist_dir}" \
    && chmod g+w "${hist_dir}" \
    && touch "${hist_dir}/.bash_history" \
    && echo "export PROMPT_COMMAND='history -a' && export HISTFILE=${hist_dir}/.bash_history" >> "${HOME}/.bashrc"
RUN mkdir -p /home/$USERNAME/.cache/ \
&& chmod g+w /home/$USERNAME/.cache/
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment