Unverified Commit a60cdf59 authored by Keiven C's avatar Keiven C Committed by GitHub
Browse files

feat: optimize dev/local-dev Dockerfiles for source-based development (part 1) (#6743)


Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
parent a01cd9c1
......@@ -22,6 +22,7 @@ ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
ARG DYNAMO_DOCS_VERSION=dev
ENV DYNAMO_DOCS_VERSION=$DYNAMO_DOCS_VERSION
# uv/uvx are copied from the official uv image. The image is pinned to an explicit
# version tag (not :latest) so rebuilds are reproducible; bump the tag deliberately.
COPY --from=ghcr.io/astral-sh/uv:0.10.7 /uv /uvx /bin/
RUN apt-get update && \
......
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
Dynamo Dev Container
====================
/workspace is bind-mounted from your host. Before running or testing,
compile the Rust extensions and install the Python packages:
cargo build --features dynamo-llm/block-manager
cd /workspace/lib/bindings/python && maturin develop --uv && cd /workspace
uv pip install --no-deps -e /workspace
Verify with:
/workspace/deploy/sanity_check.py
......@@ -28,6 +28,7 @@ SHELL ["/bin/bash", "-c"]
# This stage only installs generic developer tools that are available from Ubuntu repos, so CUDA repos are unnecessary.
#
# We also add a small retry/backoff to make transient apt metadata issues less disruptive.
# Estimated layer size: ~800MB–1.0GB (build-essential+clang ~500MB, the rest ~300MB)
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
set -eux; \
......@@ -113,7 +114,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
# Initialize Git LFS for the dynamo user (required for requirements with lfs=true)
git lfs install
# Install awk separately with fault tolerance.
# Install awk separately with fault tolerance (~2MB).
# awk is a virtual package with multiple implementations (gawk, mawk, original-awk).
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
......@@ -126,6 +127,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
(command -v awk >/dev/null 2>&1 && echo "awk available: $(command -v awk)" || echo "awk not available")
# Add NVIDIA devtools repository and install development tools (nsight-systems).
# Estimated layer size: ~500MB–1.5GB (nsight-systems is a full profiling suite)
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
wget -qO - "https://developer.download.nvidia.com/devtools/repos/ubuntu2404/${ARCH}/nvidia.pub" \
......@@ -136,9 +138,25 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get install -y --no-install-recommends nsight-systems-2025.5.1 && \
rm -rf /var/lib/apt/lists/*
# TODO: Add GitHub CLI (gh) for development. Estimated layer size: ~50MB
# ======================================================================
# TARGET: dev (root-based development)
# ======================================================================
#
# USAGE: This dev image ships /workspace EMPTY. You MUST:
#
# 1) Bind-mount your Dynamo repo checkout into the container:
# docker run --gpus all -v /path/to/dynamo:/workspace ...
#
# 2) Build from source inside the container:
# cargo build --features dynamo-llm/block-manager
# cd /workspace/lib/bindings/python && maturin develop --uv
# uv pip install --no-deps -e /workspace
#
# The pre-built ai-dynamo / ai-dynamo-runtime wheels from the runtime
# stage are uninstalled below to avoid conflicts with the source build.
# ======================================================================
FROM runtime AS dev
# Redeclare ARGs for use in this stage
......@@ -260,7 +278,7 @@ ARG WORKSPACE_DIR=/workspace
# Framework-specific PATH additions are handled in /etc/profile.d/50-framework-paths.sh
ENV WORKSPACE_DIR=${WORKSPACE_DIR} \
DYNAMO_HOME=${WORKSPACE_DIR} \
RUSTUP_HOME=/usr/local/rustup \
RUSTUP_HOME=/home/dynamo/.rustup \
CARGO_HOME=/usr/local/cargo \
CARGO_TARGET_DIR=/workspace/target \
VIRTUAL_ENV=/opt/dynamo/venv \
......@@ -269,34 +287,27 @@ ENV WORKSPACE_DIR=${WORKSPACE_DIR} \
# Copy Rust/Cargo/Maturin from the concatenated framework stages.
# - Rust/Cargo: from `wheel_builder` (already installed there)
# - maturin: from `wheel_builder` venv (installed there via uv pip)
COPY --from=wheel_builder --chown=dynamo:0 --chmod=775 /usr/local/rustup /usr/local/rustup
COPY --from=wheel_builder --chown=dynamo:0 --chmod=775 /usr/local/rustup /home/dynamo/.rustup
COPY --from=wheel_builder --chown=dynamo:0 --chmod=775 /usr/local/cargo /usr/local/cargo
COPY --from=wheel_builder --chown=dynamo:0 --chmod=775 /workspace/.venv/bin/maturin /usr/local/bin/maturin
# Provide an `uv` binary for SGLang venv creation below.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /tmp/uv-binary
# Create venv for SGLang (vLLM/TensorRT-LLM/framework=none already have /opt/dynamo/venv from runtime)
# - SGLang: Use --system-site-packages to inherit runtime packages, then copy user site-packages
# - framework=none: Runtime already has venv with dynamo packages installed
# Note: umask 002 from login shell ensures files are group-writable
RUN if [ "${FRAMEWORK}" = "sglang" ]; then \
mkdir -p /opt/dynamo/venv && \
{% if framework == "sglang" %}
# SGLang: Create venv with --system-site-packages to inherit runtime packages
COPY --from=ghcr.io/astral-sh/uv:0.10.7 /uv /tmp/uv-binary
RUN mkdir -p /opt/dynamo/venv && \
python3 -m venv --system-site-packages /opt/dynamo/venv && \
# Copy all packages from runtime stage system site-packages into venv
# This includes ai-dynamo-runtime, kubernetes, and all other dependencies
# Use --no-preserve=mode so copied files inherit umask 002 (group-writable)
cp -r --no-preserve=mode /usr/local/lib/python${PYTHON_VERSION}/dist-packages/* \
/opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages/; \
# Ensure `uv` is available on PATH for subsequent `uv pip ...` steps.
/opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages/ && \
cp /tmp/uv-binary /opt/dynamo/venv/bin/uv && \
chmod +x /opt/dynamo/venv/bin/uv && \
# Install maturin into the base interpreter so we can build/repair wheels when needed.
pip install --ignore-installed maturin[patchelf]; \
elif [ "${FRAMEWORK}" = "none" ] && [ ! -d /opt/dynamo/venv ]; then \
pip install --ignore-installed maturin[patchelf]
{% elif framework == "dynamo" %}
# framework=none: Create venv if runtime stage didn't already provide one
RUN if [ ! -d /opt/dynamo/venv ]; then \
mkdir -p /opt/dynamo && \
python3 -m venv /opt/dynamo/venv; \
fi
{% endif %}
# Initialize Git LFS for the dynamo user (required for requirements with lfs=true)
RUN git lfs install
......@@ -323,42 +334,37 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi
# Copy entire workspace (old design - simpler for CI)
# .dockerignore filters out unwanted files (.git, build artifacts, etc.)
WORKDIR ${WORKSPACE_DIR}
COPY --chmod=775 --chown=dynamo:0 ./ ${WORKSPACE_DIR}/
RUN chmod g+w ${WORKSPACE_DIR}
# Install benchmarks package (includes prefix_data_generator, tabulate, etc.)
RUN --mount=type=cache,target=/root/.cache/uv \
cd ${WORKSPACE_DIR}/benchmarks && \
export UV_CACHE_DIR=/root/.cache/uv UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
uv pip install .
# Install maturin and create editable install entry points.
#
# Why the `uv` check:
# - This dev stage uses `uv` earlier (requirements + benchmarks). For SGLang, we also install an `uv` binary into
# /opt/dynamo/venv/bin and put that venv on PATH, so `uv` is expected to be available here in normal builds.
# - The `command -v uv` guard is defensive: if `uv` ever "disappears" from PATH (e.g. on SGLang), we fall back to
# `python3 -m pip` so the editable install can still proceed (instead of failing mid-layer with a confusing error).
# Cache uv downloads; uv handles its own locking for this cache.
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=cache,target=/root/.cache/pip,sharing=locked \
export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 PIP_CACHE_DIR=/root/.cache/pip && \
if [ -f pyproject.toml ]; then \
if command -v uv >/dev/null 2>&1; then \
uv pip install maturin[patchelf] && uv pip install --no-deps -e . ; \
else \
python3 -m pip install maturin[patchelf] && python3 -m pip install --no-deps -e . ; \
fi; \
# /workspace is intentionally not populated at build time: for development, the repo must be bind-mounted there as a volume.
#COPY --chmod=775 --chown=dynamo:0 ./ ${WORKSPACE_DIR}/
RUN mkdir -p ${WORKSPACE_DIR} && chmod g+w ${WORKSPACE_DIR}
# Remove pre-built dynamo packages inherited from the runtime stage.
# The dev image builds from source, so these would conflict with the editable installs.
# NOTE: This does NOT reclaim disk space in the image (files still exist in lower layers).
# Space is only recovered if the image is later squashed / compacted (e.g. docker-squash,
# `docker build --squash`, or export/import).
# Best-effort: stderr is discarded and a non-zero exit status is ignored (`|| true`),
# so this layer succeeds whether or not the uninstall command itself succeeds.
RUN uv pip uninstall ai-dynamo ai-dynamo-runtime kvbm 2>/dev/null || true
# Install maturin only (no editable install of the dynamo package).
# /workspace is empty at build time — the repo is bind-mounted at container start, not COPYed.
# `uv pip install -e .` would fail here because there is no pyproject.toml in /workspace yet.
# The editable install must be done at runtime after the volume mount (e.g. `maturin develop`).
RUN if command -v uv >/dev/null 2>&1; then \
uv pip install maturin[patchelf] ; \
else \
echo "ERROR: pyproject.toml not found in ${WORKSPACE_DIR}; expected to build from the Dynamo repo root." >&2; \
exit 1; \
fi && \
chmod -R g+w /root/.cache /home/dynamo/.cache 2>/dev/null || true
python3 -m pip install maturin[patchelf] ; \
fi
# Set commit SHA for tests (passed via docker build as --build-arg)
ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
# Setup dev launch banner (displayed on interactive shell entry)
# - The banner source file is bind-mounted for this RUN only; it is read, not copied into the image as-is.
# - sed deletes every line that starts with `#` followed by a whitespace character and writes the
#   remaining lines to /opt/dynamo/.launch_screen (`\s` relies on GNU sed).
# - A `cat` of that generated file is appended to /etc/bash.bashrc so the banner is printed from there.
RUN --mount=type=bind,source=./container/launch_message/dev.txt,target=/opt/dynamo/launch_message.txt \
sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen && \
chmod 755 /opt/dynamo/.launch_screen && \
echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
......@@ -25,6 +25,7 @@ RUN wget --tries=3 --waitretry=5 \
rm -rf sccache*
# Install uv package manager
# The uv image is pinned to an explicit version tag (not :latest) so rebuilds are
# reproducible; bump the tag deliberately when upgrading uv.
COPY --from=ghcr.io/astral-sh/uv:0.10.7 /uv /uvx /bin/
# Install NATS server
......
......@@ -52,15 +52,19 @@ RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/loca
cp -nL /tmp/usr/local/lib/pkgconfig/libav*.pc /tmp/usr/local/lib/pkgconfig/libsw*.pc /usr/local/lib/pkgconfig/ && \
cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/
# Copy built artifacts
{% if target not in ("dev", "local-dev") %}
# Copy built artifacts (not needed for dev/local-dev; users build from source)
COPY --chown=dynamo: --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR
{% endif %}
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
# Install Python for framework=none runtime (cuda-dl-base doesn't include Python)
# This is needed to create venv and install dynamo packages
ARG PYTHON_VERSION
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
# Clear partial downloads first to avoid stale rename failures from prior interrupted builds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
rm -rf /var/cache/apt/archives/partial/* && \
apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
python${PYTHON_VERSION}-dev \
......@@ -74,7 +78,6 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
patchelf \
git \
git-lfs && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3
......@@ -93,24 +96,16 @@ RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
ENV VIRTUAL_ENV=/opt/dynamo/venv \
PATH="/opt/dynamo/venv/bin:${PATH}"
{% if target not in ("dev", "local-dev") %}
# Install dynamo wheels (runtime packages only, no test dependencies)
# uv handles its own locking for the cache, no need to add sharing=locked
ARG ENABLE_KVBM
ARG ENABLE_GPU_MEMORY_SERVICE
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
uv pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
/opt/dynamo/wheelhouse/nixl/nixl*.whl && \
if [ "$ENABLE_GPU_MEMORY_SERVICE" = "true" ]; then \
GMS_WHEEL=$(ls /opt/dynamo/wheelhouse/gpu_memory_service*.whl 2>/dev/null | head -1); \
if [ -z "$GMS_WHEEL" ]; then \
echo "ERROR: ENABLE_GPU_MEMORY_SERVICE is true but no gpu_memory_service wheel found in wheelhouse" >&2; \
exit 1; \
fi; \
uv pip install "$GMS_WHEEL"; \
fi && \
if [ "$ENABLE_KVBM" = "true" ]; then \
KVBM_WHEEL=$(ls /opt/dynamo/wheelhouse/kvbm*.whl 2>/dev/null | head -1); \
if [ -z "$KVBM_WHEEL" ]; then \
......@@ -119,6 +114,22 @@ RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
fi; \
uv pip install "$KVBM_WHEEL"; \
fi
{% else %}
# Dev/local-dev images do not install the pre-built dynamo wheels: users build dynamo
# from source (cargo build + maturin develop) after the container starts.
# Only the NIXL wheel is installed here (pre-built C++ binary, not buildable from source).
# UV_CACHE_DIR points uv at the BuildKit cache mount for the duration of this command.
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
    UV_CACHE_DIR=/home/dynamo/.cache/uv \
    uv pip install /opt/dynamo/wheelhouse/nixl/nixl*.whl
{% endif %}
# Install gpu_memory_service wheel if enabled (all targets).
# The first file matching gpu_memory_service*.whl in the wheelhouse is installed with uv.
# If the feature is enabled but no wheel is present, the install is skipped without
# failing the build, and a warning is written to stderr so the missing package shows
# up in the build log instead of being dropped silently.
ARG ENABLE_GPU_MEMORY_SERVICE
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
    if [ "${ENABLE_GPU_MEMORY_SERVICE}" = "true" ]; then \
        export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
        GMS_WHEEL=$(ls /opt/dynamo/wheelhouse/gpu_memory_service*.whl 2>/dev/null | head -1); \
        if [ -n "$GMS_WHEEL" ]; then \
            uv pip install "$GMS_WHEEL"; \
        else \
            echo "WARNING: ENABLE_GPU_MEMORY_SERVICE is true but no gpu_memory_service wheel found in wheelhouse; skipping install" >&2; \
        fi; \
    fi
# Initialize Git LFS (required for git+https dependencies with LFS artifacts)
RUN git lfs install
......@@ -134,6 +145,7 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi
--requirement /tmp/requirements.txt \
--requirement /tmp/requirements.test.txt
# TODO: skip /workspace COPY for dev/local-dev (bind-mounted from host, gets shadowed)
# Copy workspace source code
ARG WORKSPACE_DIR=/workspace
WORKDIR ${WORKSPACE_DIR}
......
......@@ -16,12 +16,8 @@ ENV USERNAME=dynamo
ARG USER_UID
ARG USER_GID
# Copy rustup home into a writable per-user location so sanity_check passes.
# (dev target already has rustup/cargo/maturin from concatenated wheel_builder/dynamo_base)
RUN cp -r /usr/local/rustup /home/dynamo/.rustup && \
chown -R dynamo:0 /home/dynamo/.rustup
# Put rustup state under the user's home (writable) while still using /usr/local/cargo/bin shims.
# rustup is already at /home/dynamo/.rustup from the dev stage (COPY --from=wheel_builder
# with --chown=dynamo:0 --chmod=775), so no re-copy needed here.
ENV RUSTUP_HOME=/home/${USERNAME}/.rustup
ENV CARGO_HOME=/home/${USERNAME}/.cargo
ENV PATH=/usr/local/cargo/bin:/usr/local/bin:${CARGO_HOME}/bin:${PATH}
......
......@@ -66,27 +66,40 @@ COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/nixl/ /o
COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/
ENV SGLANG_VERSION="${RUNTIME_IMAGE_TAG%%-*}"
{% if target not in ("dev", "local-dev") %}
# Install packages as root to ensure they go to system location (/usr/local/lib/python3.12/dist-packages)
ARG ENABLE_GPU_MEMORY_SERVICE
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
export PIP_CACHE_DIR=/root/.cache/pip && \
pip install --break-system-packages \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
/opt/dynamo/wheelhouse/nixl/nixl*.whl \
sglang==${SGLANG_VERSION} && \
sglang==${SGLANG_VERSION}
{% else %}
# Dev/local-dev: skip dynamo wheel install (users build from source via cargo build + maturin develop).
# Install NIXL wheel (pre-built C++ binary, not buildable from source) and sglang.
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
export PIP_CACHE_DIR=/root/.cache/pip && \
pip install --break-system-packages \
/opt/dynamo/wheelhouse/nixl/nixl*.whl \
sglang==${SGLANG_VERSION}
{% endif %}
# Install gpu_memory_service wheel if enabled (all targets)
ARG ENABLE_GPU_MEMORY_SERVICE
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
if [ "${ENABLE_GPU_MEMORY_SERVICE}" = "true" ]; then \
export PIP_CACHE_DIR=/root/.cache/pip && \
GMS_WHEEL=$(ls /opt/dynamo/wheelhouse/gpu_memory_service*.whl 2>/dev/null | head -1); \
if [ -z "$GMS_WHEEL" ]; then \
echo "ERROR: ENABLE_GPU_MEMORY_SERVICE is true but no gpu_memory_service wheel found in wheelhouse" >&2; \
exit 1; \
fi; \
pip install --no-cache-dir --break-system-packages "$GMS_WHEEL"; \
if [ -n "$GMS_WHEEL" ]; then pip install --no-cache-dir --break-system-packages "$GMS_WHEEL"; fi; \
fi
{% if target not in ("dev", "local-dev") %}
# Copy benchmarks after wheel install so benchmarks changes don't invalidate the layer above
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
COPY --chmod=775 --chown=dynamo:0 benchmarks/ /workspace/benchmarks/
{% endif %}
# Install common and test dependencies as root
RUN --mount=type=bind,source=container/deps/requirements.txt,target=/tmp/deps/requirements.txt \
......@@ -97,12 +110,17 @@ RUN --mount=type=bind,source=container/deps/requirements.txt,target=/tmp/deps/re
--requirement /tmp/deps/requirements.txt \
--requirement /tmp/deps/requirements.test.txt \
sglang==${SGLANG_VERSION} && \
#TODO: Temporary change until upstream sglang runtime image is updated
pip install --break-system-packages "urllib3>=2.6.3"
{% if target not in ("dev", "local-dev") %}
# Install benchmarks and fix permissions (dev/local-dev install from bind-mounted source if needed)
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
export PIP_CACHE_DIR=/root/.cache/pip && \
cd /workspace/benchmarks && \
pip install --break-system-packages . && \
#TODO: Temporary change until upstream sglang runtime image is updated
pip install --break-system-packages "urllib3>=2.6.3" && \
# pip/uv bypasses umask when creating .egg-info files, but chmod -R is fast here (small directory)
chmod -R g+w /workspace/benchmarks
{% endif %}
# Force-reinstall NVIDIA packages in a separate layer so requirements.txt changes don't trigger re-download
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
......
......@@ -199,28 +199,24 @@ $LD_LIBRARY_PATH
ENV NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
ENV OPAL_PREFIX=/opt/hpcx/ompi
# TODO: skip /workspace COPYs for dev/local-dev (bind-mounted from host, these get shadowed)
COPY --chmod=664 --chown=dynamo:0 ATTRIBUTION* LICENSE /workspace/
{% if target not in ("dev", "local-dev") %}
COPY --chmod=775 --chown=dynamo:0 benchmarks/ /workspace/benchmarks/
{% endif %}
# Install dynamo, NIXL, and dynamo-specific dependencies
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
ARG ENABLE_KVBM
ARG ENABLE_GPU_MEMORY_SERVICE
COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
{% if target not in ("dev", "local-dev") %}
# Install dynamo, NIXL, and dynamo-specific dependencies
ARG ENABLE_KVBM
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
uv pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
/opt/dynamo/wheelhouse/nixl/nixl*.whl && \
if [ "${ENABLE_GPU_MEMORY_SERVICE}" = "true" ]; then \
GMS_WHEEL=$(ls /opt/dynamo/wheelhouse/gpu_memory_service*.whl 2>/dev/null | head -1); \
if [ -z "$GMS_WHEEL" ]; then \
echo "ERROR: ENABLE_GPU_MEMORY_SERVICE is true but no gpu_memory_service wheel found in wheelhouse" >&2; \
exit 1; \
fi; \
uv pip install "$GMS_WHEEL"; \
fi && \
if [ "${ENABLE_KVBM}" = "true" ]; then \
KVBM_WHEEL=$(ls /opt/dynamo/wheelhouse/kvbm*.whl 2>/dev/null | head -1); \
if [ -z "$KVBM_WHEEL" ]; then \
......@@ -231,8 +227,23 @@ RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
fi && \
cd /workspace/benchmarks && \
UV_GIT_LFS=1 uv pip install --no-cache . && \
# pip/uv bypasses umask when creating .egg-info files, but chmod -R is fast here (small directory)
chmod -R g+w /workspace/benchmarks
{% else %}
# Dev/local-dev images do not install the pre-built dynamo wheels: users build dynamo
# from source (cargo build + maturin develop) after the container starts.
# Only the NIXL wheel is installed here (pre-built C++ binary, not buildable from source).
# UV_CACHE_DIR points uv at the BuildKit cache mount for the duration of this command.
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
    UV_CACHE_DIR=/home/dynamo/.cache/uv \
    uv pip install /opt/dynamo/wheelhouse/nixl/nixl*.whl
{% endif %}
# Install gpu_memory_service wheel if enabled (all targets).
# The first file matching gpu_memory_service*.whl in the wheelhouse is installed with uv.
# If the feature is enabled but no wheel is present, the install is skipped without
# failing the build, and a warning is written to stderr so the missing package shows
# up in the build log instead of being dropped silently.
ARG ENABLE_GPU_MEMORY_SERVICE
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
    if [ "${ENABLE_GPU_MEMORY_SERVICE}" = "true" ]; then \
        export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
        GMS_WHEEL=$(ls /opt/dynamo/wheelhouse/gpu_memory_service*.whl 2>/dev/null | head -1); \
        if [ -n "$GMS_WHEEL" ]; then \
            uv pip install "$GMS_WHEEL"; \
        else \
            echo "WARNING: ENABLE_GPU_MEMORY_SERVICE is true but no gpu_memory_service wheel found in wheelhouse; skipping install" >&2; \
        fi; \
    fi
# Install common and test dependencies
# --no-cache is intentional: mixed indexes (PyPI + PyTorch CUDA wheels) risk serving stale/wrong-variant cached wheels
......
......@@ -187,30 +187,25 @@ $NIXL_PLUGIN_DIR:\
$LD_LIBRARY_PATH
ENV NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
# Copy attribution files
# TODO: skip /workspace COPYs for dev/local-dev (bind-mounted from host, these get shadowed)
COPY --chmod=664 --chown=dynamo:0 ATTRIBUTION* LICENSE /workspace/
{% if target not in ("dev", "local-dev") %}
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
COPY --chmod=775 --chown=dynamo:0 benchmarks/ /workspace/benchmarks/
{% endif %}
# Install dynamo, NIXL, and dynamo-specific dependencies
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
ARG ENABLE_KVBM
ARG ENABLE_GPU_MEMORY_SERVICE
COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
{% if target not in ("dev", "local-dev") %}
# Install dynamo, NIXL, and dynamo-specific dependencies
ARG ENABLE_KVBM
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
uv pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
/opt/dynamo/wheelhouse/nixl/nixl*.whl && \
if [ "${ENABLE_GPU_MEMORY_SERVICE}" = "true" ]; then \
GMS_WHEEL=$(ls /opt/dynamo/wheelhouse/gpu_memory_service*.whl 2>/dev/null | head -1); \
if [ -z "$GMS_WHEEL" ]; then \
echo "ERROR: ENABLE_GPU_MEMORY_SERVICE is true but no gpu_memory_service wheel found in wheelhouse" >&2; \
exit 1; \
fi; \
uv pip install "$GMS_WHEEL"; \
fi && \
if [ "${ENABLE_KVBM}" = "true" ]; then \
KVBM_WHEEL=$(ls /opt/dynamo/wheelhouse/kvbm*.whl 2>/dev/null | head -1); \
if [ -z "$KVBM_WHEEL" ]; then \
......@@ -222,8 +217,23 @@ RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
cd /workspace/benchmarks && \
export UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
uv pip install . && \
# pip/uv bypasses umask when creating .egg-info files, but chmod -R is fast here (small directory)
chmod -R g+w /workspace/benchmarks
{% else %}
# Dev/local-dev images do not install the pre-built dynamo wheels: users build dynamo
# from source (cargo build + maturin develop) after the container starts.
# Only the NIXL wheel is installed here (pre-built C++ binary, not buildable from source).
# UV_CACHE_DIR points uv at the BuildKit cache mount for the duration of this command.
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
    UV_CACHE_DIR=/home/dynamo/.cache/uv \
    uv pip install /opt/dynamo/wheelhouse/nixl/nixl*.whl
{% endif %}
# Install gpu_memory_service wheel if enabled (all targets).
# The first file matching gpu_memory_service*.whl in the wheelhouse is installed with uv.
# If the feature is enabled but no wheel is present, the install is skipped without
# failing the build, and a warning is written to stderr so the missing package shows
# up in the build log instead of being dropped silently.
ARG ENABLE_GPU_MEMORY_SERVICE
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
    if [ "${ENABLE_GPU_MEMORY_SERVICE}" = "true" ]; then \
        export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
        GMS_WHEEL=$(ls /opt/dynamo/wheelhouse/gpu_memory_service*.whl 2>/dev/null | head -1); \
        if [ -n "$GMS_WHEEL" ]; then \
            uv pip install "$GMS_WHEEL"; \
        else \
            echo "WARNING: ENABLE_GPU_MEMORY_SERVICE is true but no gpu_memory_service wheel found in wheelhouse; skipping install" >&2; \
        fi; \
    fi
# Install ModelExpress for P2P weight transfer (optional)
......
......@@ -310,6 +310,7 @@ RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
echo "$NIXL_PLUGIN_DIR" >> /etc/ld.so.conf.d/nixl.conf && \
ldconfig
# Build NIXL wheel → /opt/dynamo/dist/nixl/nixl*.whl (C++ transport library, all targets)
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
--mount=type=cache,target=/root/.cache/uv \
......@@ -321,12 +322,17 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
cd /workspace/nixl && \
uv build . --wheel --out-dir /opt/dynamo/dist/nixl --python $PYTHON_VERSION
{% if target not in ("dev", "local-dev") %}
# Copy source code (order matters for layer caching)
COPY pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /opt/dynamo/
COPY lib/ /opt/dynamo/lib/
COPY components/ /opt/dynamo/components/
# Build dynamo wheels. The cache mounts do not need `sharing=locked` because Cargo has its own locking mechanism.
# Build dynamo wheels → /opt/dynamo/dist/:
# uv build → ai_dynamo*any.whl (main Python package)
# maturin build → ai_dynamo_runtime*.whl (Rust bindings)
# maturin build → kvbm*.whl (KV block manager, conditional on ENABLE_KVBM)
# The cache mounts do not need `sharing=locked` because Cargo has its own locking mechanism.
ARG ENABLE_KVBM
ARG ENABLE_MEDIA_FFMPEG
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
......@@ -364,7 +370,23 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
/tmp/use-sccache.sh show-stats "Dynamo"
# Build gpu_memory_service wheel (C++ extension only needs Python headers, no CUDA/torch)
{% else %}
# Dev/local-dev targets do not have pre-built wheels or /workspace source code.
# After you start the local-dev/dev container, you will need to build from source:
# cargo build --features dynamo-llm/block-manager
# cd /workspace/lib/bindings/python && maturin develop --uv && cd /workspace
# uv pip install --no-deps -e /workspace
# See container/launch_message/dev.txt for the full setup steps.
# Create dist dir with a placeholder so downstream COPY --from=wheel_builder /opt/dynamo/dist/*.whl always has a match.
RUN mkdir -p /opt/dynamo/dist ${CARGO_TARGET_DIR} && \
touch /opt/dynamo/dist/.placeholder.whl
# Dev/local-dev skip the full COPY lib/ above, so copy gpu_memory_service source explicitly for the wheel build below
COPY lib/gpu_memory_service/ /opt/dynamo/lib/gpu_memory_service/
{% endif %}
# Build gpu-memory-service wheel → /opt/dynamo/dist/gpu_memory_service*.whl (small C++ extension, fast build -- all targets, all frameworks)
ARG ENABLE_GPU_MEMORY_SERVICE
RUN --mount=type=cache,target=/root/.cache/uv \
if [ "$ENABLE_GPU_MEMORY_SERVICE" = "true" ]; then \
......
......@@ -83,6 +83,7 @@ ENV PATH=/usr/local/ucx/bin:$PATH
### VIRTUAL ENVIRONMENT SETUP ###
# Copy uv directly from official image (like Dockerfile.vllm does)
# The uv image is pinned to an explicit version tag (not :latest) so rebuilds are
# reproducible; bump the tag deliberately when upgrading uv.
COPY --from=ghcr.io/astral-sh/uv:0.10.7 /uv /uvx /bin/
# Create fresh virtual environment (following Dockerfile.vllm pattern)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment