"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "a99564ac5b2bc86d97cb18a7e18086b4ba94466a"
Unverified Commit f70dd663 authored by Keiven C's avatar Keiven C Committed by GitHub
Browse files

fix: uv network timeout to be more resilient to intermittent network issues (part 2) (#5530)


Signed-off-by: default avatarKeiven Chang <keivenchang@users.noreply.github.com>
Co-authored-by: default avatarKeiven Chang <keivenchang@users.noreply.github.com>
parent 3b9b3f31
......@@ -207,7 +207,10 @@ ENV CUDA_PATH=/usr/local/cuda \
# Create virtual environment for building wheels
ARG PYTHON_VERSION
ENV VIRTUAL_ENV=/workspace/.venv
RUN uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
# Cache uv downloads; uv handles its own locking for this cache.
RUN --mount=type=cache,target=/root/.cache/uv \
export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
uv pip install --upgrade meson pybind11 patchelf maturin[patchelf] tomlkit
ARG NIXL_UCX_REF
......@@ -385,6 +388,8 @@ RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
--mount=type=cache,target=/root/.cache/uv \
export UV_CACHE_DIR=/root/.cache/uv && \
export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
......@@ -400,10 +405,14 @@ COPY launch/ /opt/dynamo/launch/
COPY lib/ /opt/dynamo/lib/
COPY components/ /opt/dynamo/components/
# Build dynamo wheels
# Build dynamo wheels. The cache mounts do not need sharing=locked because Cargo has its own locking mechanism.
ARG ENABLE_KVBM
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
--mount=type=cache,target=/root/.cargo/registry \
--mount=type=cache,target=/root/.cargo/git \
--mount=type=cache,target=/root/.cache/uv \
export UV_CACHE_DIR=/root/.cache/uv && \
export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
......@@ -496,7 +505,9 @@ COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/whe
# Install Python for framework=none runtime (cuda-dl-base doesn't include Python)
# This is needed to create venv and install dynamo packages
ARG PYTHON_VERSION
RUN apt-get update && \
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
python${PYTHON_VERSION}-dev \
python${PYTHON_VERSION}-venv && \
......@@ -511,15 +522,21 @@ ENV HOME=/home/dynamo
# Create and activate virtual environment
# Use login shell to pick up umask 002 from /etc/profile.d/00-umask.sh for group-writable files
SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"]
RUN uv venv /opt/dynamo/venv --python ${PYTHON_VERSION}
# Cache uv downloads; uv handles its own locking for the cache.
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
uv venv /opt/dynamo/venv --python ${PYTHON_VERSION}
ENV VIRTUAL_ENV=/opt/dynamo/venv \
PATH="/opt/dynamo/venv/bin:${PATH}"
# Install dynamo wheels (runtime packages only, no test dependencies)
# uv handles its own locking for the cache, no need to add sharing=locked
ARG ENABLE_KVBM
ARG ENABLE_GPU_MEMORY_SERVICE
RUN uv pip install \
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
uv pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
/opt/dynamo/wheelhouse/nixl/nixl*.whl && \
......@@ -554,7 +571,9 @@ FROM ${EPP_IMAGE} AS epp
FROM nvcr.io/nvidia/base/ubuntu:noble-20250619 AS frontend
ARG PYTHON_VERSION
RUN apt-get update -y \
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update -y \
&& apt-get install -y --no-install-recommends \
# required for EPP
ca-certificates \
......@@ -606,20 +625,26 @@ COPY --chown=dynamo: --from=runtime /bin/uv /bin/uvx /bin/
COPY --chown=dynamo: --from=runtime /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
# Create virtual environment
RUN mkdir -p /opt/dynamo/venv && \
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
mkdir -p /opt/dynamo/venv && \
uv venv /opt/dynamo/venv --python $PYTHON_VERSION
# Install common and test dependencies
# Install common and test dependencies. In an ideal world, we'd use a mirror of PyPI for much more reliable downloads.
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
--mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
UV_GIT_LFS=1 uv pip install \
--no-cache \
--mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
export UV_CACHE_DIR=/home/dynamo/.cache/uv UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
uv pip install \
--requirement /tmp/requirements.txt \
--requirement /tmp/requirements.test.txt
ARG ENABLE_KVBM
ARG ENABLE_GPU_MEMORY_SERVICE
RUN uv pip install \
# In an ideal world, we'd use a mirror of PyPI for much more reliable downloads.
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
uv pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
/opt/dynamo/wheelhouse/nixl/nixl*.whl && \
......@@ -640,7 +665,8 @@ RUN uv pip install \
uv pip install "$KVBM_WHEEL"; \
fi && \
cd /workspace/benchmarks && \
UV_GIT_LFS=1 uv pip install --no-cache .
export UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
uv pip install .
# Setup environment for all users
USER root
......
......@@ -220,7 +220,10 @@ ENV CUDA_PATH=/usr/local/cuda \
# Create virtual environment for building wheels
ARG PYTHON_VERSION
ENV VIRTUAL_ENV=/workspace/.venv
RUN uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
# Cache uv downloads; uv handles its own locking for this cache.
RUN --mount=type=cache,target=/root/.cache/uv \
export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
uv pip install --upgrade meson pybind11 patchelf maturin[patchelf] tomlkit
ARG NIXL_UCX_REF
......@@ -397,6 +400,8 @@ RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
--mount=type=cache,target=/root/.cache/uv \
export UV_CACHE_DIR=/root/.cache/uv && \
export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
......@@ -412,10 +417,14 @@ COPY launch/ /opt/dynamo/launch/
COPY lib/ /opt/dynamo/lib/
COPY components/ /opt/dynamo/components/
# Build dynamo wheels
# Build dynamo wheels. The cache mounts do not need sharing=locked because Cargo has its own locking mechanism.
ARG ENABLE_KVBM
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
--mount=type=cache,target=/root/.cargo/registry \
--mount=type=cache,target=/root/.cargo/git \
--mount=type=cache,target=/root/.cache/uv \
export UV_CACHE_DIR=/root/.cache/uv && \
export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
......@@ -490,7 +499,9 @@ RUN userdel -r ubuntu > /dev/null 2>&1 || true \
# NOTE: Setting ENV UMASK=002 does NOT work - umask is a shell builtin, not an environment variable
&& mkdir -p /etc/profile.d && echo 'umask 002' > /etc/profile.d/00-umask.sh
RUN apt-get update && \
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
# required for verification of GPG keys
gnupg2 \
......@@ -518,7 +529,9 @@ ENV SGLANG_VERSION="${RUNTIME_IMAGE_TAG%%-*}"
# Install packages as root to ensure they go to system location (/usr/local/lib/python3.12/dist-packages)
ARG ENABLE_GPU_MEMORY_SERVICE
RUN --mount=type=bind,source=.,target=/mnt/local_src \
pip install --no-cache-dir --break-system-packages \
--mount=type=cache,target=/root/.cache/pip,sharing=locked \
export PIP_CACHE_DIR=/root/.cache/pip && \
pip install --break-system-packages \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
/opt/dynamo/wheelhouse/nixl/nixl*.whl \
......@@ -534,14 +547,16 @@ RUN --mount=type=bind,source=.,target=/mnt/local_src \
# Install common and test dependencies as root
RUN --mount=type=bind,source=.,target=/mnt/local_src \
pip install --no-cache-dir --break-system-packages \
--mount=type=cache,target=/root/.cache/pip,sharing=locked \
export PIP_CACHE_DIR=/root/.cache/pip && \
pip install --break-system-packages \
--requirement /mnt/local_src/container/deps/requirements.txt \
--requirement /mnt/local_src/container/deps/requirements.test.txt \
sglang==${SGLANG_VERSION} && \
cd /workspace/benchmarks && \
pip install --break-system-packages --no-cache . && \
pip install --break-system-packages . && \
#TODO: Temporary change until upstream sglang runtime image is updated
pip install --no-cache-dir --break-system-packages "urllib3>=2.6.3" && \
pip install --break-system-packages "urllib3>=2.6.3" && \
# pip/uv bypasses umask when creating .egg-info files, but chmod -R is fast here (small directory)
chmod -R g+w /workspace/benchmarks && \
# Install NVIDIA packages based on CUDA version
......@@ -549,13 +564,13 @@ RUN --mount=type=bind,source=.,target=/mnt/local_src \
if [ "$CUDA_MAJOR" = "12" ]; then \
# Install NVIDIA packages that are needed for DeepEP to work properly
# This is done in the upstream runtime image too, but these packages are overridden in earlier commands
pip install --no-cache-dir --break-system-packages --force-reinstall --no-deps \
pip install --break-system-packages --force-reinstall --no-deps \
nvidia-nccl-cu12==2.28.3 \
nvidia-cudnn-cu12==9.16.0.29 \
nvidia-cutlass-dsl==4.3.5; \
elif [ "$CUDA_MAJOR" = "13" ]; then \
# CUDA 13: Install CuDNN for PyTorch 2.9.1 compatibility
pip install --no-cache-dir --break-system-packages --force-reinstall --no-deps \
pip install --break-system-packages --force-reinstall --no-deps \
nvidia-nccl-cu13==2.28.3 \
nvidia-cublas==13.1.0.3 \
nvidia-cutlass-dsl==4.3.1 \
......
......@@ -239,7 +239,10 @@ ENV CUDA_PATH=/usr/local/cuda \
# Create virtual environment for building wheels
ARG PYTHON_VERSION
ENV VIRTUAL_ENV=/workspace/.venv
RUN uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
# Cache uv downloads; uv handles its own locking for this cache.
RUN --mount=type=cache,target=/root/.cache/uv \
export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
uv pip install --upgrade meson pybind11 patchelf maturin[patchelf] tomlkit
ARG NIXL_UCX_REF
......@@ -416,6 +419,8 @@ RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
--mount=type=cache,target=/root/.cache/uv \
export UV_CACHE_DIR=/root/.cache/uv && \
export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
......@@ -431,11 +436,15 @@ COPY launch/ /opt/dynamo/launch/
COPY lib/ /opt/dynamo/lib/
COPY components/ /opt/dynamo/components/
# Build dynamo wheels
# Build dynamo wheels. The cache mounts do not need sharing=locked because Cargo has its own locking mechanism.
ARG ENABLE_KVBM
ARG USE_SCCACHE
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
--mount=type=cache,target=/root/.cargo/registry \
--mount=type=cache,target=/root/.cargo/git \
--mount=type=cache,target=/root/.cache/uv \
export UV_CACHE_DIR=/root/.cache/uv && \
export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
......@@ -498,7 +507,9 @@ COPY --from=dynamo_base /bin/uv /bin/uvx /bin/
# Install minimal dependencies needed for TensorRT-LLM installation
ARG PYTHON_VERSION
RUN apt-get update && \
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
python${PYTHON_VERSION}-dev \
python3-pip \
......@@ -511,6 +522,7 @@ RUN apt-get update && \
# Create virtual environment.
# Mount the uv cache so the UV_CACHE_DIR export below points at the BuildKit cache mount,
# matching the other uv steps, instead of at a plain directory written into the image layer.
RUN --mount=type=cache,target=/root/.cache/uv \
    export UV_CACHE_DIR=/root/.cache/uv && \
    mkdir -p /opt/dynamo/venv && \
    uv venv /opt/dynamo/venv --python $PYTHON_VERSION
ENV VIRTUAL_ENV=/opt/dynamo/venv \
......@@ -564,7 +576,10 @@ ARG GITHUB_TRTLLM_COMMIT
COPY --from=trtllm_wheel / /trtllm_wheel/
COPY --from=trtllm_wheel_image /app/tensorrt_llm /trtllm_wheel_image/
RUN uv pip install --no-cache "cuda-python==13.0.2"
# Cache uv downloads; uv handles its own locking for this cache.
RUN --mount=type=cache,target=/root/.cache/uv \
export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
uv pip install "cuda-python==13.0.2"
# Note: TensorRT needs to be uninstalled before installing the TRTLLM wheel
# because there might be mismatched versions of TensorRT between the NGC PyTorch
......@@ -575,11 +590,18 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
rm -f /usr/share/keyrings/cuda-archive-keyring.gpg && \
rm -f /etc/apt/trusted.gpg.d/cuda*.gpg
RUN if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
RUN --mount=type=cache,target=/root/.cache/uv \
export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
# Download and run install_tensorrt.sh from TensorRT-LLM GitHub before installing the wheel
curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/${GITHUB_TRTLLM_COMMIT}/docker/common/install_tensorrt.sh" && \
# Modify the script to use virtual environment pip instead of system pip3
sed -i 's/pip3 install/uv pip install/g' /tmp/install_tensorrt.sh && \
bash /tmp/install_tensorrt.sh && \
# Install from local wheel directory in build context
WHEEL_FILE="$(find /trtllm_wheel -name "*.whl" | head -n 1)"; \
if [ -n "$WHEEL_FILE" ]; then \
uv pip install --no-cache "$WHEEL_FILE" triton==3.5.1; \
uv pip install "$WHEEL_FILE" triton==3.5.1; \
else \
echo "No wheel file found in /trtllm_wheel directory."; \
exit 1; \
......@@ -587,7 +609,7 @@ RUN if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
elif [ -n "$(find /trtllm_wheel_image -name "*.whl" | head -n 1)" ]; then \
# Install from wheel embedded in the TRTLLM release image
WHEEL_FILE="$(find /trtllm_wheel_image -name "*.whl" | head -n 1)"; \
uv pip install --no-cache "$WHEEL_FILE" triton==3.5.1; \
uv pip install "$WHEEL_FILE" triton==3.5.1; \
else \
# Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
# TRTLLM 1.2.0rc6.post2 has issues installing from pypi with uv, installing from direct wheel link works best
......@@ -596,9 +618,9 @@ RUN if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
TRTLLM_VERSION=$(echo "${TENSORRTLLM_PIP_WHEEL}" | sed -E 's/tensorrt-llm==([0-9a-zA-Z.+-]+).*/\1/'); \
PYTHON_TAG="cp$(echo ${PYTHON_VERSION} | tr -d '.')"; \
DIRECT_URL="https://pypi.nvidia.com/tensorrt-llm/tensorrt_llm-${TRTLLM_VERSION}-${PYTHON_TAG}-${PYTHON_TAG}-linux_${ARCH_ALT}.whl"; \
uv pip install --no-cache --index-strategy=unsafe-best-match --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${DIRECT_URL}" triton==3.5.1; \
uv pip install --index-strategy=unsafe-best-match --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${DIRECT_URL}" triton==3.5.1; \
else \
uv pip install --no-cache --index-strategy=unsafe-best-match --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" triton==3.5.1; \
uv pip install --index-strategy=unsafe-best-match --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" triton==3.5.1; \
fi; \
fi && \
# Run TensorRT installer that ships with the TRTLLM wheel
......@@ -818,8 +840,9 @@ COPY --chmod=775 --chown=dynamo:0 benchmarks/ /workspace/benchmarks/
ARG ENABLE_KVBM
ARG ENABLE_GPU_MEMORY_SERVICE
COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
RUN uv pip install \
--no-cache \
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
uv pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
/opt/dynamo/wheelhouse/nixl/nixl*.whl && \
......@@ -837,18 +860,20 @@ RUN uv pip install \
echo "ERROR: ENABLE_KVBM is true but no KVBM wheel found in wheelhouse" >&2; \
exit 1; \
fi; \
uv pip install --no-cache "$KVBM_WHEEL"; \
uv pip install "$KVBM_WHEEL"; \
fi && \
cd /workspace/benchmarks && \
UV_GIT_LFS=1 uv pip install --no-cache . && \
export UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
uv pip install . && \
# pip/uv bypasses umask when creating .egg-info files, but chmod -R is fast here (small directory)
chmod -R g+w /workspace/benchmarks
# Install common and test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
--mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
UV_GIT_LFS=1 uv pip install \
--no-cache \
--mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
export UV_CACHE_DIR=/home/dynamo/.cache/uv UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
uv pip install \
--index-strategy unsafe-best-match \
--extra-index-url https://download.pytorch.org/whl/cu130 \
--requirement /tmp/requirements.txt \
......
......@@ -237,7 +237,10 @@ ENV CUDA_PATH=/usr/local/cuda \
# Create virtual environment for building wheels
ARG PYTHON_VERSION
ENV VIRTUAL_ENV=/workspace/.venv
RUN uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
# Cache uv downloads; uv handles its own locking for this cache.
RUN --mount=type=cache,target=/root/.cache/uv \
export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
uv pip install --upgrade meson pybind11 patchelf maturin[patchelf] tomlkit
ARG NIXL_UCX_REF
......@@ -436,6 +439,8 @@ RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
--mount=type=cache,target=/root/.cache/uv \
export UV_CACHE_DIR=/root/.cache/uv && \
export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
......@@ -455,6 +460,10 @@ COPY components/ /opt/dynamo/components/
ARG ENABLE_KVBM
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
--mount=type=cache,target=/root/.cargo/registry \
--mount=type=cache,target=/root/.cargo/git \
--mount=type=cache,target=/root/.cache/uv \
export UV_CACHE_DIR=/root/.cache/uv && \
export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
......@@ -522,7 +531,9 @@ COPY --from=dynamo_base /bin/uv /bin/uvx /bin/
ARG PYTHON_VERSION
RUN apt-get update -y \
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update -y \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
# Python runtime - CRITICAL for virtual environment to work
python${PYTHON_VERSION}-dev \
......@@ -545,6 +556,7 @@ RUN ln -sf /usr/lib/aarch64-linux-gnu/libmlx5.so.1 /usr/lib/aarch64-linux-gnu/li
# Create virtual environment.
# Mount the uv cache so the UV_CACHE_DIR export below points at the BuildKit cache mount,
# matching the other uv steps, instead of at a plain directory written into the image layer.
RUN --mount=type=cache,target=/root/.cache/uv \
    export UV_CACHE_DIR=/root/.cache/uv && \
    mkdir -p /opt/dynamo/venv && \
    uv venv /opt/dynamo/venv --python $PYTHON_VERSION
# Activate virtual environment
......@@ -568,6 +580,7 @@ ENV CUDA_HOME=/usr/local/cuda
# Install VLLM and related dependencies
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
--mount=type=cache,target=/root/.cache/uv \
export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
chmod +x /tmp/install_vllm.sh && \
/tmp/install_vllm.sh \
......@@ -657,7 +670,9 @@ ARG PYTHON_VERSION
ENV PYTHON_VERSION=${PYTHON_VERSION}
# Install Python, build-essential and python3-dev as apt dependencies
RUN apt-get update && \
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update && \
CUDA_VERSION_MAJOR=${CUDA_VERSION%%.*} &&\
CUDA_VERSION_MINOR=$(echo "${CUDA_VERSION#*.}" | cut -d. -f1) && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
......@@ -765,7 +780,9 @@ COPY --chmod=775 --chown=dynamo:0 benchmarks/ /workspace/benchmarks/
ARG ENABLE_KVBM
ARG ENABLE_GPU_MEMORY_SERVICE
COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
RUN uv pip install \
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
uv pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
/opt/dynamo/wheelhouse/nixl/nixl*.whl && \
......@@ -786,15 +803,17 @@ RUN uv pip install \
uv pip install "$KVBM_WHEEL"; \
fi && \
cd /workspace/benchmarks && \
UV_GIT_LFS=1 uv pip install --no-cache . && \
export UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
uv pip install . && \
# pip/uv bypasses umask when creating .egg-info files, but chmod -R is fast here (small directory)
chmod -R g+w /workspace/benchmarks
# Install common and test dependencies
# Install common and test dependencies. Cache uv downloads; uv handles its own locking for this cache.
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
--mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
UV_GIT_LFS=1 uv pip install \
--no-cache \
--mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
export UV_CACHE_DIR=/home/dynamo/.cache/uv UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
uv pip install \
--requirement /tmp/requirements.txt \
--requirement /tmp/requirements.test.txt
......
......@@ -150,6 +150,77 @@ The `build.sh` script is responsible for building Docker images for different AI
- **Build Caching**: Docker layer caching and sccache support
- **GPU Optimization**: CUDA, EFA, and NIXL support
#### BuildKit cache mounts in Dockerfiles
The framework Dockerfiles use BuildKit cache mounts (`RUN --mount=type=cache,...`) to reduce repeated downloads across builds. These caches are stored in Docker/BuildKit’s cache storage on the host (not in your host `~/.cache`), and are shared across builds that use the same builder.
Common cache mount targets:
- `--mount=type=cache,target=/root/.cache/uv`: `uv` download cache (wheels/sdists, git checkouts used by `uv`, etc.)
- `--mount=type=cache,target=/var/cache/apt,sharing=locked`: apt download cache (`sharing=locked` avoids apt/dpkg races with concurrent builds)
- `--mount=type=cache,target=/var/cache/{yum,dnf},sharing=locked`: yum/dnf metadata cache (`sharing=locked` avoids corruption with concurrent builds)
- `--mount=type=cache,target=/root/.cargo/{registry,git}`: Cargo crate/git download caches (Cargo has its own locking; no `sharing=locked` needed)
To inspect cache usage:
```bash
docker buildx du
docker info --format 'DockerRootDir: {{.DockerRootDir}}'
```
##### Inspecting BuildKit cache on the host (quick checklist)
1. Quick summary:
```bash
docker buildx du | tail -5
```
2. Find Docker root:
```bash
docker info | grep "Docker Root Dir"
# Output example: Docker Root Dir: /var/lib/docker
```
3. Check executor storage size:
```bash
DOCKER_ROOT="$(docker info --format '{{.DockerRootDir}}')"
sudo du -sh "${DOCKER_ROOT}/buildkit/executor" 2>/dev/null || true
```
4. Find specific caches (example: uv cache under BuildKit executor rootfs):
```bash
DOCKER_ROOT="$(docker info --format '{{.DockerRootDir}}')"
sudo sh -c 'for dir in '"${DOCKER_ROOT}"'/buildkit/executor/*/rootfs/root/.cache/uv; do
  # Each glob match is already the top of one uv cache; report it once instead of walking the whole tree.
  if [ -d "$dir" ]; then du -sh "$dir" 2>/dev/null; fi
done'
```
5. List all large cache directories:
```bash
DOCKER_ROOT="$(docker info --format '{{.DockerRootDir}}')"
sudo sh -c 'du -sh '"${DOCKER_ROOT}"'/buildkit/executor/* 2>/dev/null | sort -h | tail -10'
```
Cleanup commands:
```bash
# Safe: clean only reclaimable cache
docker buildx prune
# Aggressive: clean everything
docker buildx prune --all
# Time-based: remove cache older than 3 days
docker buildx prune --filter until=72h
```
Current cache types (as mounted in various Dockerfiles):
1. `/root/.cache/uv` and `/home/dynamo/.cache/uv` - Python packages (uv; match the current `USER`)
2. `/root/.cargo/registry` - Rust crates
3. `/root/.cargo/git` - Rust git deps
4. `/var/cache/yum`, `/var/cache/dnf` - AlmaLinux packages
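5. `/var/cache/apt` - Ubuntu packages
6. `/root/.cache/pip` - Python packages (pip; mounted with `sharing=locked` where `pip` is used instead of `uv`)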
Note: `uv` commands set `UV_CACHE_DIR` per `RUN` so `uv` always uses the same path as the cache mount (instead of relying on `$HOME`).
**How `dev` / `local-dev` builds work:**
- `dev` and `local-dev` targets are defined in `container/dev/Dockerfile.dev`.
- The framework Dockerfiles (`Dockerfile.vllm`, `Dockerfile.trtllm`, `Dockerfile.sglang`, `Dockerfile`) define shared stages used by `Dockerfile.dev` (e.g. `runtime`, `dynamo_base`, `wheel_builder`).
......
......@@ -72,7 +72,9 @@ SHELL ["/bin/bash", "-c"]
# This stage only installs generic developer tools that are available from Ubuntu repos, so CUDA repos are unnecessary.
#
# We also add a small retry/backoff to make transient apt metadata issues less disruptive.
RUN set -eux; \
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
set -eux; \
if [ -d /etc/apt/sources.list.d ]; then \
mkdir -p /tmp/apt-disabled; \
for f in /etc/apt/sources.list.d/*.list; do \
......@@ -156,7 +158,9 @@ RUN set -eux; \
# Install awk separately with fault tolerance.
# awk is a virtual package with multiple implementations (gawk, mawk, original-awk).
RUN (apt-get update && \
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
(apt-get update && \
(apt-get install -y --no-install-recommends gawk || \
apt-get install -y --no-install-recommends mawk || \
apt-get install -y --no-install-recommends original-awk || \
......@@ -165,7 +169,9 @@ RUN (apt-get update && \
(command -v awk >/dev/null 2>&1 && echo "awk available: $(command -v awk)" || echo "awk not available")
# Add NVIDIA devtools repository and install development tools (nsight-systems).
RUN wget -qO - "https://developer.download.nvidia.com/devtools/repos/ubuntu2404/${ARCH}/nvidia.pub" | \
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
wget -qO - "https://developer.download.nvidia.com/devtools/repos/ubuntu2404/${ARCH}/nvidia.pub" | \
gpg --dearmor -o /etc/apt/keyrings/nvidia-devtools.gpg && \
echo "deb [signed-by=/etc/apt/keyrings/nvidia-devtools.gpg] https://developer.download.nvidia.com/devtools/repos/ubuntu2404/${ARCH} /" | \
tee /etc/apt/sources.list.d/nvidia-devtools.list && \
......@@ -346,8 +352,10 @@ RUN git lfs install
ARG FRAMEWORK
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
--mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
# Cache uv downloads; uv handles its own locking for this cache.
--mount=type=cache,target=/root/.cache/uv \
UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 uv pip install \
export UV_CACHE_DIR=/root/.cache/uv UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
uv pip install \
--index-strategy unsafe-best-match \
--extra-index-url https://download.pytorch.org/whl/cu130 \
--requirement /tmp/requirements.txt \
......@@ -364,8 +372,10 @@ COPY --chmod=775 --chown=dynamo:0 ./ ${WORKSPACE_DIR}/
RUN chmod g+w ${WORKSPACE_DIR}
# Install benchmarks package (includes prefix_data_generator, tabulate, etc.)
RUN cd ${WORKSPACE_DIR}/benchmarks && \
UV_GIT_LFS=1 uv pip install --no-cache .
RUN --mount=type=cache,target=/root/.cache/uv \
cd ${WORKSPACE_DIR}/benchmarks && \
export UV_CACHE_DIR=/root/.cache/uv UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
uv pip install .
# Install maturin and create editable install entry points.
#
......@@ -374,11 +384,15 @@ RUN cd ${WORKSPACE_DIR}/benchmarks && \
# /opt/dynamo/venv/bin and put that venv on PATH, so `uv` is expected to be available here in normal builds.
# - The `command -v uv` guard is defensive: on SGLang, `uv` needs to "disappear" from PATH and we fall back to
# `python3 -m pip` so the editable install can still proceed (instead of failing mid-layer with a confusing error).
RUN if [ -f pyproject.toml ]; then \
# Cache uv and pip downloads; uv handles its own locking for its cache, while the pip cache uses sharing=locked.
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=cache,target=/root/.cache/pip,sharing=locked \
export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 PIP_CACHE_DIR=/root/.cache/pip && \
if [ -f pyproject.toml ]; then \
if command -v uv >/dev/null 2>&1; then \
uv pip install --no-cache maturin[patchelf] && uv pip install --no-deps -e . ; \
uv pip install maturin[patchelf] && uv pip install --no-deps -e . ; \
else \
python3 -m pip install --no-cache-dir maturin[patchelf] && python3 -m pip install --no-deps -e . ; \
python3 -m pip install maturin[patchelf] && python3 -m pip install --no-deps -e . ; \
fi; \
else \
echo "ERROR: pyproject.toml not found in ${WORKSPACE_DIR}; expected to build from the Dynamo repo root." >&2; \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment