fix: uv network timeout to be more resilient to intermittent network issues (part 2) (#5530)

Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com> Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com>

fix: uv network timeout to be more resilient to intermittent network issues (part 2) (#5530)
Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com> Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com>
f70dd663 · Keiven C · GitHub · 3b9b3f31 · f70dd663 · f70dd663
Unverified Commit f70dd663 authored Feb 03, 2026 by Keiven C Committed by GitHub Feb 03, 2026
6 changed files
--- a/container/Dockerfile
+++ b/container/Dockerfile
@@ -207,7 +207,10 @@ ENV CUDA_PATH=/usr/local/cuda \
 # Create virtual environment for building wheels
 ARG PYTHON_VERSION
 ENV VIRTUAL_ENV=/workspace/.venv
-RUN uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
+# Cache uv downloads; uv handles its own locking for this cache.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
+    uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
    uv pip install --upgrade meson pybind11 patchelf maturin[patchelf] tomlkit

 ARG NIXL_UCX_REF
@@ -385,6 +388,8 @@ RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \

 RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
+    --mount=type=cache,target=/root/.cache/uv \
+    export UV_CACHE_DIR=/root/.cache/uv && \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
@@ -400,10 +405,14 @@ COPY launch/ /opt/dynamo/launch/
 COPY lib/ /opt/dynamo/lib/
 COPY components/ /opt/dynamo/components/

-# Build dynamo wheels
+# Build dynamo wheels. The cache mounts below do not need sharing=locked because Cargo and uv handle their own cache locking.
 ARG ENABLE_KVBM
 RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
+    --mount=type=cache,target=/root/.cargo/registry \
+    --mount=type=cache,target=/root/.cargo/git \
+    --mount=type=cache,target=/root/.cache/uv \
+    export UV_CACHE_DIR=/root/.cache/uv && \
    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
@@ -496,7 +505,9 @@ COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/whe
 # Install Python for framework=none runtime (cuda-dl-base doesn't include Python)
 # This is needed to create venv and install dynamo packages
 ARG PYTHON_VERSION
-RUN apt-get update && \
+# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        python${PYTHON_VERSION}-dev \
        python${PYTHON_VERSION}-venv && \
@@ -511,15 +522,21 @@ ENV HOME=/home/dynamo
 # Create and activate virtual environment
 # Use login shell to pick up umask 002 from /etc/profile.d/00-umask.sh for group-writable files
 SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"]
-RUN uv venv /opt/dynamo/venv --python ${PYTHON_VERSION}
+# Cache uv downloads; uv handles its own locking for the cache.
+RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
+    export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
+    uv venv /opt/dynamo/venv --python ${PYTHON_VERSION}

 ENV VIRTUAL_ENV=/opt/dynamo/venv \
    PATH="/opt/dynamo/venv/bin:${PATH}"

 # Install dynamo wheels (runtime packages only, no test dependencies)
+# uv handles its own locking for the cache, so sharing=locked is not needed.
 ARG ENABLE_KVBM
 ARG ENABLE_GPU_MEMORY_SERVICE
-RUN uv pip install \
+RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
+    export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
+    uv pip install \
    /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
    /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
    /opt/dynamo/wheelhouse/nixl/nixl*.whl && \
@@ -554,7 +571,9 @@ FROM ${EPP_IMAGE} AS epp
 FROM nvcr.io/nvidia/base/ubuntu:noble-20250619 AS frontend

 ARG PYTHON_VERSION
-RUN apt-get update -y \
+# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    apt-get update -y \
    && apt-get install -y --no-install-recommends \
        # required for EPP
        ca-certificates \
@@ -606,20 +625,26 @@ COPY --chown=dynamo: --from=runtime /bin/uv /bin/uvx /bin/
 COPY --chown=dynamo: --from=runtime /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/

 # Create virtual environment
-RUN mkdir -p /opt/dynamo/venv && \
+RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
+    export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
+    mkdir -p /opt/dynamo/venv && \
    uv venv /opt/dynamo/venv --python $PYTHON_VERSION

-# Install common and test dependencies
+# Install common and test dependencies. In an ideal world, we'd use a mirror of PyPI for much more reliable downloads.
 RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
-    UV_GIT_LFS=1 uv pip install \
-        --no-cache \
+    --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
+    export UV_CACHE_DIR=/home/dynamo/.cache/uv UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
+    uv pip install \
        --requirement /tmp/requirements.txt \
        --requirement /tmp/requirements.test.txt

 ARG ENABLE_KVBM
 ARG ENABLE_GPU_MEMORY_SERVICE
-RUN uv pip install \
+# In an ideal world, we'd use a mirror of PyPI for much more reliable downloads.
+RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
+    export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
+    uv pip install \
    /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
    /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
    /opt/dynamo/wheelhouse/nixl/nixl*.whl && \
@@ -640,7 +665,8 @@ RUN uv pip install \
        uv pip install "$KVBM_WHEEL"; \
    fi && \
    cd /workspace/benchmarks && \
-    UV_GIT_LFS=1 uv pip install --no-cache .
+    export UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
+    uv pip install .

 # Setup environment for all users
 USER root

--- a/container/Dockerfile.sglang
+++ b/container/Dockerfile.sglang
@@ -220,7 +220,10 @@ ENV CUDA_PATH=/usr/local/cuda \
 # Create virtual environment for building wheels
 ARG PYTHON_VERSION
 ENV VIRTUAL_ENV=/workspace/.venv
-RUN uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
+# Cache uv downloads; uv handles its own locking for this cache.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
+    uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
    uv pip install --upgrade meson pybind11 patchelf maturin[patchelf] tomlkit

 ARG NIXL_UCX_REF
@@ -397,6 +400,8 @@ RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \

 RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
+    --mount=type=cache,target=/root/.cache/uv \
+    export UV_CACHE_DIR=/root/.cache/uv && \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
@@ -412,10 +417,14 @@ COPY launch/ /opt/dynamo/launch/
 COPY lib/ /opt/dynamo/lib/
 COPY components/ /opt/dynamo/components/

-# Build dynamo wheels
+# Build dynamo wheels. The cache mounts below do not need sharing=locked because Cargo and uv handle their own cache locking.
 ARG ENABLE_KVBM
 RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
+    --mount=type=cache,target=/root/.cargo/registry \
+    --mount=type=cache,target=/root/.cargo/git \
+    --mount=type=cache,target=/root/.cache/uv \
+    export UV_CACHE_DIR=/root/.cache/uv && \
    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
@@ -490,7 +499,9 @@ RUN userdel -r ubuntu > /dev/null 2>&1 || true \
    # NOTE: Setting ENV UMASK=002 does NOT work - umask is a shell builtin, not an environment variable
    && mkdir -p /etc/profile.d && echo 'umask 002' > /etc/profile.d/00-umask.sh

-RUN apt-get update && \
+# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        # required for verification of GPG keys
        gnupg2 \
@@ -518,7 +529,9 @@ ENV SGLANG_VERSION="${RUNTIME_IMAGE_TAG%%-*}"
 # Install packages as root to ensure they go to system location (/usr/local/lib/python3.12/dist-packages)
 ARG ENABLE_GPU_MEMORY_SERVICE
 RUN --mount=type=bind,source=.,target=/mnt/local_src \
-    pip install --no-cache-dir --break-system-packages \
+    --mount=type=cache,target=/root/.cache/pip,sharing=locked \
+    export PIP_CACHE_DIR=/root/.cache/pip && \
+    pip install --break-system-packages \
        /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
        /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
        /opt/dynamo/wheelhouse/nixl/nixl*.whl \
@@ -534,14 +547,16 @@ RUN --mount=type=bind,source=.,target=/mnt/local_src \

 # Install common and test dependencies as root
 RUN --mount=type=bind,source=.,target=/mnt/local_src \
-    pip install --no-cache-dir --break-system-packages \
+    --mount=type=cache,target=/root/.cache/pip,sharing=locked \
+    export PIP_CACHE_DIR=/root/.cache/pip && \
+    pip install --break-system-packages \
        --requirement /mnt/local_src/container/deps/requirements.txt \
        --requirement /mnt/local_src/container/deps/requirements.test.txt \
        sglang==${SGLANG_VERSION} && \
    cd /workspace/benchmarks && \
-    pip install --break-system-packages --no-cache . && \
+    pip install --break-system-packages . && \
    #TODO: Temporary change until upstream sglang runtime image is updated
-    pip install --no-cache-dir --break-system-packages "urllib3>=2.6.3" && \
+    pip install --break-system-packages "urllib3>=2.6.3" && \
    # pip/uv bypasses umask when creating .egg-info files, but chmod -R is fast here (small directory)
    chmod -R g+w /workspace/benchmarks && \
    # Install NVIDIA packages based on CUDA version
@@ -549,13 +564,13 @@ RUN --mount=type=bind,source=.,target=/mnt/local_src \
    if [ "$CUDA_MAJOR" = "12" ]; then \
        # Install NVIDIA packages that are needed for DeepEP to work properly
        # This is done in the upstream runtime image too, but these packages are overridden in earlier commands
-        pip install --no-cache-dir --break-system-packages --force-reinstall --no-deps \
+        pip install --break-system-packages --force-reinstall --no-deps \
            nvidia-nccl-cu12==2.28.3 \
            nvidia-cudnn-cu12==9.16.0.29 \
            nvidia-cutlass-dsl==4.3.5; \
    elif [ "$CUDA_MAJOR" = "13" ]; then \
        # CUDA 13: Install CuDNN for PyTorch 2.9.1 compatibility
-        pip install --no-cache-dir --break-system-packages --force-reinstall --no-deps \
+        pip install --break-system-packages --force-reinstall --no-deps \
            nvidia-nccl-cu13==2.28.3 \
            nvidia-cublas==13.1.0.3 \
            nvidia-cutlass-dsl==4.3.1 \

--- a/container/Dockerfile.trtllm
+++ b/container/Dockerfile.trtllm
@@ -239,7 +239,10 @@ ENV CUDA_PATH=/usr/local/cuda \
 # Create virtual environment for building wheels
 ARG PYTHON_VERSION
 ENV VIRTUAL_ENV=/workspace/.venv
-RUN uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
+# Cache uv downloads; uv handles its own locking for this cache.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
+    uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
    uv pip install --upgrade meson pybind11 patchelf maturin[patchelf] tomlkit

 ARG NIXL_UCX_REF
@@ -416,6 +419,8 @@ RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \

 RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
+    --mount=type=cache,target=/root/.cache/uv \
+    export UV_CACHE_DIR=/root/.cache/uv && \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
@@ -431,11 +436,15 @@ COPY launch/ /opt/dynamo/launch/
 COPY lib/ /opt/dynamo/lib/
 COPY components/ /opt/dynamo/components/

-# Build dynamo wheels
+# Build dynamo wheels. The cache mounts below do not need sharing=locked because Cargo and uv handle their own cache locking.
 ARG ENABLE_KVBM
 ARG USE_SCCACHE
 RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
+    --mount=type=cache,target=/root/.cargo/registry \
+    --mount=type=cache,target=/root/.cargo/git \
+    --mount=type=cache,target=/root/.cache/uv \
+    export UV_CACHE_DIR=/root/.cache/uv && \
    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
@@ -498,7 +507,9 @@ COPY --from=dynamo_base /bin/uv /bin/uvx /bin/

 # Install minimal dependencies needed for TensorRT-LLM installation
 ARG PYTHON_VERSION
-RUN apt-get update && \
+# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        python${PYTHON_VERSION}-dev \
        python3-pip \
@@ -511,6 +522,7 @@ RUN apt-get update && \

 # Create virtual environment
 RUN mkdir -p /opt/dynamo/venv && \
+    export UV_CACHE_DIR=/root/.cache/uv && \
    uv venv /opt/dynamo/venv --python $PYTHON_VERSION

 ENV VIRTUAL_ENV=/opt/dynamo/venv \
@@ -564,7 +576,10 @@ ARG GITHUB_TRTLLM_COMMIT
 COPY --from=trtllm_wheel / /trtllm_wheel/
 COPY --from=trtllm_wheel_image /app/tensorrt_llm /trtllm_wheel_image/

-RUN uv pip install --no-cache "cuda-python==13.0.2"
+# Cache uv downloads; uv handles its own locking for this cache.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
+    uv pip install "cuda-python==13.0.2"

 # Note: TensorRT needs to be uninstalled before installing the TRTLLM wheel
 # because there might be mismatched versions of TensorRT between the NGC PyTorch
@@ -575,11 +590,18 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
    rm -f /usr/share/keyrings/cuda-archive-keyring.gpg && \
    rm -f /etc/apt/trusted.gpg.d/cuda*.gpg

-RUN if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
+RUN --mount=type=cache,target=/root/.cache/uv \
+    export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
+    if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
+        # Download and run install_tensorrt.sh from TensorRT-LLM GitHub before installing the wheel
+        curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/${GITHUB_TRTLLM_COMMIT}/docker/common/install_tensorrt.sh" && \
+        # Modify the script to use virtual environment pip instead of system pip3
+        sed -i 's/pip3 install/uv pip install/g' /tmp/install_tensorrt.sh && \
+        bash /tmp/install_tensorrt.sh && \
        # Install from local wheel directory in build context
        WHEEL_FILE="$(find /trtllm_wheel -name "*.whl" | head -n 1)"; \
        if [ -n "$WHEEL_FILE" ]; then \
-            uv pip install --no-cache "$WHEEL_FILE" triton==3.5.1; \
+            uv pip install "$WHEEL_FILE" triton==3.5.1; \
        else \
            echo "No wheel file found in /trtllm_wheel directory."; \
            exit 1; \
@@ -587,7 +609,7 @@ RUN if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
    elif [ -n "$(find /trtllm_wheel_image -name "*.whl" | head -n 1)" ]; then \
        # Install from wheel embedded in the TRTLLM release image
        WHEEL_FILE="$(find /trtllm_wheel_image -name "*.whl" | head -n 1)"; \
-        uv pip install --no-cache "$WHEEL_FILE" triton==3.5.1; \
+        uv pip install "$WHEEL_FILE" triton==3.5.1; \
    else \
        # Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
        # TRTLLM 1.2.0rc6.post2 has issues installing from pypi with uv, installing from direct wheel link works best
@@ -596,9 +618,9 @@ RUN if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
            TRTLLM_VERSION=$(echo "${TENSORRTLLM_PIP_WHEEL}" | sed -E 's/tensorrt-llm==([0-9a-zA-Z.+-]+).*/\1/'); \
            PYTHON_TAG="cp$(echo ${PYTHON_VERSION} | tr -d '.')"; \
            DIRECT_URL="https://pypi.nvidia.com/tensorrt-llm/tensorrt_llm-${TRTLLM_VERSION}-${PYTHON_TAG}-${PYTHON_TAG}-linux_${ARCH_ALT}.whl"; \
-            uv pip install --no-cache --index-strategy=unsafe-best-match --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${DIRECT_URL}" triton==3.5.1; \
+            uv pip install --index-strategy=unsafe-best-match --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${DIRECT_URL}" triton==3.5.1; \
        else \
-            uv pip install --no-cache --index-strategy=unsafe-best-match --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" triton==3.5.1; \
+            uv pip install --index-strategy=unsafe-best-match --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" triton==3.5.1; \
        fi; \
    fi && \
    # Run TensorRT installer that ships with the TRTLLM wheel
@@ -818,8 +840,9 @@ COPY --chmod=775 --chown=dynamo:0 benchmarks/ /workspace/benchmarks/
 ARG ENABLE_KVBM
 ARG ENABLE_GPU_MEMORY_SERVICE
 COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
-RUN uv pip install \
-      --no-cache \
+RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
+    export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
+    uv pip install \
      /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
      /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
      /opt/dynamo/wheelhouse/nixl/nixl*.whl && \
@@ -837,18 +860,20 @@ RUN uv pip install \
            echo "ERROR: ENABLE_KVBM is true but no KVBM wheel found in wheelhouse" >&2; \
            exit 1; \
        fi; \
-        uv pip install --no-cache "$KVBM_WHEEL"; \
+        uv pip install "$KVBM_WHEEL"; \
    fi && \
    cd /workspace/benchmarks && \
-    UV_GIT_LFS=1 uv pip install --no-cache . && \
+    export UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
+    uv pip install . && \
    # pip/uv bypasses umask when creating .egg-info files, but chmod -R is fast here (small directory)
    chmod -R g+w /workspace/benchmarks

 # Install common and test dependencies
 RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
-    UV_GIT_LFS=1 uv pip install \
-        --no-cache \
+    --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
+    export UV_CACHE_DIR=/home/dynamo/.cache/uv UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
+    uv pip install \
        --index-strategy unsafe-best-match \
        --extra-index-url https://download.pytorch.org/whl/cu130 \
        --requirement /tmp/requirements.txt \

--- a/container/Dockerfile.vllm
+++ b/container/Dockerfile.vllm
@@ -237,7 +237,10 @@ ENV CUDA_PATH=/usr/local/cuda \
 # Create virtual environment for building wheels
 ARG PYTHON_VERSION
 ENV VIRTUAL_ENV=/workspace/.venv
-RUN uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
+# Cache uv downloads; uv handles its own locking for this cache.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
+    uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
    uv pip install --upgrade meson pybind11 patchelf maturin[patchelf] tomlkit

 ARG NIXL_UCX_REF
@@ -436,6 +439,8 @@ RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \

 RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
+    --mount=type=cache,target=/root/.cache/uv \
+    export UV_CACHE_DIR=/root/.cache/uv && \
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
@@ -455,6 +460,10 @@ COPY components/ /opt/dynamo/components/
 ARG ENABLE_KVBM
 RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
+    --mount=type=cache,target=/root/.cargo/registry \
+    --mount=type=cache,target=/root/.cargo/git \
+    --mount=type=cache,target=/root/.cache/uv \
+    export UV_CACHE_DIR=/root/.cache/uv && \
    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
@@ -522,7 +531,9 @@ COPY --from=dynamo_base /bin/uv /bin/uvx /bin/

 ARG PYTHON_VERSION

-RUN apt-get update -y \
+# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    apt-get update -y \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        # Python runtime - CRITICAL for virtual environment to work
        python${PYTHON_VERSION}-dev \
@@ -545,6 +556,7 @@ RUN ln -sf /usr/lib/aarch64-linux-gnu/libmlx5.so.1 /usr/lib/aarch64-linux-gnu/li

 # Create virtual environment
 RUN mkdir -p /opt/dynamo/venv && \
+    export UV_CACHE_DIR=/root/.cache/uv && \
    uv venv /opt/dynamo/venv --python $PYTHON_VERSION

 # Activate virtual environment
@@ -568,6 +580,7 @@ ENV CUDA_HOME=/usr/local/cuda
 # Install VLLM and related dependencies
 RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
    --mount=type=cache,target=/root/.cache/uv \
+    export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
    cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
    chmod +x /tmp/install_vllm.sh && \
    /tmp/install_vllm.sh \
@@ -657,7 +670,9 @@ ARG PYTHON_VERSION
 ENV PYTHON_VERSION=${PYTHON_VERSION}

 # Install Python, build-essential and python3-dev as apt dependencies
-RUN apt-get update && \
+# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    apt-get update && \
    CUDA_VERSION_MAJOR=${CUDA_VERSION%%.*} &&\
    CUDA_VERSION_MINOR=$(echo "${CUDA_VERSION#*.}" | cut -d. -f1) && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
@@ -765,7 +780,9 @@ COPY --chmod=775 --chown=dynamo:0 benchmarks/ /workspace/benchmarks/
 ARG ENABLE_KVBM
 ARG ENABLE_GPU_MEMORY_SERVICE
 COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
-RUN uv pip install \
+RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
+    export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
+    uv pip install \
      /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
      /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
      /opt/dynamo/wheelhouse/nixl/nixl*.whl && \
@@ -786,15 +803,17 @@ RUN uv pip install \
        uv pip install "$KVBM_WHEEL"; \
    fi && \
    cd /workspace/benchmarks && \
-    UV_GIT_LFS=1 uv pip install --no-cache . && \
+    export UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
+    uv pip install . && \
    # pip/uv bypasses umask when creating .egg-info files, but chmod -R is fast here (small directory)
    chmod -R g+w /workspace/benchmarks

-# Install common and test dependencies
+# Install common and test dependencies. Cache uv downloads; uv handles its own locking for this cache.
 RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
-    UV_GIT_LFS=1 uv pip install \
-        --no-cache \
+    --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
+    export UV_CACHE_DIR=/home/dynamo/.cache/uv UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
+    uv pip install \
        --requirement /tmp/requirements.txt \
        --requirement /tmp/requirements.test.txt


--- a/container/README.md
+++ b/container/README.md
@@ -150,6 +150,77 @@ The `build.sh` script is responsible for building Docker images for different AI
 - **Build Caching**: Docker layer caching and sccache support
 - **GPU Optimization**: CUDA, EFA, and NIXL support

+#### BuildKit cache mounts in Dockerfiles
+
+The framework Dockerfiles use BuildKit cache mounts (`RUN --mount=type=cache,...`) to reduce repeated downloads across builds. These caches are stored in Docker/BuildKit’s cache storage on the host (not in your host `~/.cache`), and are shared across builds that use the same builder.
+
+Common cache mount targets:
+- `--mount=type=cache,target=/root/.cache/uv`: `uv` download cache (wheels/sdists, git checkouts used by `uv`, etc.)
+- `--mount=type=cache,target=/var/cache/apt,sharing=locked`: apt download cache (`sharing=locked` avoids apt/dpkg races with concurrent builds)
+- `--mount=type=cache,target=/var/cache/yum,sharing=locked` and `--mount=type=cache,target=/var/cache/dnf,sharing=locked` (one mount per path): yum/dnf metadata cache (`sharing=locked` avoids corruption with concurrent builds)
+- `--mount=type=cache,target=/root/.cargo/registry` and `--mount=type=cache,target=/root/.cargo/git` (one mount per path): Cargo crate/git download caches (Cargo has its own locking; no `sharing=locked` needed)
+
+To inspect cache usage:
+```bash
+docker buildx du
+docker info --format 'DockerRootDir: {{.DockerRootDir}}'
+```
+
+##### Inspecting BuildKit cache on the host (quick checklist)
+
+1. Quick summary:
+```bash
+docker buildx du | tail -5
+```
+
+2. Find Docker root:
+```bash
+docker info | grep "Docker Root Dir"
+# Output example: Docker Root Dir: /var/lib/docker
+```
+
+3. Check executor storage size:
+```bash
+DOCKER_ROOT="$(docker info --format '{{.DockerRootDir}}')"
+sudo du -sh "${DOCKER_ROOT}/buildkit/executor" 2>/dev/null || true
+```
+
+4. Find specific caches (example: uv cache under BuildKit executor rootfs):
+```bash
+DOCKER_ROOT="$(docker info --format '{{.DockerRootDir}}')"
+sudo sh -c 'find '"${DOCKER_ROOT}"'/buildkit/executor/*/rootfs/root/.cache/uv -type d 2>/dev/null | while read -r dir; do
+  parent=$(dirname "$(dirname "$(dirname "$dir")")")
+  du -sh "$parent/root/.cache/uv" 2>/dev/null
+done'
+```
+
+5. List all large cache directories:
+```bash
+DOCKER_ROOT="$(docker info --format '{{.DockerRootDir}}')"
+sudo sh -c 'du -sh '"${DOCKER_ROOT}"'/buildkit/executor/* 2>/dev/null | sort -h | tail -10'
+```
+
+Cleanup commands:
+```bash
+# Safe: clean only reclaimable cache
+docker buildx prune
+
+# Aggressive: clean everything
+docker buildx prune --all
+
+# Time-based: remove cache older than 3 days
+docker buildx prune --filter until=72h
+```
+
+Current cache types (as mounted in various Dockerfiles):
+1. `/root/.cache/uv` and `/home/dynamo/.cache/uv` - Python packages downloaded by `uv` (the path follows the home directory of the `USER` active for that `RUN` step: `root` or `dynamo`)
+2. `/root/.cargo/registry` - Rust crates
+3. `/root/.cargo/git` - Rust git deps
+4. `/var/cache/yum`, `/var/cache/dnf` - AlmaLinux packages
+5. `/var/cache/apt` - Ubuntu packages
+
+Note: `uv` commands set `UV_CACHE_DIR` per `RUN` so `uv` always uses the same path as the cache mount (instead of relying on `$HOME`).
+
 **How `dev` / `local-dev` builds work:**
 - `dev` and `local-dev` targets are defined in `container/dev/Dockerfile.dev`.
 - The framework Dockerfiles (`Dockerfile.vllm`, `Dockerfile.trtllm`, `Dockerfile.sglang`, `Dockerfile`) define shared stages used by `Dockerfile.dev` (e.g. `runtime`, `dynamo_base`, `wheel_builder`).

--- a/container/dev/Dockerfile.dev
+++ b/container/dev/Dockerfile.dev
@@ -72,7 +72,9 @@ SHELL ["/bin/bash", "-c"]
 # This stage only installs generic developer tools that are available from Ubuntu repos, so CUDA repos are unnecessary.
 #
 # We also add a small retry/backoff to make transient apt metadata issues less disruptive.
-RUN set -eux; \
+# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    set -eux; \
    if [ -d /etc/apt/sources.list.d ]; then \
        mkdir -p /tmp/apt-disabled; \
        for f in /etc/apt/sources.list.d/*.list; do \
@@ -156,7 +158,9 @@ RUN set -eux; \

 # Install awk separately with fault tolerance.
 # awk is a virtual package with multiple implementations (gawk, mawk, original-awk).
-RUN (apt-get update && \
+# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    (apt-get update && \
     (apt-get install -y --no-install-recommends gawk || \
      apt-get install -y --no-install-recommends mawk || \
      apt-get install -y --no-install-recommends original-awk || \
@@ -165,7 +169,9 @@ RUN (apt-get update && \
    (command -v awk >/dev/null 2>&1 && echo "awk available: $(command -v awk)" || echo "awk not available")

 # Add NVIDIA devtools repository and install development tools (nsight-systems).
-RUN wget -qO - "https://developer.download.nvidia.com/devtools/repos/ubuntu2404/${ARCH}/nvidia.pub" | \
+# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    wget -qO - "https://developer.download.nvidia.com/devtools/repos/ubuntu2404/${ARCH}/nvidia.pub" | \
        gpg --dearmor -o /etc/apt/keyrings/nvidia-devtools.gpg && \
    echo "deb [signed-by=/etc/apt/keyrings/nvidia-devtools.gpg] https://developer.download.nvidia.com/devtools/repos/ubuntu2404/${ARCH} /" | \
        tee /etc/apt/sources.list.d/nvidia-devtools.list && \
@@ -346,8 +352,10 @@ RUN git lfs install
 ARG FRAMEWORK
 RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
+    # Cache uv downloads; uv handles its own locking for this cache.
    --mount=type=cache,target=/root/.cache/uv \
-    UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 uv pip install \
+    export UV_CACHE_DIR=/root/.cache/uv UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
+    uv pip install \
        --index-strategy unsafe-best-match \
        --extra-index-url https://download.pytorch.org/whl/cu130 \
        --requirement /tmp/requirements.txt \
@@ -364,8 +372,10 @@ COPY --chmod=775 --chown=dynamo:0 ./ ${WORKSPACE_DIR}/
 RUN chmod g+w ${WORKSPACE_DIR}

 # Install benchmarks package (includes prefix_data_generator, tabulate, etc.)
-RUN cd ${WORKSPACE_DIR}/benchmarks && \
-    UV_GIT_LFS=1 uv pip install --no-cache .
+RUN --mount=type=cache,target=/root/.cache/uv \
+    cd ${WORKSPACE_DIR}/benchmarks && \
+    export UV_CACHE_DIR=/root/.cache/uv UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
+    uv pip install .

 # Install maturin and create editable install entry points.
 #
@@ -374,11 +384,15 @@ RUN cd ${WORKSPACE_DIR}/benchmarks && \
 #   /opt/dynamo/venv/bin and put that venv on PATH, so `uv` is expected to be available here in normal builds.
 # - The `command -v uv` guard is defensive: on SGLang, `uv` needs to "disappear" from PATH and we fall back to
 #   `python3 -m pip` so the editable install can still proceed (instead of failing mid-layer with a confusing error).
-RUN if [ -f pyproject.toml ]; then \
+# Cache uv downloads; uv handles its own locking for this cache.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=cache,target=/root/.cache/pip,sharing=locked \
+    export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 PIP_CACHE_DIR=/root/.cache/pip && \
+    if [ -f pyproject.toml ]; then \
        if command -v uv >/dev/null 2>&1; then \
-            uv pip install --no-cache maturin[patchelf] && uv pip install --no-deps -e . ; \
+            uv pip install maturin[patchelf] && uv pip install --no-deps -e . ; \
        else \
-            python3 -m pip install --no-cache-dir maturin[patchelf] && python3 -m pip install --no-deps -e . ; \
+            python3 -m pip install maturin[patchelf] && python3 -m pip install --no-deps -e . ; \
        fi; \
    else \
        echo "ERROR: pyproject.toml not found in ${WORKSPACE_DIR}; expected to build from the Dynamo repo root." >&2; \