ci: add support for vllm sanity testing on Github (#2526)

Signed-off-by: Anant Sharma <anants@nvidia.com> Co-authored-by: Tushar Sharma <tusharma@nvidia.com>

ci: add support for vllm sanity testing on Github (#2526)
Signed-off-by: Anant Sharma <anants@nvidia.com> Co-authored-by: Tushar Sharma <tusharma@nvidia.com>
82bae247 · Anant Sharma · GitHub · e3619ce0 · 82bae247 · 82bae247
Unverified Commit 82bae247 authored Aug 28, 2025 by Anant Sharma Committed by GitHub Aug 28, 2025
7 changed files
--- a/.github/workflows/container-validation-backends.yml
+++ b/.github/workflows/container-validation-backends.yml
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+name: NVIDIA Github Validation
+
+on:
+  push:
+    branches:
+      - main
+      - "pull-request/[0-9]+"
+
+jobs:
+  # Builds the framework container image on a GPU runner, then runs the
+  # marker-selected pytest suite inside the freshly built image.
+  build-test:
+    runs-on: gpu-l40-runners
+    strategy:
+      matrix:
+        framework: [vllm]
+        # Per-framework settings: `target` is passed to build.sh as --target,
+        # `pytest_marks` is the pytest -m marker expression.
+        include:
+          - framework: vllm
+            target: runtime
+            pytest_marks: "e2e and vllm and gpu_1 and not slow"
+    # Do not cancel main branch runs
+    concurrency:
+      group: ${{ matrix.framework }}-build-test-${{ github.ref_name || github.run_id }}
+      cancel-in-progress: ${{ github.ref_name != 'main' }}
+
+    name: Build and Test - ${{ matrix.framework }}
+    env:
+      # Unique per run/attempt/job/framework; used to name the test container.
+      CONTAINER_ID: test_${{ github.run_id }}_${{ github.run_attempt }}_${{ github.job }}_${{ matrix.framework }}
+      PYTEST_XML_FILE: pytest_test_report.xml
+      FRAMEWORK: ${{ matrix.framework }}
+      TARGET: ${{ matrix.target }}
+      PYTEST_MARKS: ${{ matrix.pytest_marks }}
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Login to NGC
+        if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name == 'push'
+        env:
+          # Pass the secret through the environment instead of interpolating it
+          # into the script text, so shell metacharacters in the token are never
+          # interpreted and the token is not embedded in the generated script.
+          NGC_CI_ACCESS_TOKEN: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
+        run: |
+          echo "$NGC_CI_ACCESS_TOKEN" | docker login nvcr.io -u '$oauthtoken' --password-stdin
+      # Prune all unused Docker data before building; runs even if an earlier
+      # step failed.
+      - name: Cleanup
+        if: always()
+        run: |
+          docker system prune -af
+      # Print NVIDIA kernel module, driver log and nvidia-smi diagnostics.
+      - name: Debug
+        run: |
+          lsmod | grep nvidia
+          sudo dmesg | grep -i nvrm || true
+          nvidia-smi
+      # Build the <framework>:latest image with sccache backed by the S3 bucket
+      # and region taken from repository secrets.
+      - name: Build image
+        env:
+          GITHUB_TOKEN: ${{ secrets.CI_TOKEN }}
+          AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
+          SCCACHE_S3_BUCKET:  ${{ secrets.SCCACHE_S3_BUCKET }}
+        run: |
+          ./container/build.sh --tag ${{ matrix.framework }}:latest \
+            --target ${{ matrix.target }} \
+            --framework ${{ matrix.framework }} \
+            --use-sccache \
+            --sccache-bucket "$SCCACHE_S3_BUCKET" \
+            --sccache-region "$AWS_DEFAULT_REGION"
+      # Run the marker-selected tests inside the built image with all GPUs
+      # exposed; -x stops at the first failure.
+      # NOTE(review): the container is started with --rm and no volume mount,
+      # so the JUnit XML written inside it is not available to later steps.
+      - name: Run pytest
+        run: |
+          docker run --rm --gpus all -w /workspace \
+            --name ${{ env.CONTAINER_ID }}_pytest \
+            ${{ matrix.framework }}:latest \
+            bash -c "pytest -xsv --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\""
--- a/.github/workflows/build-and-test.yml
+++ b/.github/workflows/build-and-test.yml
 # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.

-name: NVIDIA Test Github Validation
+name: NVIDIA Github Validation

 on:
  push:
@@ -21,6 +9,11 @@ on:
    - main
  pull_request:

+# Do not cancel main branch runs
+concurrency:
+  group: dynamo-build-test-${{ github.ref_name || github.run_id }}
+  cancel-in-progress: ${{ github.ref_name != 'main' }}
+
 jobs:
  build-test:
    runs-on:
@@ -53,7 +46,14 @@ jobs:
          docker compose up -d nats-server etcd-server
      - name: Run Rust checks (block-manager + integration tests)
        run: |
-          docker run -v ${{ github.workspace }}:/workspace -w /workspace/lib/llm --name ${{ env.CONTAINER_ID }}_rust_checks ${{ steps.define_image_tag.outputs.image_tag }} bash -ec 'rustup component add rustfmt clippy && cargo fmt -- --check && cargo clippy --features block-manager --no-deps --all-targets -- -D warnings && cargo test --locked --all-targets --features=block-manager && cargo test --locked --features integration -- --nocapture'
+          docker run --rm -v ${{ github.workspace }}:/workspace -w /workspace/lib/llm \
+            --name ${{ env.CONTAINER_ID }}_rust_checks \
+            ${{ steps.define_image_tag.outputs.image_tag }} \
+            bash -ec 'rustup component add rustfmt clippy && \
+                      cargo fmt -- --check && \
+                      cargo clippy --features block-manager --no-deps --all-targets -- -D warnings && \
+                      cargo test --locked --all-targets --features=block-manager && \
+                      cargo test --locked --features integration -- --nocapture'
      - name: Cleanup services
        if: always()
        working-directory: ./deploy
@@ -63,7 +63,10 @@ jobs:
        env:
          PYTEST_MARKS: "pre_merge or mypy"
        run: |
-          docker run -v ${{ github.workspace }}:/workspace -w /workspace --name ${{ env.CONTAINER_ID }}_pytest ${{ steps.define_image_tag.outputs.image_tag }} bash -c "pytest --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\""
+          docker run -v ${{ github.workspace }}:/workspace -w /workspace \
+            --name ${{ env.CONTAINER_ID }}_pytest \
+            ${{ steps.define_image_tag.outputs.image_tag }} \
+            bash -c "pytest --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\""
      - name: Copy test report from test Container
        if: always()
        run: |

--- a/container/Dockerfile
+++ b/container/Dockerfile
 # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0

+##################################
+########## Build Arguments ########
+##################################
+
+# Base image configuration
 ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
 # TODO OPS-612: NCCL will hang with 25.03, so use 25.01 for now
 # Please check https://github.com/ai-dynamo/dynamo/pull/1065
 # for details and reproducer to manually test if the image
 # can be updated to later versions.
 ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
+
+# Build configuration
 ARG RELEASE_BUILD=false
 ARG ENABLE_KVBM=false
+ARG CARGO_BUILD_JOBS

 # Define general architecture ARGs for supporting both x86 and aarch64 builds.
 #   ARCH: Used for package suffixes (e.g., amd64, arm64)
@@ -23,6 +31,17 @@ ARG ENABLE_KVBM=false
 ARG ARCH=amd64
 ARG ARCH_ALT=x86_64

+# SCCACHE configuration
+ARG USE_SCCACHE
+ARG SCCACHE_BUCKET=""
+ARG SCCACHE_REGION=""
+
+# NIXL configuration
+ARG NIXL_UCX_REF=v1.19.0
+ARG NIXL_REF=0.4.1
+
+# Python configuration
+ARG PYTHON_VERSION=3.12

 ##################################
 ########## Base Image ############
@@ -30,44 +49,66 @@ ARG ARCH_ALT=x86_64

 FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base

-# Redeclare ARCH and ARCH_ALT so they're available in this stage
+# Redeclare ARGs for this stage
 ARG ARCH
 ARG ARCH_ALT
-ARG CARGO_BUILD_JOBS
-
-ARG NIXL_UCX_REF=v1.19.0
-ARG NIXL_REF=0.4.1
-
-# Environment variables for NIXL
-ENV NIXL_SRC_DIR=/opt/nixl \
-    NIXL_PREFIX=/opt/nvidia/nvda_nixl \
-    NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \
-    NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins
+ARG PYTHON_VERSION
+ARG USE_SCCACHE
+ARG SCCACHE_BUCKET
+ARG SCCACHE_REGION
+ARG NIXL_UCX_REF
+ARG NIXL_REF

 USER root
-ARG PYTHON_VERSION=3.12
+WORKDIR /opt/dynamo
+
+##################################
+########## Tool Installation #####
+##################################

+# Install uv package manager
 COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

+# Install SCCACHE if requested
+COPY container/use-sccache.sh /tmp/use-sccache.sh
+RUN if [ "$USE_SCCACHE" = "true" ]; then \
+        /tmp/use-sccache.sh install; \
+    fi
+
+# Set SCCACHE environment variables
+ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
+    SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
+    SCCACHE_S3_KEY_PREFIX=${USE_SCCACHE:+${ARCH}} \
+    RUSTC_WRAPPER=${USE_SCCACHE:+sccache} \
+    CMAKE_C_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \
+    CMAKE_CXX_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \
+    CMAKE_CUDA_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache}
+
+##################################
+########## Rust Setup ############
+##################################
+
 # Rust environment setup
 ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
    RUST_VERSION=1.89.0

-WORKDIR /opt/dynamo
-
 # Define Rust target based on ARCH_ALT ARG
 ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu

-# Install Rust using RUSTARCH derived from ARCH_ALT
+# Install Rust
 RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
-    # TODO OPS-591: Add SHA check back based on RUSTARCH
    chmod +x rustup-init && \
    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
    rm rustup-init && \
    chmod -R a+w $RUSTUP_HOME $CARGO_HOME

+##################################
+########## System Dependencies ###
+##################################
+
+# Install system packages
 RUN apt-get update -y \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        # NIXL build dependencies
@@ -96,12 +137,17 @@ RUN apt-get update -y \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

-# Download external dependencies in parallel for better performance
+##################################
+########## External Services #####
+##################################
+
+# Install NATS server
 ENV NATS_VERSION="v2.10.28"
 RUN --mount=type=cache,target=/var/cache/apt \
    wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/${NATS_VERSION}/nats-server-${NATS_VERSION}-${ARCH}.deb && \
    dpkg -i nats-server-${NATS_VERSION}-${ARCH}.deb && rm nats-server-${NATS_VERSION}-${ARCH}.deb

+# Install etcd
 ENV ETCD_VERSION="v3.5.21"
 RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
    mkdir -p /usr/local/bin/etcd && \
@@ -109,13 +155,21 @@ RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/downlo
    rm /tmp/etcd.tar.gz
 ENV PATH=/usr/local/bin/etcd/:$PATH

-### UCX EFA Setup ###
+##################################
+########## UCX Build #############
+##################################
+
+# Build and install UCX
 RUN rm -rf /opt/hpcx/ucx && \
    rm -rf /usr/local/ucx && \
    echo "Building UCX with reference $NIXL_UCX_REF" && \
    cd /usr/local/src && \
    git clone https://github.com/openucx/ucx.git && \
    cd ucx && git checkout $NIXL_UCX_REF && \
+    CC=${USE_SCCACHE:+sccache gcc} && \
+    CXX=${USE_SCCACHE:+sccache g++} && \
+    export CC=${CC} && \
+    export CXX=${CXX} && \
    ./autogen.sh && \
    ./configure \
      --prefix=/usr/local/ucx \
@@ -133,6 +187,7 @@ RUN rm -rf /opt/hpcx/ucx && \
      --enable-mt && \
    make -j$(nproc) &&                              \
    make -j$(nproc) install-strip &&                \
+    /tmp/use-sccache.sh show-stats "UCX" && \
    echo "/usr/local/ucx/lib" > /etc/ld.so.conf.d/ucx.conf && \
    echo "/usr/local/ucx/lib/ucx" >> /etc/ld.so.conf.d/ucx.conf && \
    ldconfig && \
@@ -144,8 +199,17 @@ ENV CPATH=/usr/include:$CPATH \
    PATH=/usr/bin:$PATH \
    PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH

-### NIXL SETUP ###
-# Clone nixl source with shallow clone for faster download
+##################################
+########## NIXL Setup ############
+##################################
+
+# NIXL environment setup
+ENV NIXL_SRC_DIR=/opt/nixl \
+    NIXL_PREFIX=/opt/nvidia/nvda_nixl \
+    NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \
+    NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins
+
+# Build and install NIXL
 RUN git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl.git" ${NIXL_SRC_DIR} && \
    cd ${NIXL_SRC_DIR} && \
    if [ "$ARCH" = "arm64" ]; then \
@@ -154,13 +218,13 @@ RUN git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl.
        nixl_build_args=""; \
    fi && \
    meson setup build/ --buildtype=release --prefix=$NIXL_PREFIX $nixl_build_args && \
-    ninja -C build/ -j$(nproc) && \
-    ninja -C build/ install && \
+    ninja -C build/ -j$(nproc) && ninja -C build/ install && \
+    /tmp/use-sccache.sh show-stats "NIXL" && \
    echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
    echo "$NIXL_PLUGIN_DIR" >> /etc/ld.so.conf.d/nixl.conf && \
    ldconfig

-# Install NIXL Python module
+# Build NIXL Python module
 # TODO OPS-590: Move gds_path selection based on arch into NIXL build and re-enable gds backend for arm64
 RUN if [ "$ARCH" = "arm64" ]; then \
        cd ${NIXL_SRC_DIR} && uv build . --out-dir /opt/dynamo/wheelhouse/nixl \
@@ -169,11 +233,15 @@ RUN if [ "$ARCH" = "arm64" ]; then \
        cd ${NIXL_SRC_DIR} && uv build . --out-dir /opt/dynamo/wheelhouse/nixl; \
    fi

-# Create virtual environment
+##################################
+########## Python Environment ####
+##################################
+
+# Create and activate virtual environment
+ARG PYTHON_VERSION
 RUN mkdir -p /opt/dynamo/venv && \
-    uv venv /opt/dynamo/venv --python 3.12
+    uv venv /opt/dynamo/venv --python $PYTHON_VERSION

-# Activate virtual environment
 ENV VIRTUAL_ENV=/opt/dynamo/venv \
    PATH="/opt/dynamo/venv/bin:${PATH}"

@@ -191,43 +259,58 @@ ARG ARCH_ALT

 FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder

+# Redeclare ARGs for this stage
+ARG ARCH
+ARG ARCH_ALT
 ARG CARGO_BUILD_JOBS
-# Set CARGO_BUILD_JOBS to 16 if not provided
-# This is to prevent cargo from building $(nproc) jobs in parallel,
-# which might exceed the number of opened files limit.
-ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
-# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
 ARG RELEASE_BUILD
-# Use arg ENABLE_KVBM = true to turn on the block-manager feature
 ARG ENABLE_KVBM
+ARG USE_SCCACHE
+ARG SCCACHE_BUCKET
+ARG SCCACHE_REGION

 WORKDIR /opt/dynamo

-RUN dnf update -y \
-    && dnf install -y llvm-toolset protobuf-compiler python3.12-devel \
-    && dnf clean all \
-    && rm -rf /var/cache/dnf
-
-ENV RUSTUP_HOME=/usr/local/rustup \
+# Set environment variables
+ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} \
+    RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/opt/dynamo/target \
    VIRTUAL_ENV=/opt/dynamo/venv \
-    NIXL_PREFIX=/opt/nvidia/nvda_nixl
+    NIXL_PREFIX=/opt/nvidia/nvda_nixl \
+    PATH=/usr/local/cargo/bin:/opt/dynamo/venv/bin:$PATH
+
+# Install system dependencies
+ARG PYTHON_VERSION
+RUN dnf update -y \
+    && dnf install -y llvm-toolset protobuf-compiler python${PYTHON_VERSION}-devel wget \
+    && dnf clean all \
+    && rm -rf /var/cache/dnf

+# Copy artifacts from base stage
 COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
 COPY --from=base $CARGO_HOME $CARGO_HOME
 COPY --from=base $NIXL_PREFIX $NIXL_PREFIX
 COPY --from=base $VIRTUAL_ENV $VIRTUAL_ENV
-ENV PATH=$CARGO_HOME/bin:$VIRTUAL_ENV/bin:$PATH

-# Copy configuration files first for better layer caching
-COPY pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /opt/dynamo/
+# Install SCCACHE if requested
+COPY container/use-sccache.sh /tmp/use-sccache.sh
+RUN if [ "$USE_SCCACHE" = "true" ]; then \
+        /tmp/use-sccache.sh install; \
+    fi
+
+# Set SCCACHE environment variables
+ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
+    SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
+    SCCACHE_S3_KEY_PREFIX=${USE_SCCACHE:+${ARCH}} \
+    RUSTC_WRAPPER=${USE_SCCACHE:+sccache}

-# Copy source code
+# Copy source code (order matters for layer caching)
+COPY pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /opt/dynamo/
 COPY lib/ /opt/dynamo/lib/
 COPY components/ /opt/dynamo/components/

-# Build dynamo wheel
+# Build wheels
 RUN uv build --wheel --out-dir /opt/dynamo/dist && \
    cd /opt/dynamo/lib/bindings/python && \
    uv pip install maturin[patchelf] && \
@@ -237,14 +320,15 @@ RUN uv build --wheel --out-dir /opt/dynamo/dist && \
        maturin build --release --out /opt/dynamo/dist; \
    fi && \
    if [ "$RELEASE_BUILD" = "true" ]; then \
-        # do not enable KVBM feature, ensure compatibility with lower glibc
        uv run --python 3.11 maturin build --release --out /opt/dynamo/dist && \
        uv run --python 3.10 maturin build --release --out /opt/dynamo/dist; \
-    fi
+    fi && \
+    /tmp/use-sccache.sh show-stats "Dynamo"

 ##############################################
 ########## Dev entrypoint image ##############
 ##############################################
+
 FROM base AS dev

 # Application environment variables
@@ -254,16 +338,13 @@ ENV DYNAMO_HOME=/opt/dynamo \

 WORKDIR /opt/dynamo

+# Copy built artifacts
 COPY --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
 COPY --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR
-
-# Copy Cargo cache to avoid re-downloading dependencies
 COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME

-# Temporarily copy benchmarks folder for installation
+# Install Python packages
 COPY benchmarks/ /opt/dynamo/benchmarks/
-
-# Install all python packages
 RUN uv pip install \
    /opt/dynamo/wheelhouse/ai_dynamo_runtime*cp312*.whl \
    /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
@@ -271,10 +352,10 @@ RUN uv pip install \
    /opt/dynamo/benchmarks && \
    rm -rf /opt/dynamo/benchmarks

-# Copy launch banner
+# Setup launch banner
 RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \
    sed '/^#\s/d' /opt/dynamo/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc

 ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
-CMD []
+CMD []
\ No newline at end of file
--- a/container/Dockerfile.vllm
+++ b/container/Dockerfile.vllm
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0

 ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
-# FIXME: NCCL will hang with 25.03, so use 25.01 for now
+# TODO OPS-612: NCCL will hang with 25.03, so use 25.01 for now
 # Please check https://github.com/ai-dynamo/dynamo/pull/1065
 # for details and reproducer to manually test if the image
 # can be updated to later versions.
@@ -16,6 +16,11 @@ ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
 ARG VLLM_REF="1da94e673c257373280026f75ceb4effac80e892"  # from v0.10.1.1
 ARG TORCH_BACKEND="cu128"

+# sccache configuration - inherit from base build
+ARG USE_SCCACHE
+ARG SCCACHE_BUCKET=""
+ARG SCCACHE_REGION=""
+
 # Match 0.10.1.1 vLLM release
 # https://github.com/vllm-project/vllm/releases/tag/v0.10.1.1
 # Pinned to commit before https://github.com/deepseek-ai/DeepGEMM/pull/112 for DeepGEMM which seems to break on H100:
@@ -37,187 +42,172 @@ ARG FLASHINF_REF="v0.2.11"
 # without adding if statements everywhere, so just define both as ARGs for now.
 ARG ARCH=amd64
 ARG ARCH_ALT=x86_64
-
-##################################
-########## Base Image ############
-##################################
-
-FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
-
-# Redeclare ARCH, ARCH_ALT, TORCH_BACKEND so they're available in this stage
-ARG ARCH
-ARG ARCH_ALT
-ARG TORCH_BACKEND
-
-USER root
+# Python configuration
 ARG PYTHON_VERSION=3.12

-RUN apt-get update -y && \
-    apt-get install -y --no-install-recommends  \
-    # NIXL build dependencies
-    cmake \
-    meson \
-    ninja-build \
-    pybind11-dev \
-    # These headers are missing with the hpcx installer, required
-    # by UCX to find RDMA devices
-    libibverbs-dev rdma-core ibverbs-utils libibumad-dev \
-    libnuma-dev librdmacm-dev ibverbs-providers \
-    # Rust build dependencies
-	clang \
-    libclang-dev \
-	git \
-    build-essential \
-    protobuf-compiler \
-    libssl-dev \
-    pkg-config \
-    # Install utilities
-    nvtop \
-    tmux \
-    vim \
-    autoconf \
-    automake \
-    libtool \
-    net-tools \
-    # For Prometheus
-    curl tar ca-certificates && \
-    rm -rf /var/lib/apt/lists/*
-
-ARG NIXL_UCX_REF=v1.19.0
-ARG NIXL_REF=0.4.1
+ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
+FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base

-ENV NIXL_SRC_DIR=/opt/nixl
-ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
-ARG ARCH_ALT
-ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
-ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
-ENV LD_LIBRARY_PATH=$NIXL_LIB_DIR:$NIXL_PLUGIN_DIR:$LD_LIBRARY_PATH
-
-WORKDIR /workspace
-
-### UCX EFA Setup ###
-RUN rm -rf /opt/hpcx/ucx && \
-    rm -rf /usr/local/ucx && \
-    echo "Building UCX with reference $NIXL_UCX_REF" && \
-    cd /usr/local/src &&                            \
-    git clone https://github.com/openucx/ucx.git && \
-    cd ucx &&                                       \
-    git checkout $NIXL_UCX_REF &&                   \
-    ./autogen.sh && ./configure                     \
-    --prefix=/usr/local/ucx                         \
-    --enable-shared                                 \
-    --disable-static                                \
-    --disable-doxygen-doc                           \
-    --enable-optimizations                          \
-    --enable-cma                                    \
-    --enable-devel-headers                          \
-    --with-cuda=/usr/local/cuda                     \
-    --with-verbs                                    \
-    --with-efa                                      \
-    --with-dm                                       \
-    --with-gdrcopy=/usr/local                       \
-    --enable-mt &&                                  \
-    make -j &&                                      \
-    make -j install-strip &&                        \
-    ldconfig
-
-ENV LD_LIBRARY_PATH=\
-/usr/lib:/usr/local/ucx/lib:\
-/usr/local/ucx/lib/ucx:\
-$LD_LIBRARY_PATH
-ENV CPATH=/usr/include
-ENV PATH=/usr/bin:$PATH
-ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig
-SHELL ["/bin/bash", "-c"]
-
-WORKDIR /workspace
-
-### NIXL SETUP ###
-# Clone nixl source
-# TEMP: disable gds backend for arm64
-RUN git clone "https://github.com/ai-dynamo/nixl.git" ${NIXL_SRC_DIR} && \
-    cd ${NIXL_SRC_DIR} && \
-    git checkout ${NIXL_REF} && \
-    if [ "$ARCH" = "arm64" ]; then \
-        nixl_build_args="-Ddisable_gds_backend=true"; \
-    else \
-        nixl_build_args=""; \
-    fi && \
-    mkdir build && \
-    meson setup build/ --buildtype=release --prefix=$NIXL_PREFIX $nixl_build_args && \
-    cd build/ && \
-    ninja && \
-    ninja install;
-
-### NATS & ETCD SETUP ###
-ENV ETCD_VERSION="v3.5.21"
-RUN wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/v2.10.28/nats-server-v2.10.28-${ARCH}.deb && \
-    dpkg -i nats-server-v2.10.28-${ARCH}.deb && rm nats-server-v2.10.28-${ARCH}.deb && \
-    wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
-    mkdir -p /usr/local/bin/etcd && \
-    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
-    rm /tmp/etcd.tar.gz
-ENV PATH=/usr/local/bin/etcd/:$PATH
+########################################################
+########## Framework Development Image ################
+########################################################
+#
+# PURPOSE: Framework development and vLLM compilation
+#
+# This stage builds and compiles framework dependencies including:
+# - vLLM inference engine with CUDA support
+# - DeepGEMM and FlashInfer optimizations
+# - All necessary build tools and compilation dependencies
+# - Framework-level Python packages and extensions
+#
+# Use this stage when you need to:
+# - Build vLLM from source with custom modifications
+# - Develop or debug framework-level components
+# - Create custom builds with specific optimization flags
+#

+# Build the framework stage on the CUDA devel base image (${BASE_IMAGE}:${BASE_IMAGE_TAG}); the dynamo base image is only used as the dynamo_base stage
+FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework
+
+RUN apt-get update -y \
+    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+        # vLLM build dependencies
+        cmake \
+        ibverbs-providers \
+        ibverbs-utils \
+        libibumad-dev \
+        libibverbs-dev \
+        libnuma-dev \
+        librdmacm-dev \
+        rdma-core \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*

 ### VIRTUAL ENVIRONMENT SETUP ###

-# Install uv and create virtualenv
-ENV VIRTUAL_ENV=/opt/dynamo/venv
 COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
-RUN mkdir /opt/dynamo && \
-    uv venv ${VIRTUAL_ENV} --python 3.12
+ARG PYTHON_VERSION
+# Create virtual environment
+RUN mkdir -p /opt/dynamo/venv && \
+    uv venv /opt/dynamo/venv --python $PYTHON_VERSION

 # Activate virtual environment
-ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
-
-# Install NIXL Python module
-# TODO: Move gds_path selection based on arch into NIXL build
-# TEMP: disable gds backend for arm64
-RUN if [ "$ARCH" = "arm64" ]; then \
-        cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl \
-        --config-settings=setup-args="-Ddisable_gds_backend=true"; \
-    else \
-        cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl; \
-    fi && \
-    # Install the wheel
-    # TODO: Move NIXL wheel install to the wheel_builder stage
-    uv pip install /workspace/wheels/nixl/*.whl
+ENV VIRTUAL_ENV=/opt/dynamo/venv \
+    PATH="/opt/dynamo/venv/bin:${PATH}"

+ARG ARCH
 # Install vllm - keep this early in Dockerfile to avoid
 # rebuilds from unrelated source code changes
 ARG VLLM_REF
 ARG VLLM_GIT_URL
 ARG DEEPGEMM_REF
 ARG FLASHINF_REF
+ARG TORCH_BACKEND

 ARG MAX_JOBS=16
 ENV MAX_JOBS=$MAX_JOBS
 ENV CUDA_HOME=/usr/local/cuda

+# Copy the sccache helper script into the image
+COPY container/use-sccache.sh /tmp/use-sccache.sh
+# Build arguments used by the optional sccache setup
+ARG USE_SCCACHE
+ARG ARCH_ALT
+ARG SCCACHE_BUCKET
+ARG SCCACHE_REGION
+
+ENV ARCH_ALT=${ARCH_ALT}
+RUN if [ "$USE_SCCACHE" = "true" ]; then \
+        /tmp/use-sccache.sh install; \
+    fi
+
+# Set environment variables - they'll be empty strings if USE_SCCACHE is unset or empty (any non-empty value, even "false", enables them)
+ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
+    SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
+    SCCACHE_S3_KEY_PREFIX=${USE_SCCACHE:+${ARCH}} \
+    CMAKE_C_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \
+    CMAKE_CXX_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \
+    CMAKE_CUDA_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache}
+# Install VLLM and related dependencies
 RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
    --mount=type=cache,target=/root/.cache/uv \
        # TODO - split vllm, DeepEP, DeepGeMM, PPLX installs
        # Should be able to select how you want your build to go
        cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
        chmod +x /tmp/install_vllm.sh && \
-        /tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF --torch-backend $TORCH_BACKEND;
+        /tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF --torch-backend $TORCH_BACKEND && \
+        /tmp/use-sccache.sh show-stats "vLLM";

 ENV LD_LIBRARY_PATH=\
 /opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
 $LD_LIBRARY_PATH

-# Common dependencies
-RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
-    uv pip install --requirement /tmp/requirements.txt

-### MISC UTILITY SETUP ###
+##################################################
+########## Runtime Image ########################
+##################################################
+#
+# PURPOSE: Production runtime environment
+#
+# This stage creates a lightweight production-ready image containing:
+# - Pre-compiled vLLM and framework dependencies
+# - Dynamo runtime libraries and Python packages
+# - Essential runtime dependencies and configurations
+# - Optimized for inference workloads and deployment
+#
+# Use this stage when you need:
+# - Production deployment of Dynamo with vLLM
+# - Minimal runtime footprint without build tools
+# - Ready-to-run inference server environment
+# - Base for custom application containers
+#
+
+FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

-# Install test dependencies
-RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
-    uv pip install --requirement /tmp/requirements.txt && \
-    pyright --help > /dev/null 2>&1 && \
-    printf "[safe]\n      directory=/workspace\n" > /root/.gitconfig
+WORKDIR /workspace
+ENV DYNAMO_HOME=/opt/dynamo
+ENV VIRTUAL_ENV=/opt/dynamo/venv
+ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
+
+ARG ARCH_ALT
+ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
+ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
+ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
+
+# Install Python, build-essential and python3-dev as apt dependencies
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+        # Python runtime - CRITICAL for virtual environment to work
+        python3-dev \
+        build-essential \
+        # jq and curl for polling various endpoints and health checks
+        jq \
+        curl \
+        # Libraries required by UCX to find RDMA devices
+        libibverbs1 rdma-core ibverbs-utils libibumad3 \
+        libnuma1 librdmacm1 ibverbs-providers \
+        # JIT Kernel Compilation, flashinfer
+        ninja-build \
+        g++ \
+        # prometheus dependencies
+        ca-certificates && \
+    rm -rf /var/lib/apt/lists/*
+
+# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from base devel image
+COPY --from=framework /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc
+COPY --from=framework /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++
+COPY --from=framework /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas
+COPY --from=framework /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary
+COPY --from=framework /usr/local/cuda/include/ /usr/local/cuda/include/
+COPY --from=framework /usr/local/cuda/nvvm /usr/local/cuda/nvvm
+COPY --from=framework /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/
+
+### COPY NATS & ETCD ###
+# Copy nats and etcd from the dynamo base image (dynamo_base stage)
+COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
+COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
+# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
+ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH

 # Install prometheus
 ARG PROM_VERSION=3.4.1
@@ -227,41 +217,122 @@ RUN ARCH=$(dpkg --print-architecture) && \
        arm64) PLATFORM=linux-arm64 ;; \
        *) echo "Unsupported architecture: $ARCH" && exit 1 ;; \
    esac && \
-    curl -fsSL https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/prometheus-${PROM_VERSION}.${PLATFORM}.tar.gz \
+    curl -fsSL "https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/prometheus-${PROM_VERSION}.${PLATFORM}.tar.gz" \
    | tar -xz -C /tmp && \
-    mv /tmp/prometheus-${PROM_VERSION}.${PLATFORM}/prometheus /usr/local/bin/ && \
+    mv "/tmp/prometheus-${PROM_VERSION}.${PLATFORM}/prometheus" /usr/local/bin/ && \
    chmod +x /usr/local/bin/prometheus && \
-    rm -rf /tmp/prometheus-${PROM_VERSION}.${PLATFORM}
+    rm -rf "/tmp/prometheus-${PROM_VERSION}.${PLATFORM}"

-### BUILDS ###
+# Copy UCX from dev image as plugin for NIXL
+# Copy NIXL source from dev image
+# Copy dynamo wheels for gitlab artifacts
+COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx
+COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX

-ENV RUSTUP_HOME=/usr/local/rustup \
-    CARGO_HOME=/usr/local/cargo \
-    PATH=/usr/local/cargo/bin:$PATH \
-    RUST_VERSION=1.89.0
-
-# Define Rust target based on ARCH_ALT ARG
-ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu
-
-# Install Rust using RUSTARCH derived from ARCH_ALT
-RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
-    # TODO: Add SHA check back based on RUSTARCH
-    chmod +x rustup-init && \
-    ./rustup-init -y --no-modify-path --profile default --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
-    rm rustup-init && \
-    chmod -R a+w $RUSTUP_HOME $CARGO_HOME
-
-ARG CARGO_BUILD_JOBS
-# Set CARGO_BUILD_JOBS to 16 if not provided
-# This is to prevent cargo from building $(nproc) jobs in parallel,
-# which might exceed the number of opened files limit.
-ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
+# Copies vllm, DeepEP, DeepGEMM, PPLX repos (all editable installs) and nvshmem binaries
+COPY --from=framework /opt/vllm /opt/vllm
+
+ENV LD_LIBRARY_PATH=\
+/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
+$NIXL_LIB_DIR:\
+$NIXL_PLUGIN_DIR:\
+/usr/local/ucx/lib:\
+/usr/local/ucx/lib/ucx:\
+$LD_LIBRARY_PATH
+
+### VIRTUAL ENVIRONMENT SETUP ###
+
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
+ARG PYTHON_VERSION
+RUN uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION
+
+# Copy virtual environment from framework image to avoid re-installing framework + vllm dependencies
+COPY --from=framework \
+    /opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages \
+    /opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages
+
+# Install dynamo, NIXL, and dynamo-specific dependencies
+COPY benchmarks/ /opt/dynamo/benchmarks/
+COPY --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
+RUN uv pip install \
+    /opt/dynamo/wheelhouse/ai_dynamo_runtime*cp312*.whl \
+    /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
+    /opt/dynamo/wheelhouse/nixl/nixl*.whl \
+    /opt/dynamo/benchmarks && \
+    rm -rf /opt/dynamo/benchmarks
+
+# Install common and test dependencies
+RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
+    --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
+    uv pip install --requirement /tmp/requirements.txt --requirement /tmp/requirements.test.txt
+
+# Copy benchmarks, examples, and tests for CI
+COPY . /workspace/
+
+# Copy attribution files
+COPY ATTRIBUTION* LICENSE /workspace/
+# Copy launch banner
+RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
+    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
+    echo "cat ~/.launch_screen" >> ~/.bashrc && \
+    echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc
+
+ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
+CMD []

 #######################################
-########## Local Development ##########
+########## Local Development #######
 #######################################
+#
+# PURPOSE: Local development
+#
+# This stage adds development tools, utilities, and dependencies specifically
+# needed for:
+# - Local development and debugging
+# - vscode/cursor development
+#
+# Use this stage when you need a full development environment with additional
+# tooling beyond the base runtime image.

-FROM base AS local-dev
+FROM runtime AS local-dev
+
+# Install utilities
+RUN apt-get update -y && \
+    apt-get install -y --no-install-recommends  \
+    # Install utilities
+    nvtop \
+    wget \
+    tmux \
+    vim \
+    autoconf \
+    automake \
+    libtool \
+    net-tools \
+    git \
+    # Build Dependencies
+    autoconf \
+    automake \
+    cmake \
+    git \
+    libtool \
+    meson \
+    net-tools \
+    ninja-build \
+    pybind11-dev \
+    # Rust build dependencies
+    clang \
+    libclang-dev \
+    protobuf-compiler && \
+    rm -rf /var/lib/apt/lists/*
+
+# Rust environment setup
+ENV RUSTUP_HOME=/usr/local/rustup \
+    CARGO_HOME=/usr/local/cargo \
+    CARGO_TARGET_DIR=/opt/dynamo/target \
+    PATH=/usr/local/cargo/bin:$PATH
+
+COPY --from=dynamo_base $RUSTUP_HOME $RUSTUP_HOME
+COPY --from=dynamo_base $CARGO_HOME $CARGO_HOME

 # https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user
 # Will use the default ubuntu user, but give sudo access
@@ -282,9 +353,9 @@ RUN apt-get update && apt-get install -y sudo gnupg2 gnupg1 \

 # This is a slow operation (~40s on my cpu)
 # Much better than chown -R $USERNAME:$USERNAME /opt/dynamo/venv (~10min on my cpu)
-COPY --from=base --chown=$USER_UID:$USER_GID ${VIRTUAL_ENV} ${VIRTUAL_ENV}
+COPY --from=runtime --chown=$USER_UID:$USER_GID ${VIRTUAL_ENV} ${VIRTUAL_ENV}
 RUN chown $USERNAME:$USERNAME ${VIRTUAL_ENV}
-COPY --from=base --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin
+COPY --from=runtime --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin

 # so we can use maturin develop
 RUN uv pip install maturin[patchelf]
@@ -303,208 +374,5 @@ RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.comman

 RUN mkdir -p /home/$USERNAME/.cache/

-ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
-
-##################################
-##### Wheel Build Image ##########
-##################################
-
-# Redeclare ARCH_ALT ARG so it's available for interpolation in the FROM instruction
-ARG ARCH_ALT
-
-FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder
-
-ARG CARGO_BUILD_JOBS
-# Set CARGO_BUILD_JOBS to 16 if not provided
-# This is to prevent cargo from building $(nproc) jobs in parallel,
-# which might exceed the number of opened files limit.
-ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
-# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
-ARG RELEASE_BUILD
-# Use arg ENABLE_KVBM = true to turn on the block-manager feature
-ARG ENABLE_KVBM
-
-# Keep in sync with the base image.
-ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
-
-WORKDIR /workspace
-
-RUN yum update -y \
-    && yum install -y llvm-toolset \
-    && yum install -y python3.12-devel \
-    && yum install -y protobuf-compiler \
-    && yum clean all \
-    && rm -rf /var/cache/yum
-
-ENV RUSTUP_HOME=/usr/local/rustup \
-    CARGO_HOME=/usr/local/cargo \
-    CARGO_TARGET_DIR=/workspace/target \
-    VIRTUAL_ENV=/opt/dynamo/venv
-
-COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
-COPY --from=base $CARGO_HOME $CARGO_HOME
-COPY --from=base $NIXL_PREFIX $NIXL_PREFIX
-COPY --from=base /workspace /workspace
-COPY --from=base $VIRTUAL_ENV $VIRTUAL_ENV
-ENV PATH=$CARGO_HOME/bin:$VIRTUAL_ENV/bin:$PATH
-
-# Copy configuration files
-COPY pyproject.toml /workspace/
-COPY README.md /workspace/
-COPY LICENSE /workspace/
-COPY Cargo.toml /workspace/
-COPY Cargo.lock /workspace/
-COPY rust-toolchain.toml /workspace/
-COPY hatch_build.py /workspace/
-
-# Copy source code
-COPY lib/ /workspace/lib/
-COPY components /workspace/components
-COPY launch /workspace/launch
-
-RUN cargo build \
-	--release \
-	--locked \
-	--features dynamo-llm/block-manager \
-	--workspace
-
-# Build dynamo wheel
-RUN uv build --wheel --out-dir /workspace/dist && \
-    cd /workspace/lib/bindings/python && \
-    uv pip install maturin[patchelf] && \
-    if [ "$ENABLE_KVBM" = "true" ]; then \
-        maturin build --release --features block-manager --out /workspace/dist; \
-    else \
-        maturin build --release --out /workspace/dist; \
-    fi && \
-    if [ "$RELEASE_BUILD" = "true" ]; then \
-        # do not enable KVBM feature, ensure compatibility with lower glibc
-        uv run --python 3.11 maturin build --release --out /workspace/dist && \
-        uv run --python 3.10 maturin build --release --out /workspace/dist; \
-    fi
-
-#######################################
-########## CI Minimum Image ###########
-#######################################
-FROM base AS ci_minimum
-
-ENV DYNAMO_HOME=/workspace
-ENV CARGO_TARGET_DIR=/workspace/target
-
-WORKDIR /workspace
-
-COPY --from=wheel_builder /workspace /workspace
-COPY --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
-
-# Copy Cargo cache to avoid re-downloading dependencies
-COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME
-
-# Copy rest of the code
-COPY . /workspace
-
-# Package the bindings
-RUN mkdir -p /opt/dynamo/bindings/wheels && \
-    mkdir /opt/dynamo/bindings/lib && \
-    cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
-    cp target/release/metrics /usr/local/bin
-
-RUN uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
-    uv pip install /workspace/dist/ai_dynamo*any.whl
-
-RUN uv pip install /workspace/benchmarks
-
-# Copy launch banner
-RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
-    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
-    echo "cat ~/.launch_screen" >> ~/.bashrc
-
-########################################
-########## Development Image ###########
-########################################
-FROM ci_minimum AS dev
-
-ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
-
-CMD []
-
-####################################
-########## Runtime Image ###########
-####################################
-
-FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
-
-WORKDIR /workspace
-ENV DYNAMO_HOME=/workspace
-ENV VIRTUAL_ENV=/opt/dynamo/venv
-ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
-
-ARG ARCH_ALT
-ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
-ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
-ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
-
-# Install build-essential and python3-dev as apt dependencies
-RUN apt-get update && \
-    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-        build-essential \
-        python3-dev \
-        # jq and curl for polling various endpoints and health checks
-        jq \
-        curl \
-        # For debugging
-        vim \
-        # Libraries required by UCX to find RDMA devices
-        libibverbs1 rdma-core ibverbs-utils libibumad3 \
-        libnuma1 librdmacm1 ibverbs-providers \
-        # JIT Kernel Compilation, flashinfer
-        ninja-build \
-        g++ \
-        cuda-toolkit-12-8 && \
-    rm -rf /var/lib/apt/lists/*
-
-### COPY NATS & ETCD & PROMETHEUS ###
-# Copy nats and etcd from base image
-COPY --from=base /usr/bin/nats-server /usr/bin/nats-server
-COPY --from=base /usr/local/bin/etcd/ /usr/local/bin/etcd/
-ENV PATH=/usr/local/bin/etcd/:$PATH
-
-# Copy prometheus from base image
-COPY --from=base /usr/local/bin/prometheus /usr/local/bin/prometheus
-
-# Copy UCX from base image as plugin for NIXL
-# Copy NIXL source from wheel_builder image
-# Copy dynamo wheels for gitlab artifacts
-COPY --from=base /usr/local/ucx /usr/local/ucx
-COPY --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
-COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
-
-# Copies vllm, DeepEP, DeepGEMM, PPLX repos (all editable installs) and nvshmem binaries
-COPY --from=base /opt/vllm /opt/vllm
-
-ENV LD_LIBRARY_PATH=\
-/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
-$NIXL_LIB_DIR:\
-$NIXL_PLUGIN_DIR:\
-/usr/local/ucx/lib:\
-/usr/local/ucx/lib/ucx:\
-$LD_LIBRARY_PATH
-
-# Copy entire venv
-# Theres a lot of stuff we'd have to re-compile (for arm64)
-# TODO: use pip ai-dynamo[vllm] in venv to replicate end user environment
-# Copy metrics binary from wheel_builder image, not part of ai-dynamo wheel
-COPY --from=ci_minimum /workspace/target/release/metrics /usr/local/bin/metrics
-COPY --from=ci_minimum ${VIRTUAL_ENV} ${VIRTUAL_ENV}
-
-# Keep everything from ci_minimum for mypy and other pre-merge tests
-# TODO: Remove this once we have a functional CI image built on top of the runtime image
-COPY --from=ci_minimum /workspace/ /workspace/
-
-# Copy launch banner
-RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
-    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
-    echo "cat ~/.launch_screen" >> ~/.bashrc && \
-    echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc
-
 ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
 CMD []
--- a/container/build.sh
+++ b/container/build.sh
@@ -121,6 +121,11 @@ NIXL_UCX_EFA_REF=9d2b88a1f67faf9876f267658bd077b379b8bb76

 NO_CACHE=""

+# sccache configuration for S3
+USE_SCCACHE=""
+SCCACHE_BUCKET=""
+SCCACHE_REGION=""
+
 get_options() {
    while :; do
        case $1 in
@@ -282,9 +287,25 @@ get_options() {
        --make-efa)
            NIXL_UCX_REF=$NIXL_UCX_EFA_REF
            ;;
-        --)
-            shift
-            break
+        --use-sccache)
+            USE_SCCACHE=true
+            ;;
+        --sccache-bucket)
+            if [ "$2" ]; then
+                SCCACHE_BUCKET=$2
+                shift
+            else
+                missing_requirement "$1"
+            fi
+            ;;
+
+        --sccache-region)
+            if [ "$2" ]; then
+                SCCACHE_REGION=$2
+                shift
+            else
+                missing_requirement "$1"
+            fi
            ;;
         -?*)
            error 'ERROR: Unknown option: ' "$1"
@@ -345,6 +366,16 @@ get_options() {
    else
        TARGET_STR="--target dev"
    fi
+
+    # Validate sccache configuration
+    if [ "$USE_SCCACHE" = true ]; then
+        if [ -z "$SCCACHE_BUCKET" ]; then
+            error "ERROR: --sccache-bucket is required when --use-sccache is specified"
+        fi
+        if [ -z "$SCCACHE_REGION" ]; then
+            error "ERROR: --sccache-region is required when --use-sccache is specified"
+        fi
+    fi
 }


@@ -360,6 +391,15 @@ show_image_options() {
    echo "   Build Context: '${BUILD_CONTEXT}'"
    echo "   Build Arguments: '${BUILD_ARGS}'"
    echo "   Framework: '${FRAMEWORK}'"
+    if [ "$USE_SCCACHE" = true ]; then
+        echo "   sccache: Enabled"
+        echo "   sccache Bucket: '${SCCACHE_BUCKET}'"
+        echo "   sccache Region: '${SCCACHE_REGION}'"
+
+        if [ -n "$SCCACHE_S3_KEY_PREFIX" ]; then
+            echo "   sccache S3 Key Prefix: '${SCCACHE_S3_KEY_PREFIX}'"
+        fi
+    fi
    echo ""
 }

@@ -386,6 +426,9 @@ show_help() {
    echo "  [--make-efa Enables EFA support for NIXL]"
    echo "  [--enable-kvbm Enables KVBM support in Python 3.12]"
    echo "  [--trtllm-use-nixl-kvcache-experimental Enables NIXL KVCACHE experimental support for TensorRT-LLM]"
+    echo "  [--use-sccache enable sccache for Rust/C/C++ compilation caching]"
+    echo "  [--sccache-bucket S3 bucket name for sccache (required with --use-sccache)]"
+    echo "  [--sccache-region S3 region for sccache (required with --use-sccache)]"
    exit 0
 }

@@ -400,6 +443,7 @@ error() {

 get_options "$@"

+
 # Automatically set ARCH and ARCH_ALT if PLATFORM is linux/arm64
 ARCH="amd64"
 if [[ "$PLATFORM" == *"linux/arm64"* ]]; then
@@ -547,6 +591,15 @@ if [ -n "${NIXL_UCX_REF}" ]; then
    BUILD_ARGS+=" --build-arg NIXL_UCX_REF=${NIXL_UCX_REF} "
 fi

+# Add sccache build arguments
+if [ "$USE_SCCACHE" = true ]; then
+    BUILD_ARGS+=" --build-arg USE_SCCACHE=true"
+    BUILD_ARGS+=" --build-arg SCCACHE_BUCKET=${SCCACHE_BUCKET}"
+    BUILD_ARGS+=" --build-arg SCCACHE_REGION=${SCCACHE_REGION}"
+
+
+fi
+
 LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}"
 if [ -n "${TARGET}" ]; then
    LATEST_TAG="${LATEST_TAG}-${TARGET}"
@@ -558,6 +611,24 @@ if [ -z "$RUN_PREFIX" ]; then
    set -x
 fi

-$RUN_PREFIX docker build -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
+# TODO: Follow 2-step build process for all frameworks once necessary changes are made to the sglang and TRT-LLM backend Dockerfiles.
+if [[ $FRAMEWORK == "VLLM" ]]; then
+    # Define base image tag before using it
+    DYNAMO_BASE_IMAGE="dynamo-base:${VERSION}"
+    # Start base image build
+    echo "======================================"
+    echo "Starting Build 1: Base Image"
+    echo "======================================"
+    $RUN_PREFIX docker build -f "${SOURCE_DIR}/Dockerfile" --target dev $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO --tag $DYNAMO_BASE_IMAGE $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
+    # Start framework build
+    echo "======================================"
+    echo "Starting Build 2: Framework Image"
+    echo "======================================"
+    BUILD_ARGS+=" --build-arg DYNAMO_BASE_IMAGE=${DYNAMO_BASE_IMAGE}"
+    $RUN_PREFIX docker build -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
+else
+    $RUN_PREFIX docker build -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
+fi
+

 { set +x; } 2>/dev/null
--- a/container/use-sccache.sh
+++ b/container/use-sccache.sh
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+set -euo pipefail
+
+# sccache management script
+# This script handles sccache installation, environment setup, and statistics display
+
+SCCACHE_VERSION="v0.8.2"
+
+
+usage() {
+    # Print the command-line help to stdout. The heredoc delimiter is unquoted,
+    # so ${prog} is expanded to the path this script was invoked as.
+    local prog="$0"
+    cat <<EOF
+Usage: ${prog} [COMMAND] [OPTIONS]
+
+Commands:
+    install         Install sccache binary (requires ARCH_ALT environment variable)
+    show-stats      Display sccache statistics with optional build name
+    help            Show this help message
+
+Environment variables:
+    USE_SCCACHE             Set to 'true' to enable sccache
+    SCCACHE_BUCKET          S3 bucket name (fallback if not passed as parameter)
+    SCCACHE_REGION          S3 region (fallback if not passed as parameter)
+    ARCH                    Architecture for S3 key prefix (fallback if not passed as parameter)
+    ARCH_ALT                Alternative architecture name for downloads (e.g., x86_64, aarch64)
+
+Examples:
+    # Install sccache (requires ARCH_ALT to be set)
+    ARCH_ALT=x86_64 ${prog} install
+    # Show stats with build name
+    ${prog} show-stats "UCX"
+EOF
+}
+
+install_sccache() {
+    if [ -z "${ARCH_ALT:-}" ]; then
+        echo "Error: ARCH_ALT environment variable is required for sccache installation"
+        exit 1
+    fi
+    echo "Installing sccache ${SCCACHE_VERSION} for architecture ${ARCH_ALT}..."
+    # Download and install sccache
+    wget --tries=3 --waitretry=5 \
+        "https://github.com/mozilla/sccache/releases/download/${SCCACHE_VERSION}/sccache-${SCCACHE_VERSION}-${ARCH_ALT}-unknown-linux-musl.tar.gz"
+    tar -xzf "sccache-${SCCACHE_VERSION}-${ARCH_ALT}-unknown-linux-musl.tar.gz"
+    mv "sccache-${SCCACHE_VERSION}-${ARCH_ALT}-unknown-linux-musl/sccache" /usr/local/bin/
+    # Cleanup
+    rm -rf sccache*
+    echo "sccache installed successfully"
+}
+
+show_stats() {
+    if command -v sccache >/dev/null 2>&1; then
+        echo "=== sccache statistics AFTER $1 ==="
+        sccache --show-stats
+    else
+        echo "sccache is not available"
+    fi
+}
+
+main() {
+    case "${1:-help}" in
+        install)
+            install_sccache
+            ;;
+        generate-env)
+            shift  # Remove the command from arguments
+            generate_env_file "$@"  # Pass all remaining arguments
+            ;;
+        show-stats)
+            shift  # Remove the command from arguments
+            show_stats "$@"  # Pass all remaining arguments
+            ;;
+        help|--help|-h)
+            usage
+            ;;
+        *)
+            echo "Unknown command: $1"
+            usage
+            exit 1
+            ;;
+    esac
+}
+
+main "$@"
--- a/tests/serve/test_vllm.py
+++ b/tests/serve/test_vllm.py
@@ -263,7 +263,6 @@ def vllm_config_test(request):


 @pytest.mark.e2e
-@pytest.mark.slow
 def test_serve_deployment(vllm_config_test, request, runtime_services):
    """
    Test dynamo serve deployments with different graph configurations.