feat: Dockerfile templating (#5633)

Signed-off-by: Dillon Cullinan <dcullinan@nvidia.com>
ac020629 · Dillon Cullinan · GitHub · 5755a8de · ac020629 · ac020629
Unverified Commit ac020629 authored Feb 10, 2026 by Dillon Cullinan Committed by GitHub Feb 10, 2026
20 changed files
--- a/.devcontainer/README.md
+++ b/.devcontainer/README.md
@@ -143,23 +143,12 @@ Build the appropriate framework image (e.g., `dynamo:latest-vllm-local-dev`) fro
 ```bash
 # Single command approach (recommended)
 export FRAMEWORK=VLLM         # Note: any of VLLM, SGLANG, TRTLLM can be used
-./container/build.sh --framework $FRAMEWORK --target local-dev
+python container/render.py --framework=${FRAMEWORK} --target=local-dev --short-output
+docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile .
 # Note: the docker build above does not tag its output; add `-t dynamo:latest-vllm-local-dev` so the image has the name the devcontainer expects
 ```
-Alternatively, you can build a development container, then build local-dev:
-```bash
-export FRAMEWORK=VLLM
-./container/build.sh --framework $FRAMEWORK
-# Now you have a development image dynamo:latest-vllm
-./container/build.sh --dev-image dynamo:latest-${FRAMEWORK,,}
-# Now you have a local-dev image dynamo:latest-vllm-local-dev
-```
 The local-dev image will give you local user permissions matching your host user and includes extra developer utilities (debugging tools, text editors, system monitors, etc.).
 ### Step 1: Choose Your Framework
@@ -427,10 +416,8 @@ If you see errors like "container is not running" or "An error occurred setting
   # If missing, build the dev image first, then build local-dev
   export FRAMEWORK=VLLM  # Replace with VLLM, SGLANG, or TRTLLM
-   ./container/build.sh --framework $FRAMEWORK
+   python container/render.py --framework=${FRAMEWORK} --target=local-dev --short-output
-   # change to lower case portable way across shells
+   docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile .
-   ./container/build.sh --dev-image dynamo:latest-$(echo "$FRAMEWORK" | tr '[:upper:]' '[:lower:]') --framework "$FRAMEWORK"
-   # Now you have dynamo:latest-vllm-local-dev
   ```
 2. **Container startup failure:**

--- a/.dockerignore
+++ b/.dockerignore
@@ -42,6 +42,7 @@
 **/target/*
 **/*safetensors
 container/Dockerfile*
+container/*.Dockerfile
 .venv
 .venv-docs

--- a/.github/actions/docker-build/action.yml
+++ b/.github/actions/docker-build/action.yml
 name: 'Docker Build'
 description: 'Build Dynamo container images'
 inputs:
+  # --- Common Docker Inputs
  framework:
    description: 'Framework to build'
    required: true
@@ -13,9 +14,14 @@ inputs:
    description: 'Docker platform to build on, ie. linux/amd64'
    required: false
    default: 'linux/amd64'
+  cuda_version:
+    description: 'CUDA version to build against; passed to render.py as --cuda-version'
+    required: true
  image_tag:
    description: 'Custom image tag (optional, defaults to framework:latest)'
    required: false
+  # --- Secret Inputs
  ci_token:
    description: 'CI Token'
    required: false
@@ -34,21 +40,6 @@ inputs:
  aws_secret_access_key:
    description: 'AWS Secret Access Key'
    required: false
-  base_image_tag:
-    description: 'Optional override for base image tag passed to build.sh'
-    required: false
-  runtime_image_tag:
-    description: 'Optional override for RUNTIME_IMAGE_TAG build-arg'
-    required: false
-  cuda_version:
-    description: 'Optional override for CUDA_VERSION build-arg'
-    required: true
-  enable_kvbm:
-    description: 'Enable KVBM support (optional)'
-    required: false
-  dynamo_base_image:
-    description: 'Pre-built Dynamo base image to use instead of building from scratch'
-    required: false
 outputs:
  image_tag:
@@ -70,6 +61,44 @@ runs:
      shell: bash
      run: |
        docker system prune -af
+    - name: Set up Python
+      uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 #v6.2.0
+      with:
+        python-version: '3.12'
+        pip-install: jinja2 pyyaml
+    - name: Generate Dockerfile
+      shell: bash
+      run: |
+        echo "::group::Generating Dockerfile"
+        echo "Generating Dockerfile for target: ${{ inputs.target }} and framework: ${{ inputs.framework }}"
+        python ./container/render.py \
+            --target=${{ inputs.target }} \
+            --framework=${{ inputs.framework }} \
+            --platform=${{ inputs.platform }} \
+            --cuda-version=${{ inputs.cuda_version }} \
+            --show-result \
+            --short-output
+        echo "::endgroup::"
+    - name: Build EPP image
+      if: ${{ inputs.target == 'frontend' }}
+      shell: bash
+      env:
+        ECR_HOSTNAME: ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com
+      run: |
+        sudo apt-get update && sudo apt-get install -y git build-essential protobuf-compiler libclang-dev
+        curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable
+        . "$HOME/.cargo/env"
+        echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
+        cargo install cbindgen
+        DOCKER_PROXY="${ECR_HOSTNAME}/dockerhub/"
+        pushd deploy/inference-gateway/epp
+        make all DOCKER_PROXY=${DOCKER_PROXY}
+        popd
+        EPP_GIT_TAG=$(git describe --tags --dirty --always 2>/dev/null || echo "dev")
+        EPP_IMAGE="dynamo/dynamo-epp:${EPP_GIT_TAG}"
+        echo "EPP_IMAGE=${EPP_IMAGE}" >> $GITHUB_ENV
    - name: Build image
      id: build
      shell: bash
@@ -107,50 +136,34 @@ runs:
        echo "BUILD_LOG_FILE=${BUILD_LOG_FILE}" >> $GITHUB_ENV
        echo "📝 Build log will be saved to: ${BUILD_LOG_FILE}"
-        # Collect optional overrides provided by the workflow
        # Set base cache args and set --cache-to if this is a main commit
        # TODO: Fix this - Skip cache for frontend target - a different docker driver is used for the EPP build, which causes issues with cache export
-        EXTRA_ARGS=""
+        CACHE_ARGS=""
        if [[ "${{ inputs.target }}" != "frontend" ]]; then
-          EXTRA_ARGS="--cache-to type=inline "
+          CACHE_ARGS="--cache-to type=inline "
-          EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
+          CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
-          EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/} "
+          CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/} "
-          EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
+          CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
          if [[ "$GITHUB_REF_NAME" =~ ^release ]]; then
            # Release branches also use release cache
-            EXTRA_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
+            CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
          elif [[ "$GITHUB_REF_NAME" == "main" ]]; then
-            EXTRA_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
+            CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
          fi
        fi
-        echo "$EXTRA_ARGS"
+        EPP_IMAGE_ARG=""
-        # Collect optional overrides provided by the workflow
+        if [[ ${{ inputs.target }} == "frontend" ]]; then
-        if [ -n "${{ inputs.base_image_tag }}" ]; then
+          EPP_IMAGE_ARG="--build-arg EPP_IMAGE=${EPP_IMAGE}"
-          EXTRA_ARGS+="--base-image-tag ${{ inputs.base_image_tag }} "
-        fi
-        if [ -n "${{ inputs.runtime_image_tag }}" ]; then
-          EXTRA_ARGS+="--build-arg RUNTIME_IMAGE_TAG=${{ inputs.runtime_image_tag }} "
-        fi
-        if [ -n "${{ inputs.cuda_version }}" ]; then
-          EXTRA_ARGS+="--build-arg CUDA_VERSION=${{ inputs.cuda_version }} "
-        fi
-        if [ -n "${{ inputs.dynamo_base_image }}" ]; then
-          EXTRA_ARGS+=" --dynamo-base-image ${{ inputs.dynamo_base_image }}"
-        fi
-        if [ -n "${{ inputs.enable_kvbm }}" ]; then
-          EXTRA_ARGS+=" --build-arg ENABLE_KVBM=${{ inputs.enable_kvbm }}"
        fi
-        # Execute build and capture output (show on console AND save to file)
+        docker buildx build \
-        ./container/build.sh --tag "$IMAGE_TAG" \
+          --progress=plain \
-          --target ${{ inputs.target }} \
+          --tag "$IMAGE_TAG" \
-          --vllm-max-jobs 10 \
+          --load \
-          --framework ${{ inputs.framework }} \
+          -f ./container/rendered.Dockerfile \
-          --platform ${{ inputs.platform }} \
+          $CACHE_ARGS \
-          --use-sccache \
+          $EPP_IMAGE_ARG . 2>&1 | tee "${BUILD_LOG_FILE}"
-          --sccache-bucket "$SCCACHE_S3_BUCKET" \
-          --sccache-region "$AWS_DEFAULT_REGION" $EXTRA_ARGS 2>&1 | tee "${BUILD_LOG_FILE}"
        BUILD_EXIT_CODE=${PIPESTATUS[0]}

--- a/.github/actions/docker-remote-build/action.yml
+++ b/.github/actions/docker-remote-build/action.yml
@@ -13,6 +13,9 @@ inputs:
    description: 'Docker platform to build on, ie. linux/amd64'
    required: false
    default: 'linux/amd64'
+  cuda_version:
+    description: 'CUDA version to build against (required)'
+    required: true
  image_tag:
    description: 'Custom image tag'
    required: true
@@ -34,21 +37,6 @@ inputs:
  aws_secret_access_key:
    description: 'AWS Secret Access Key'
    required: false
-  base_image_tag:
-    description: 'Optional override for base image tag passed to build.sh'
-    required: false
-  runtime_image_tag:
-    description: 'Optional override for RUNTIME_IMAGE_TAG build-arg'
-    required: false
-  cuda_version:
-    description: 'Optional override for CUDA_VERSION build-arg'
-    required: true
-  enable_kvbm:
-    description: 'Enable KVBM support (optional)'
-    required: false
-  dynamo_base_image:
-    description: 'Pre-built Dynamo base image to use instead of building from scratch'
-    required: false
  no_cache:
    description: 'Disable Docker build cache'
    required: false
@@ -117,21 +105,20 @@ runs:
        # Collect optional overrides provided by the workflow
        # Set base cache args and set --cache-to if this is a main commit
        # TODO: Fix this - Skip cache for frontend target - a different docker driver is used for the EPP build, which causes issues with cache export
-        EXTRA_ARGS=""
+        CACHE_ARGS=""
        if [[ "${{ inputs.target }}" != "frontend" ]]; then
-          EXTRA_ARGS="--cache-to type=inline "
+          CACHE_ARGS="--cache-to type=inline "
-          EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
+          CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
-          EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/} "
+          CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/} "
-          EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
+          CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
          if [[ "$GITHUB_REF_NAME" =~ ^release ]]; then
            # Release branches also use release cache
-            EXTRA_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
+            CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
          elif [[ "$GITHUB_REF_NAME" == "main" ]]; then
-            EXTRA_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
+            CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
          fi
        fi
+        echo "$CACHE_ARGS"
-        echo "$EXTRA_ARGS"
        # Collect optional overrides provided by the workflow
        if [[ "${{ inputs.ci }}" == "true" ]]; then
@@ -139,21 +126,6 @@ runs:
          EXTRA_ARGS+=" --ci"
        fi
-        if [ -n "${{ inputs.base_image_tag }}" ]; then
-          EXTRA_ARGS+="--base-image-tag ${{ inputs.base_image_tag }} "
-        fi
-        if [ -n "${{ inputs.runtime_image_tag }}" ]; then
-          EXTRA_ARGS+="--build-arg RUNTIME_IMAGE_TAG=${{ inputs.runtime_image_tag }} "
-        fi
-        if [ -n "${{ inputs.cuda_version }}" ]; then
-          EXTRA_ARGS+="--build-arg CUDA_VERSION=${{ inputs.cuda_version }} "
-        fi
-        if [ -n "${{ inputs.dynamo_base_image }}" ]; then
-          EXTRA_ARGS+=" --dynamo-base-image ${{ inputs.dynamo_base_image }}"
-        fi
-        if [ -n "${{ inputs.enable_kvbm }}" ]; then
-          EXTRA_ARGS+=" --build-arg ENABLE_KVBM=${{ inputs.enable_kvbm }}"
-        fi
        if [ "${{ inputs.no_cache }}" == "true" ]; then
          EXTRA_ARGS+=" --no-cache"
        fi
@@ -161,9 +133,9 @@ runs:
          EXTRA_ARGS+=" --build-arg CARGO_BUILD_JOBS=4 --use-sccache"
        fi
        if [ "${{ inputs.push_image }}" == "true" ]; then
-          EXTRA_ARGS+=" --push --no-load"
+          EXTRA_ARGS+=" --push"
-        elif [ "${{ inputs.no_load }}" == "true" ]; then
+        elif [ "${{ inputs.no_load }}" == "false" ]; then
-          EXTRA_ARGS+=" --no-load"
+          EXTRA_ARGS+=" --load"
        fi
        # Add extra tags (each as a separate --tag argument)
@@ -176,16 +148,14 @@ runs:
          done <<< "$EXTRA_TAGS"
        fi
-        # Execute build and capture output (show on console AND save to file)
+        docker buildx build \
-        ./container/build.sh --tag "$IMAGE_TAG" \
+          --progress=plain \
-          --target ${{ inputs.target }} \
+          --tag "$IMAGE_TAG" \
-          --vllm-max-jobs 10 \
+          --platform linux/${{ inputs.platform }} \
-          --no-tag-latest \
+          -f ./container/rendered.Dockerfile \
-          --framework ${{ inputs.framework }} \
+          $CACHE_ARGS \
-          --platform ${{ inputs.platform }} \
+          $EXTRA_ARGS \
-          --sccache-bucket "$SCCACHE_S3_BUCKET" \
+          $EPP_IMAGE_ARG . 2>&1 | tee "${BUILD_LOG_FILE}"
-          --sccache-region "$AWS_DEFAULT_REGION" \
-          $EXTRA_ARGS 2>&1 | tee "${BUILD_LOG_FILE}"
        BUILD_EXIT_CODE=${PIPESTATUS[0]}

--- a/.github/filters.yaml
+++ b/.github/filters.yaml
@@ -63,6 +63,9 @@ ignore:
  - 'container/run.sh'
  - 'container/use-sccache.sh'
  - 'container/dev/**'
+  - 'container/templates/aws.Dockerfile'
+  - 'container/templates/local_dev.Dockerfile'
+  - 'container/templates/dev.Dockerfile'
 ci: &ci
  - '.github/workflows/**'
@@ -78,6 +81,12 @@ core:
  - *ci
  - 'container/build.sh'
  - 'container/Dockerfile'
+  - 'container/render.py'
+  - 'container/Dockerfile.template'
+  - 'container/context.yaml'
+  - 'container/templates/args.Dockerfile'
+  - 'container/templates/dynamo_*'
+  - 'container/templates/wheel_builder.Dockerfile'
  - '.dockerignore'
  - 'container/deps/*'
  - '.cargo/config.toml'
@@ -120,6 +129,7 @@ vllm:
  - 'container/deps/vllm/**'
  - 'examples/backends/vllm/**'
  - 'components/src/dynamo/vllm/**'
+  - 'container/templates/vllm_*'
 sglang:
  - '!**/*.md'
@@ -127,6 +137,7 @@ sglang:
  - 'container/Dockerfile.sglang'
  - 'examples/backends/sglang/**'
  - 'components/src/dynamo/sglang/**'
+  - 'container/templates/sglang_*'
 trtllm:
  - '!**/*.md'
@@ -136,6 +147,7 @@ trtllm:
  - 'examples/backends/trtllm/**'
  - 'components/src/dynamo/trtllm/**'
  - 'container/build_trtllm_wheel.sh'
+  - 'container/templates/trtllm_*'
 frontend:
  - '!**/*.md'
@@ -153,6 +165,7 @@ frontend:
  - 'components/src/dynamo/frontend/**'
  - 'components/src/dynamo/common/**'
  - 'deploy/inference-gateway/**'
+  - 'container/templates/frontend.Dockerfile'
 rust:
  - '.github/workflows/pre-merge.yml'

--- a/.github/workflows/build-frontend-image.yaml
+++ b/.github/workflows/build-frontend-image.yaml
@@ -68,21 +68,6 @@ jobs:
        uses: actions/setup-go@v5
        with:
          go-version: '1.24'
-      - name: Install dependencies
-        shell: bash
-        run: |
-          set -euo pipefail
-          # Install system dependencies from apt
-          sudo apt-get update && sudo apt-get install -y git build-essential protobuf-compiler libclang-dev
-          # Install Rust (cargo + rustc)
-          curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable
-          # Make cargo available to later steps
-          echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
-      - name: Install cbindgen
-        shell: bash
-        run: |
-          set -euo pipefail
-          cargo install cbindgen
      - name: Docker Login
        uses: ./.github/actions/docker-login
        with:
@@ -95,10 +80,10 @@ jobs:
        id: build-image
        uses: ./.github/actions/docker-build
        env:
-          PLATFORMS: linux/${{ matrix.platform.arch }}
+          PLATFORMS: ${{ matrix.platform.arch }}
          TARGETARCH: ${{ matrix.platform.arch }}
        with:
-          framework: none
+          framework: dynamo
          target: frontend
          platform: ${{ env.PLATFORMS }}
          ci_token: ${{ secrets.CI_TOKEN }}

--- a/.github/workflows/build-test-distribute-flavor-matrix.yml
+++ b/.github/workflows/build-test-distribute-flavor-matrix.yml
@@ -10,6 +10,10 @@ on:
        description: 'Framework name (vllm, sglang, trtllm)'
        required: true
        type: string
+      target:
+        description: 'Target stage for Docker rendering'
+        required: true
+        type: string
      platforms:
        description: 'Platforms to build (JSON array, e.g., ["amd64", "arm64"])'
        required: true
@@ -86,6 +90,7 @@ jobs:
    with:
      framework: ${{ inputs.framework }}
      platform: ${{ matrix.platform }}
+      target: ${{ inputs.target }}
      cuda_version: ${{ matrix.cuda_version }}
      extra_tags: ${{ inputs.extra_tags }}
      no_cache: ${{ inputs.no_cache }}

--- a/.github/workflows/build-test-distribute-flavor.yml
+++ b/.github/workflows/build-test-distribute-flavor.yml
@@ -10,6 +10,10 @@ on:
        description: 'Framework name (vllm, sglang, trtllm)'
        required: true
        type: string
+      target:
+        description: 'Target stage for Docker rendering'
+        required: true
+        type: string
      platform:
        description: 'Platform to build (amd64 or arm64)'
        required: true
@@ -105,13 +109,17 @@ jobs:
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
        with:
          lfs: true
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 #v6.2.0
+        with:
+          python-version: '3.12'
+          pip-install: jinja2 pyyaml
      - name: Calculate extra tags with platform suffix # will get redundant upon multi arch builds support
        id: extra-tags
        shell: bash
        env:
          EXTRA_TAGS: ${{ inputs.extra_tags }}
-          PLATFORM: ${{ inputs.platform }}
+          PLATFORM: linux/${{ inputs.platform }}
        run: |
          if [ -n "$EXTRA_TAGS" ]; then
            RESULT=""
@@ -134,7 +142,6 @@ jobs:
          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
          azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
          azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
      - name: Calculate target tag
        id: calculate-target-tag
        shell: bash
@@ -146,7 +153,6 @@ jobs:
          echo "default_target_image_uri=${DEFAULT_TARGET_IMAGE_URI}" >> $GITHUB_OUTPUT
          echo "target_tag_plain=${TARGET_TAG_PLAIN}" >> $GITHUB_OUTPUT
          echo "cuda_version_plain=${CUDA_VERSION}" >> $GITHUB_OUTPUT
      - name: Initialize Dynamo Builder
        uses: ./.github/actions/init-dynamo-builder
        with:
@@ -154,28 +160,37 @@ jobs:
          flavor: ${{ inputs.framework }}
          arch: ${{ inputs.platform }}
          cuda_version: ${{ inputs.cuda_version }}
      - name: Print Build Container inputs
        run: |
          echo "=== Build Container Inputs ==="
          echo "image_tag: ${{ steps.calculate-target-tag.outputs.default_target_image_uri }}"
          echo "framework: ${{ inputs.framework }}"
          echo "target: runtime"
-          echo "platform: linux/${{ inputs.platform }}"
+          echo "platform: ${{ inputs.platform }}"
          echo "cuda_version: ${{ inputs.cuda_version }}"
          echo "no_cache: ${{ inputs.no_cache }}"
          echo "extra_tags: ${{ steps.extra-tags.outputs.tags }}"
          echo "push_image: ${{ inputs.push_image }}"
          echo "no_load: ${{ inputs.no_load }}"
+      - name: Generate Dockerfile
+        shell: bash
+        run: |
+          echo "Generating Dockerfile for target: ${{ inputs.target }} and framework: ${{ inputs.framework }}"
+          python ./container/render.py \
+              --target=${{ inputs.target }} \
+              --framework=${{ inputs.framework }} \
+              --platform=${{ inputs.platform }} \
+              --cuda-version=${{ inputs.cuda_version }} \
+              --show-result \
+              --short-output
      - name: Build Container
        id: build-image
        uses: ./.github/actions/docker-remote-build
        with:
          image_tag: ${{ steps.calculate-target-tag.outputs.default_target_image_uri }}
          framework: ${{ inputs.framework }}
-          target: runtime
+          target: ${{ inputs.target }}
-          platform: linux/${{ inputs.platform }}
+          platform: ${{ inputs.platform }}
          cuda_version: ${{ inputs.cuda_version }}
          ci_token: ${{ secrets.CI_TOKEN }}
          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}

--- a/.github/workflows/ci-test-suite.yml
+++ b/.github/workflows/ci-test-suite.yml
@@ -99,17 +99,15 @@ jobs:
        with:
          framework: ${{ matrix.framework }}
          target: runtime
-          platform: linux/amd64
+          platform: amd64
-          base_image_tag: ''
+          cuda_version: '12.9'
-          runtime_image_tag: ''
+          image_tag: runtime-${{ matrix.framework }}-amd64:${{ github.run_id }}
-          cuda_version: ''
          ci_token: ${{ secrets.CI_TOKEN }}
          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
          sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          image_tag: runtime-${{ matrix.framework }}-amd64:${{ github.run_id }}
      - name: Tag and Push Runtime Images
        uses: ./.github/actions/docker-tag-push
        with:
@@ -132,13 +130,9 @@ jobs:
      matrix:
        include:
          - framework: vllm
-            base_image_tag: '25.06-cuda12.9-devel-ubuntu24.04'
-            runtime_image_tag: '12.9.0-runtime-ubuntu24.04'
            cuda_version: '12.9'
          - framework: sglang
-            base_image_tag: ''
+            cuda_version: '12.9'
-            runtime_image_tag: ''
-            cuda_version: ''
    env:
      ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
    steps:
@@ -158,17 +152,15 @@ jobs:
        with:
          framework: ${{ matrix.framework }}
          target: runtime
-          platform: linux/arm64
+          platform: arm64
-          base_image_tag: ${{ matrix.base_image_tag }}
-          runtime_image_tag: ${{ matrix.runtime_image_tag }}
          cuda_version: ${{ matrix.cuda_version }}
+          image_tag: runtime-${{ matrix.framework }}-arm64:${{ github.run_id }}
          ci_token: ${{ secrets.CI_TOKEN }}
          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
          sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          image_tag: runtime-${{ matrix.framework }}-arm64:${{ github.run_id }}
      - name: Tag and Push Runtime Images
        uses: ./.github/actions/docker-tag-push
        with:
@@ -210,17 +202,15 @@ jobs:
        with:
          framework: ${{ matrix.framework }}
          target: runtime
-          platform: linux/amd64
+          platform: amd64
-          base_image_tag: ''
-          runtime_image_tag: ''
          cuda_version: ${{ matrix.framework == 'trtllm' && '13.1' || '13.0' }}
+          image_tag: runtime-${{ matrix.framework }}-cuda13-amd64:${{ github.run_id }}
          ci_token: ${{ secrets.CI_TOKEN }}
          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
          sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          image_tag: runtime-${{ matrix.framework }}-cuda13-amd64:${{ github.run_id }}
      - name: Tag and Push CUDA 13 Runtime Images
        uses: ./.github/actions/docker-tag-push
        with:
@@ -262,17 +252,15 @@ jobs:
        with:
          framework: ${{ matrix.framework }}
          target: runtime
-          platform: linux/arm64
+          platform: arm64
-          base_image_tag: ''
-          runtime_image_tag: ''
          cuda_version: ${{ matrix.framework == 'trtllm' && '13.1' || '13.0' }}
+          image_tag: runtime-${{ matrix.framework }}-cuda13-arm64:${{ github.run_id }}
          ci_token: ${{ secrets.CI_TOKEN }}
          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
          sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          image_tag: runtime-${{ matrix.framework }}-cuda13-arm64:${{ github.run_id }}
      - name: Tag and Push CUDA 13 Runtime Images
        uses: ./.github/actions/docker-tag-push
        with:

--- a/.github/workflows/container-validation-dynamo.yml
+++ b/.github/workflows/container-validation-dynamo.yml
@@ -66,11 +66,27 @@ jobs:
        id: define_image_tag
        run: |
          echo "image_tag=dynamo:latest-dev" >> $GITHUB_OUTPUT
+      - name: Generate Dockerfile
+        shell: bash
+        run: |
+          echo "Generating Dockerfile for target: ${{ inputs.target }} and framework: ${{ inputs.framework }}"
+          python ./container/render.py \
+              --target=dev \
+              --framework=dynamo \
+              --platform=amd64 \
+              --show-result \
+              --short-output
      - name: Build image
        env:
          GITHUB_TOKEN: ${{ secrets.CI_TOKEN }}
        run: |
-          ./container/build.sh --tag ${{ steps.define_image_tag.outputs.image_tag }} --target dev --framework none --enable-kvbm --enable-media-ffmpeg
+          docker buildx build \
+            --progress=plain \
+            --tag ${{ steps.define_image_tag.outputs.image_tag }} \
+            -f ./container/rendered.Dockerfile \
+            --build-arg ENABLE_MEDIA_FFMPEG=true \
+            --build-arg ENABLE_KVBM=true \
+            --load .
      - name: Start services with docker-compose
        working-directory: ./deploy
        run: |

--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -177,6 +177,7 @@ jobs:
    uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
    with:
      framework: vllm
+      target: runtime
      platforms: '["amd64", "arm64"]'
      cuda_versions: '["12.9", "13.0"]'
      extra_tags: |
@@ -194,6 +195,7 @@ jobs:
    uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
    with:
      framework: sglang
+      target: runtime
      platforms: '["amd64", "arm64"]'
      cuda_versions: '["12.9", "13.0"]'
      extra_tags: |
@@ -211,11 +213,12 @@ jobs:
    uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
    with:
      framework: trtllm
+      target: runtime
      platforms: '["amd64", "arm64"]'
      cuda_versions: '["13.1"]'
      extra_tags: |
        ${{ github.ref_name == 'main' && 'main-trtllm' || '' }}
-        ${{ github.ref_name == 'main' && format('main-trt-llm-{0}', github.sha) || '' }}
+        ${{ github.ref_name == 'main' && format('main-trtllm-{0}', github.sha) || '' }}
      builder_name: ${{ needs.changed-files.outputs.builder_name }}
    secrets: inherit

--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 .vs/
 .vscode/
 .helix
+*rendered.Dockerfile
 [Bb]inlog/
 [Bb][Uu][Ii][Ll][Dd]/
 [Oo][Bb][Jj]/

--- a/container/Dockerfile
+++ b/container/Dockerfile
-# syntax=docker/dockerfile:1.10.0
-# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# NOTE FOR dynamo_base AND wheel_builder STAGES:
-#
-# All changes to dynamo_base and wheel_builder stages should be replicated across
-# Dockerfile and Dockerfile.<framework> images.:
-#   - Dockerfile
-#   - Dockerfile.vllm
-#   - Dockerfile.sglang
-#   - Dockerfile.trtllm
-# This duplication was introduced purposely to quickly enable Docker layer caching and
-# deduplication. Please ensure these stages stay in sync until the duplication can be
-# addressed.
-##################################
-########## Build Arguments ########
-##################################
-# This section contains build arguments that are common and shared across various
-# Dockerfile.<frameworks>, so they should NOT have a default. The source of truth is from build.sh.
-ARG BASE_IMAGE
-ARG BASE_IMAGE_TAG
-ARG EPP_IMAGE="us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v0.5.1"
-ARG PYTHON_VERSION
-ARG ENABLE_KVBM
-ARG ENABLE_GPU_MEMORY_SERVICE
-ARG ENABLE_MEDIA_NIXL
-ARG ENABLE_MEDIA_FFMPEG
-ARG CARGO_BUILD_JOBS
-# Define general architecture ARGs for supporting both x86 and aarch64 builds.
-#   ARCH: Used for package suffixes (e.g., amd64, arm64)
-#   ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
-#
-# Default values are for x86/amd64:
-#   --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
-#
-# For arm64/aarch64, build with:
-#   --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
-#TODO OPS-592: Leverage uname -m to determine ARCH instead of passing it as an arg
-ARG ARCH=amd64
-ARG ARCH_ALT=x86_64
-# SCCACHE configuration
-ARG USE_SCCACHE
-ARG SCCACHE_BUCKET=""
-ARG SCCACHE_REGION=""
-# NIXL configuration
-ARG NIXL_UCX_REF
-ARG NIXL_REF
-ARG NIXL_GDRCOPY_REF
-ARG NIXL_LIBFABRIC_REF
-##################################
-########## Base Image ############
-##################################
-FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
-ARG ARCH
-ARG ARCH_ALT
-USER root
-WORKDIR /opt/dynamo
-# Install uv package manager
-COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
-# Install NATS server
-ENV NATS_VERSION="v2.10.28"
-RUN --mount=type=cache,target=/var/cache/apt \
-    wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/${NATS_VERSION}/nats-server-${NATS_VERSION}-${ARCH}.deb && \
-    dpkg -i nats-server-${NATS_VERSION}-${ARCH}.deb && rm nats-server-${NATS_VERSION}-${ARCH}.deb
-# Install etcd
-ENV ETCD_VERSION="v3.5.21"
-RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
-    mkdir -p /usr/local/bin/etcd && \
-    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
-    rm /tmp/etcd.tar.gz
-ENV PATH=/usr/local/bin/etcd/:$PATH
-# Rust Setup
-# Rust environment setup
-ENV RUSTUP_HOME=/usr/local/rustup \
-    CARGO_HOME=/usr/local/cargo \
-    PATH=/usr/local/cargo/bin:$PATH \
-    RUST_VERSION=1.90.0
-# Define Rust target based on ARCH_ALT ARG
-ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu
-# Install Rust
-RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
-    chmod +x rustup-init && \
-    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
-    rm rustup-init && \
-    chmod -R a+w $RUSTUP_HOME $CARGO_HOME
-##################################
-##### Wheel Build Image ##########
-##################################
-# Redeclare ARCH_ALT ARG so it's available for interpolation in the FROM instruction
-ARG ARCH_ALT
-FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder
-# Redeclare ARGs for this stage
-ARG ARCH
-ARG ARCH_ALT
-ARG CARGO_BUILD_JOBS
-ARG ENABLE_MEDIA_FFMPEG
-WORKDIR /workspace
-# Copy CUDA from base stage
-COPY --from=base /usr/local/cuda /usr/local/cuda
-COPY --from=base /etc/ld.so.conf.d/hpcx.conf /etc/ld.so.conf.d/hpcx.conf
-# Set environment variables first so they can be used in COPY commands
-ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} \
-    RUSTUP_HOME=/usr/local/rustup \
-    CARGO_HOME=/usr/local/cargo \
-    CARGO_TARGET_DIR=/opt/dynamo/target \
-    PATH=/usr/local/cargo/bin:$PATH
-# Copy artifacts from base stage
-COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
-COPY --from=base $CARGO_HOME $CARGO_HOME
-# Install system dependencies
-RUN dnf install -y almalinux-release-synergy && \
-    dnf config-manager --set-enabled powertools && \
-    dnf install -y \
-        # Autotools (required for UCX, libfabric ./autogen.sh and ./configure)
-        autoconf \
-        automake \
-        libtool \
-        make \
-        # RPM build tools (required for gdrcopy's build-rpm-packages.sh)
-        rpm-build \
-        rpm-sign \
-        # Build tools
-        cmake \
-        ninja-build \
-        clang-devel \
-        # Install GCC toolset 14 (CUDA compatible, max version 14)
-        gcc-toolset-14-gcc \
-        gcc-toolset-14-gcc-c++ \
-        gcc-toolset-14-binutils \
-        flex \
-        wget \
-        # Kernel module build dependencies
-        dkms \
-        # Protobuf support
-        protobuf-compiler \
-        # RDMA/InfiniBand support (required for UCX build with --with-verbs)
-        libibverbs \
-        libibverbs-devel \
-        rdma-core \
-        rdma-core-devel \
-        libibumad \
-        libibumad-devel \
-        librdmacm-devel \
-        numactl-devel \
-        # Libfabric support
-        hwloc \
-        hwloc-devel && \
-    dnf clean all && rm -rf /var/cache/dnf/
-# Set GCC toolset 14 as the default compiler (CUDA requires GCC <= 14)
-ENV PATH="/opt/rh/gcc-toolset-14/root/usr/bin:${PATH}" \
-    LD_LIBRARY_PATH="/opt/rh/gcc-toolset-14/root/usr/lib64:${LD_LIBRARY_PATH}" \
-    CC="/opt/rh/gcc-toolset-14/root/usr/bin/gcc" \
-    CXX="/opt/rh/gcc-toolset-14/root/usr/bin/g++"
-# Ensure a modern protoc is available (required for --experimental_allow_proto3_optional)
-RUN set -eux; \
-    PROTOC_VERSION=25.3; \
-    case "${ARCH_ALT}" in \
-      x86_64) PROTOC_ZIP="protoc-${PROTOC_VERSION}-linux-x86_64.zip" ;; \
-      aarch64) PROTOC_ZIP="protoc-${PROTOC_VERSION}-linux-aarch_64.zip" ;; \
-      *) echo "Unsupported architecture: ${ARCH_ALT}" >&2; exit 1 ;; \
-    esac; \
-    wget --tries=3 --waitretry=5 -O /tmp/protoc.zip "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/${PROTOC_ZIP}"; \
-    rm -f /usr/local/bin/protoc /usr/bin/protoc; \
-    unzip -o /tmp/protoc.zip -d /usr/local bin/protoc include/*; \
-    chmod +x /usr/local/bin/protoc; \
-    ln -s /usr/local/bin/protoc /usr/bin/protoc; \
-    protoc --version
-# Point build tools explicitly at the modern protoc
-ENV PROTOC=/usr/local/bin/protoc
-ENV CUDA_PATH=/usr/local/cuda \
-    PATH=/usr/local/cuda/bin:$PATH \
-    LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/lib:/usr/local/lib64:$LD_LIBRARY_PATH \
-    NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
-# Create virtual environment for building wheels
-ARG PYTHON_VERSION
-ENV VIRTUAL_ENV=/workspace/.venv
-# Cache uv downloads; uv handles its own locking for this cache.
-RUN --mount=type=cache,target=/root/.cache/uv \
-    export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
-    uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
-    uv pip install --upgrade meson pybind11 patchelf maturin[patchelf] tomlkit
-ARG NIXL_UCX_REF
-ARG NIXL_REF
-ARG NIXL_GDRCOPY_REF
-# Build and install gdrcopy
-RUN git clone --depth 1 --branch ${NIXL_GDRCOPY_REF} https://github.com/NVIDIA/gdrcopy.git && \
-    cd gdrcopy/packages && \
-    CUDA=/usr/local/cuda ./build-rpm-packages.sh && \
-    rpm -Uvh gdrcopy-kmod-*.el8.noarch.rpm && \
-    rpm -Uvh gdrcopy-*.el8.${ARCH_ALT}.rpm && \
-    rpm -Uvh gdrcopy-devel-*.el8.noarch.rpm
-# Install SCCACHE if requested
-ARG USE_SCCACHE
-ARG SCCACHE_BUCKET
-ARG SCCACHE_REGION
-COPY container/use-sccache.sh /tmp/use-sccache.sh
-RUN if [ "$USE_SCCACHE" = "true" ]; then \
-        /tmp/use-sccache.sh install; \
-    fi
-# Set SCCACHE environment variables
-ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
-    SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
-    RUSTC_WRAPPER=${USE_SCCACHE:+sccache}
-# Build FFmpeg from source
-# Do not delete the source tarball for legal reasons
-ARG FFMPEG_VERSION=7.1
-RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
-    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
-if [ "$ENABLE_MEDIA_FFMPEG" = "true" ]; then \
-    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
-    if [ "$USE_SCCACHE" = "true" ]; then \
-        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
-        export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
-        export RUSTC_WRAPPER="sccache"; \
-    fi && \
-    dnf install -y pkg-config && \
-    cd /tmp && \
-    curl -LO https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.xz && \
-    tar xf ffmpeg-${FFMPEG_VERSION}.tar.xz && \
-    cd ffmpeg-${FFMPEG_VERSION} && \
-    ./configure \
-        --prefix=/usr/local \
-        --disable-gpl \
-        --disable-nonfree \
-        --disable-programs \
-        --disable-doc \
-        --disable-static \
-        --disable-x86asm \
-        --disable-postproc \
-        --disable-network \
-        --disable-encoders \
-        --disable-muxers \
-        --disable-bsfs \
-        --disable-devices \
-        --disable-libdrm \
-        --enable-shared && \
-    make -j$(nproc) && \
-    make install && \
-    /tmp/use-sccache.sh show-stats "FFMPEG" && \
-    ldconfig && \
-    mkdir -p /usr/local/src/ffmpeg && \
-    mv /tmp/ffmpeg-${FFMPEG_VERSION}* /usr/local/src/ffmpeg/; \
-fi
-# Build and install UCX
-RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
-    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
-    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
-    if [ "$USE_SCCACHE" = "true" ]; then \
-        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
-        export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
-        export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
-    fi && \
-    cd /usr/local/src && \
-     git clone https://github.com/openucx/ucx.git && \
-     cd ucx && 			     \
-     git checkout $NIXL_UCX_REF &&	 \
-     ./autogen.sh &&      \
-     ./contrib/configure-release    \
-        --prefix=/usr/local/ucx     \
-        --enable-shared             \
-        --disable-static            \
-        --disable-doxygen-doc       \
-        --enable-optimizations      \
-        --enable-cma                \
-        --enable-devel-headers      \
-        --with-cuda=/usr/local/cuda \
-        --with-verbs                \
-        --with-dm                   \
-        --with-gdrcopy=/usr/local   \
-        --with-efa                  \
-        --enable-mt &&              \
-     make -j &&                      \
-     make -j install-strip &&        \
-     /tmp/use-sccache.sh show-stats "UCX" && \
-     echo "/usr/local/ucx/lib" > /etc/ld.so.conf.d/ucx.conf && \
-     echo "/usr/local/ucx/lib/ucx" >> /etc/ld.so.conf.d/ucx.conf && \
-     ldconfig
-ARG NIXL_LIBFABRIC_REF
-RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
-    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
-    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
-    if [ "$USE_SCCACHE" = "true" ]; then \
-        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
-        export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
-        export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
-    fi && \
-    cd /usr/local/src && \
-    git clone https://github.com/ofiwg/libfabric.git && \
-    cd libfabric && \
-    git checkout $NIXL_LIBFABRIC_REF && \
-    ./autogen.sh && \
-    ./configure --prefix="/usr/local/libfabric" \
-                --disable-verbs \
-                --disable-psm3 \
-                --disable-opx \
-                --disable-usnic \
-                --disable-rstream \
-                --enable-efa \
-                --with-cuda=/usr/local/cuda \
-                --enable-cuda-dlopen \
-                --with-gdrcopy \
-                --enable-gdrcopy-dlopen && \
-    make -j$(nproc) && \
-    make install && \
-    /tmp/use-sccache.sh show-stats "LIBFABRIC" && \
-    echo "/usr/local/libfabric/lib" > /etc/ld.so.conf.d/libfabric.conf && \
-    ldconfig
-# build and install nixl
-RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
-    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
-    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
-    if [ "$USE_SCCACHE" = "true" ]; then \
-        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
-        export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
-        export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
-    fi && \
-    source ${VIRTUAL_ENV}/bin/activate && \
-    git clone "https://github.com/ai-dynamo/nixl.git" && \
-    cd nixl && \
-    git checkout ${NIXL_REF} && \
-    CUDA_MAJOR=$(nvcc --version | grep -Eo 'release [0-9]+\.[0-9]+' | cut -d' ' -f2 | cut -d'.' -f1) && \
-    if [ "$CUDA_MAJOR" -ne 12 ] && [ "$CUDA_MAJOR" -ne 13 ]; then \
-        echo "Invalid CUDA_MAJOR: '$CUDA_MAJOR'" && \
-        exit 1; \
-    fi && \
-    PKG_NAME="nixl-cu${CUDA_MAJOR}" && \
-    ./contrib/tomlutil.py --wheel-name $PKG_NAME pyproject.toml && \
-    mkdir build && \
-    meson setup build/ --prefix=/opt/nvidia/nvda_nixl --buildtype=release \
-    -Dcudapath_lib="/usr/local/cuda/lib64" \
-    -Dcudapath_inc="/usr/local/cuda/include" \
-    -Ducx_path="/usr/local/ucx" \
-    -Dlibfabric_path="/usr/local/libfabric" && \
-    cd build && \
-    ninja && \
-    ninja install && \
-    /tmp/use-sccache.sh show-stats "NIXL"
-ENV NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64  \
-    NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins \
-    NIXL_PREFIX=/opt/nvidia/nvda_nixl
-ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}
-RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
-    echo "$NIXL_PLUGIN_DIR" >> /etc/ld.so.conf.d/nixl.conf && \
-    ldconfig
-RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
-    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
-    --mount=type=cache,target=/root/.cache/uv \
-    export UV_CACHE_DIR=/root/.cache/uv && \
-    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
-    if [ "$USE_SCCACHE" = "true" ]; then \
-        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
-        export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
-        export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
-    fi && \
-    cd /workspace/nixl && \
-    uv build . --wheel --out-dir /opt/dynamo/dist/nixl --python $PYTHON_VERSION
-# Copy source code (order matters for layer caching)
-COPY pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /opt/dynamo/
-COPY launch/ /opt/dynamo/launch/
-COPY lib/ /opt/dynamo/lib/
-COPY components/ /opt/dynamo/components/
-# Build dynamo wheels. The caches do not need the "shared" lock because Cargo has its own locking mechanism.
-ARG ENABLE_KVBM
-RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
-    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
-    --mount=type=cache,target=/root/.cargo/registry \
-    --mount=type=cache,target=/root/.cargo/git \
-    --mount=type=cache,target=/root/.cache/uv \
-    export UV_CACHE_DIR=/root/.cache/uv && \
-    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
-    if [ "$USE_SCCACHE" = "true" ]; then \
-        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
-        export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
-        export RUSTC_WRAPPER="sccache"; \
-    fi && \
-    source ${VIRTUAL_ENV}/bin/activate && \
-    cd /opt/dynamo && \
-    uv build --wheel --out-dir /opt/dynamo/dist && \
-    cd /opt/dynamo/lib/bindings/python && \
-    FEATURES=""; \
-    if [ "$ENABLE_MEDIA_NIXL" = "true" ]; then \
-        FEATURES="$FEATURES dynamo-llm/media-nixl"; \
-    fi; \
-    if [ "$ENABLE_MEDIA_FFMPEG" = "true" ]; then \
-        FEATURES="$FEATURES media-ffmpeg"; \
-    fi; \
-    if [ -n "$FEATURES" ]; then \
-        maturin build --release --features "$FEATURES" --out /opt/dynamo/dist; \
-    else \
-        maturin build --release --out /opt/dynamo/dist; \
-    fi && \
-    if [ "$ENABLE_KVBM" = "true" ]; then \
-        cd /opt/dynamo/lib/bindings/kvbm && \
-        maturin build --release --out target/wheels && \
-        auditwheel repair \
-            --exclude libnixl.so \
-            --exclude libnixl_build.so \
-            --exclude libnixl_common.so \
-            --exclude 'lib*.so*' \
-            --plat manylinux_2_28_${ARCH_ALT} \
-            --wheel-dir /opt/dynamo/dist \
-            target/wheels/*.whl; \
-    fi && \
-    /tmp/use-sccache.sh show-stats "Dynamo"
-# Build gpu_memory_service wheel (C++ extension only needs Python headers, no CUDA/torch)
-ARG ENABLE_GPU_MEMORY_SERVICE
-RUN if [ "$ENABLE_GPU_MEMORY_SERVICE" = "true" ]; then \
-        source ${VIRTUAL_ENV}/bin/activate && \
-        uv build --wheel --out-dir /opt/dynamo/dist /opt/dynamo/lib/gpu_memory_service; \
-    fi
-##############################################
-########## Runtime image ##############
-##############################################
-FROM base AS runtime
-ARG ARCH_ALT
-ARG PYTHON_VERSION
-# Create dynamo user with group 0 for OpenShift compatibility
-RUN userdel -r ubuntu > /dev/null 2>&1 || true \
-    && useradd -m -s /bin/bash -g 0 dynamo \
-    && [ `id -u dynamo` -eq 1000 ] \
-    && mkdir -p /home/dynamo/.cache /opt/dynamo \
-    # Non-recursive chown - only the directories themselves, not contents
-    && chown dynamo:0 /home/dynamo /home/dynamo/.cache /opt/dynamo /workspace \
-    # No chmod needed: umask 002 handles new files, COPY --chmod handles copied content
-    # Set umask globally for all subsequent RUN commands (must be done as root before USER dynamo)
-    # NOTE: Setting ENV UMASK=002 does NOT work - umask is a shell builtin, not an environment variable
-    && mkdir -p /etc/profile.d && echo 'umask 002' > /etc/profile.d/00-umask.sh
-# NIXL environment variables
-ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \
-    NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \
-    NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins \
-    CARGO_TARGET_DIR=/opt/dynamo/target
-# Copy ucx and nixl libs
-COPY --chown=dynamo: --from=wheel_builder /usr/local/ucx/ /usr/local/ucx/
-COPY --chown=dynamo: --from=wheel_builder ${NIXL_PREFIX}/ ${NIXL_PREFIX}/
-COPY --chown=dynamo: --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_LIB_DIR}/
-COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
-COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/
-# Copy ffmpeg
-RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
-    cp -rnL /tmp/usr/local/include/libav* /tmp/usr/local/include/libsw* /usr/local/include/; \
-    cp -nL /tmp/usr/local/lib/libav*.so /tmp/usr/local/lib/libsw*.so /usr/local/lib/; \
-    cp -nL /tmp/usr/local/lib/pkgconfig/libav*.pc /tmp/usr/local/lib/pkgconfig/libsw*.pc /usr/lib/pkgconfig/; \
-    cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/; \
-    true # in case ffmpeg not enabled
-# Copy built artifacts
-COPY --chown=dynamo: --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR
-COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
-# Install Python for framework=none runtime (cuda-dl-base doesn't include Python)
-# This is needed to create venv and install dynamo packages
-ARG PYTHON_VERSION
-# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
-RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
-    apt-get update && \
-    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-        python${PYTHON_VERSION}-dev \
-        python${PYTHON_VERSION}-venv && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/* && \
-    ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3
-# Switch to dynamo user and create virtual environment
-USER dynamo
-ENV HOME=/home/dynamo
-# Create and activate virtual environment
-# Use login shell to pick up umask 002 from /etc/profile.d/00-umask.sh for group-writable files
-SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"]
-# Cache uv downloads; uv handles its own locking for the cache.
-RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
-    export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
-    uv venv /opt/dynamo/venv --python ${PYTHON_VERSION}
-ENV VIRTUAL_ENV=/opt/dynamo/venv \
-    PATH="/opt/dynamo/venv/bin:${PATH}"
-# Install dynamo wheels (runtime packages only, no test dependencies)
-# uv handles its own locking for the cache, no need to add sharing=locked
-ARG ENABLE_KVBM
-ARG ENABLE_GPU_MEMORY_SERVICE
-RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
-    export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
-    uv pip install \
-    /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
-    /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
-    /opt/dynamo/wheelhouse/nixl/nixl*.whl && \
-    if [ "$ENABLE_GPU_MEMORY_SERVICE" = "true" ]; then \
-        GMS_WHEEL=$(ls /opt/dynamo/wheelhouse/gpu_memory_service*.whl 2>/dev/null | head -1); \
-        if [ -z "$GMS_WHEEL" ]; then \
-            echo "ERROR: ENABLE_GPU_MEMORY_SERVICE is true but no gpu_memory_service wheel found in wheelhouse" >&2; \
-            exit 1; \
-        fi; \
-        uv pip install "$GMS_WHEEL"; \
-    fi && \
-    if [ "$ENABLE_KVBM" = "true" ]; then \
-        KVBM_WHEEL=$(ls /opt/dynamo/wheelhouse/kvbm*.whl 2>/dev/null | head -1); \
-        if [ -z "$KVBM_WHEEL" ]; then \
-            echo "ERROR: ENABLE_KVBM is true but no KVBM wheel found in wheelhouse" >&2; \
-            exit 1; \
-        fi; \
-        uv pip install "$KVBM_WHEEL"; \
-    fi
-ARG DYNAMO_COMMIT_SHA
-ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
-ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
-CMD []
-##############################################
-########## Frontend entrypoint image #########
-##############################################
-FROM ${EPP_IMAGE} AS epp
-FROM nvcr.io/nvidia/base/ubuntu:noble-20250619 AS frontend
-ARG PYTHON_VERSION
-# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
-RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
-    apt-get update -y \
-    && apt-get install -y --no-install-recommends \
-        # required for EPP
-        ca-certificates \
-        libstdc++6 \
-        # required for verification of GPG keys
-        gnupg2 \
-        # required for installing dependencies from git repositories
-        git \
-        git-lfs \
-        # Python runtime - required for virtual environment to work
-        python${PYTHON_VERSION}-dev \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
-# Create dynamo user with group 0 for OpenShift compatibility
-RUN userdel -r ubuntu > /dev/null 2>&1 || true \
-    && useradd -m -s /bin/bash -g 0 dynamo \
-    && [ `id -u dynamo` -eq 1000 ] \
-    && mkdir -p /home/dynamo/.cache /opt/dynamo /workspace \
-    && chown -R dynamo: /opt/dynamo /home/dynamo/.cache /workspace \
-    && chmod -R g+w /opt/dynamo /home/dynamo/.cache /workspace
-# Set HOME so ModelExpress can find the cache directory
-ENV HOME=/home/dynamo
-# Switch to dynamo user
-USER dynamo
-ENV DYNAMO_HOME=/opt/dynamo
-WORKDIR /
-COPY --chown=dynamo: --from=epp /epp /epp
-COPY --chown=dynamo: container/launch_message/frontend.txt /opt/dynamo/.launch_screen
-# Copy tests, benchmarks, deploy and components with correct ownership
-COPY --chown=dynamo: tests /workspace/tests
-COPY --chown=dynamo: examples /workspace/examples
-COPY --chown=dynamo: benchmarks /workspace/benchmarks
-COPY --chown=dynamo: deploy /workspace/deploy
-COPY --chown=dynamo: components/ /workspace/components/
-COPY --chown=dynamo: recipes/ /workspace/recipes/
-# Copy attribution files with correct ownership
-COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/
-ENV VIRTUAL_ENV=/opt/dynamo/venv
-ENV PATH="/opt/dynamo/venv/bin:$PATH"
-# Copy uv and wheelhouse from runtime stage
-COPY --chown=dynamo: --from=runtime /bin/uv /bin/uvx /bin/
-COPY --chown=dynamo: --from=runtime /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
-# Create virtual environment
-RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
-    export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
-    mkdir -p /opt/dynamo/venv && \
-    uv venv /opt/dynamo/venv --python $PYTHON_VERSION
-# Install common and test dependencies. In an ideal world, we'd use a mirror of PyPI for much more reliable downloads.
-RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
-    --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
-    --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
-    export UV_CACHE_DIR=/home/dynamo/.cache/uv UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
-    uv pip install \
-        --requirement /tmp/requirements.txt \
-        --requirement /tmp/requirements.test.txt
-ARG ENABLE_KVBM
-ARG ENABLE_GPU_MEMORY_SERVICE
-# In an ideal world, we'd use a mirror of PyPI for much more reliable downloads.
-RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
-    export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
-    uv pip install \
-    /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
-    /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
-    /opt/dynamo/wheelhouse/nixl/nixl*.whl && \
-    if [ "$ENABLE_GPU_MEMORY_SERVICE" = "true" ]; then \
-        GMS_WHEEL=$(ls /opt/dynamo/wheelhouse/gpu_memory_service*.whl 2>/dev/null | head -1); \
-        if [ -z "$GMS_WHEEL" ]; then \
-            echo "ERROR: ENABLE_GPU_MEMORY_SERVICE is true but no gpu_memory_service wheel found in wheelhouse" >&2; \
-            exit 1; \
-        fi; \
-        uv pip install "$GMS_WHEEL"; \
-    fi && \
-    if [ "$ENABLE_KVBM" = "true" ]; then \
-        KVBM_WHEEL=$(ls /opt/dynamo/wheelhouse/kvbm*.whl 2>/dev/null | head -1); \
-        if [ -z "$KVBM_WHEEL" ]; then \
-            echo "ERROR: ENABLE_KVBM is true but no KVBM wheel found in wheelhouse" >&2; \
-            exit 1; \
-        fi; \
-        uv pip install "$KVBM_WHEEL"; \
-    fi && \
-    cd /workspace/benchmarks && \
-    export UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
-    uv pip install .
-# Setup environment for all users
-USER root
-RUN chmod 755 /opt/dynamo/.launch_screen && \
-    echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
-    echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
-USER dynamo
-ENTRYPOINT ["/epp"]
-CMD ["/bin/bash"]
--- a/container/Dockerfile.template
+++ b/container/Dockerfile.template
+# syntax=docker/dockerfile:1.10.0-labs
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+{% include "templates/args.Dockerfile" %}
+# --- Base Image Stages
+{# Standalone "dynamo" builds emit only the stages the requested target needs; every other framework always gets base + wheel_builder. #}
+{% if framework == "dynamo" %}
+    {% if target in ["base", "wheel_builder", "runtime", "dev", "local-dev", "frontend"] %}
+        {% include "templates/dynamo_base.Dockerfile" %}
+    {% endif %}
+    {% if target in ["wheel_builder", "runtime", "dev", "local-dev", "frontend"] %}
+        {% include "templates/wheel_builder.Dockerfile" %}
+    {% endif %}
+    {% if target in ["runtime", "dev", "local-dev", "frontend"] %}
+        {% include "templates/dynamo_runtime.Dockerfile" %}
+    {% endif %}
+    {% if target == "frontend" %}
+        {% include "templates/frontend.Dockerfile" %}
+    {% endif %}
+{% else %}
+    {% include "templates/dynamo_base.Dockerfile" %}
+    {% include "templates/wheel_builder.Dockerfile" %}
+{% endif %}
+# --- Framework Stages
+{% if framework == "sglang" %}
+    # SGLang is the only framework without a `framework` target currently, needs special treatment
+    {% include "templates/sglang_runtime.Dockerfile" %}
+{% elif framework != "dynamo" and target in ["framework", "runtime", "dev", "local-dev"] %}
+    {# The framework stage is always needed here; the runtime stage is skipped only when the framework stage itself is the target. #}
+    {% include "templates/" ~ framework ~ "_framework.Dockerfile" %}
+    {% if target != "framework" %}
+        {% include "templates/" ~ framework ~ "_runtime.Dockerfile" %}
+    {% endif %}
+{% else %}
+# --- No framework stages included
+{% endif %}
+{# EFA for a runtime target is layered here; for dev targets it is layered after the dev stage below. #}
+{% if target == "runtime" and make_efa == true %}
+    {% include "templates/aws.Dockerfile" %}
+{% endif %}
+# --- Development Stages
+{% if target in ["dev", "local-dev"] %}
+    {% include "templates/dev.Dockerfile" %}
+    {% if make_efa == true %}
+        {% include "templates/aws.Dockerfile" %}
+    {% endif %}
+    {% if target == "local-dev" %}
+        {% include "templates/local_dev.Dockerfile" %}
+    {% endif %}
+{% else %}
+# --- No development stages included
+{% endif %}
--- a/container/README.md
+++ b/container/README.md
@@ -6,22 +6,21 @@ The NVIDIA Dynamo project uses containerized development and deployment to maint
 ### Core Components
- **`build.sh`** - A Docker image builder that creates containers for different AI inference frameworks (vLLM, TensorRT-LLM, SGLang). It handles framework-specific dependencies, multi-stage builds, and development vs production configurations.
+- **`render.py`** - A render script used to generate Dockerfiles for AI inference frameworks (vLLM, TensorRT-LLM, SGLang). The generated Dockerfile includes the needed multi-stage steps for development vs production configurations.
 - **`run.sh`** - A container runtime manager that launches Docker containers with proper GPU access, volume mounts, and environment configurations. It supports different development workflows from root-based legacy setups to user-based development environments.
- **Multiple Dockerfiles** - Framework-specific Dockerfiles that define the container images:
+## Rendering Requirements:
-  - `Dockerfile.vllm` - For vLLM inference backend
+- Python
-  - `Dockerfile.trtllm` - For TensorRT-LLM inference backend
+- Python Packages:
-  - `Dockerfile.sglang` - For SGLang inference backend
+  - pyyaml
-  - `Dockerfile` - Base/standalone configuration
+  - jinja2
-  - `Dockerfile.epp` - For building the Endpoint Picker (EPP) image
 ### Stage Summary for Frameworks
 <details>
 <summary>Show Stage Summary Table</summary>
-Dockerfile.${FRAMEWORK} General Structure
+Dockerfile General Structure
 Below is a summary of the general file structure for the framework Dockerfile stages. Some exceptions exist.
@@ -80,14 +79,13 @@ The scripts in this directory abstract away the complexity of Docker commands wh
 ### Convenience Scripts vs Direct Docker Commands
-The `build.sh` and `run.sh` scripts are convenience wrappers that simplify common Docker operations. They automatically handle:
+The `run.sh` script and the rendering scripts are convenience wrappers that simplify common Docker operations. They automatically handle:
- Framework-specific image selection and tagging
 - GPU access configuration and runtime selection
 - Volume mount setup for development workflows
 - Environment variable management
 - Build argument construction for multi-stage builds
-**You can always use Docker commands directly** if you prefer more control or want to customize beyond what the scripts provide. The scripts use `--dry-run` flags to show you the exact Docker commands they would execute, making it easy to understand and modify the underlying operations.
+**You can always use Docker commands directly** if you prefer more control or want to customize beyond what the scripts provide. The `run.sh` script supports a `--dry-run` flag that shows you the exact commands it would execute, making it easy to understand and modify the underlying operations.
 ## Development Targets Feature Matrix
@@ -117,10 +115,11 @@ The `build.sh` and `run.sh` scripts are convenience wrappers that simplify commo
 ### 1. runtime target (runs as non-root dynamo user):
 ```bash
 # Build runtime image
-./build.sh --framework vllm --target runtime
+python container/render.py --framework vllm --target runtime --short-output
+docker build -t dynamo:latest-vllm-runtime -f container/rendered.Dockerfile .
 # Run runtime container
-./run.sh --image dynamo:latest-vllm-runtime -it
+container/run.sh --image dynamo:latest-vllm-runtime -it
 ```
 ### 2. local-dev + `run.sh` (runs as dynamo user with matched host UID/GID):
@@ -133,20 +132,20 @@ Use VS Code/Cursor Dev Container Extension with devcontainer.json configuration.
 ## Build and Run Scripts Overview
-### build.sh - Docker Image Builder
+### render.py - Dockerfile Generator
-The `build.sh` script is responsible for building Docker images for different AI inference frameworks. It supports multiple frameworks and configurations:
+The `render.py` script is responsible for generating Dockerfiles for different AI inference frameworks. It supports multiple frameworks and configurations:
 **Purpose:**
- Builds Docker images for NVIDIA Dynamo with support for vLLM, TensorRT-LLM, SGLang, or standalone configurations
+- Generates Dockerfiles for NVIDIA Dynamo with support for vLLM, TensorRT-LLM, SGLang, or standalone configurations
 - Handles framework-specific dependencies and optimizations
 - Manages build contexts, caching, and multi-stage builds
 - Configures development vs production targets
 **Key Features:**
- **Framework Support**: vLLM (default when --framework not specified), TensorRT-LLM, SGLang, or NONE
+- **Framework Support**: vLLM (default when --framework not specified), TensorRT-LLM, SGLang, or NONE (standalone Dynamo)
 - **Multi-stage Builds**: Build process with base images
- **Development Targets**: Supports `dev`, `runtime`, and `local-dev` targets via `build.sh`.
+- **Development Targets**: Supports `dev`, `runtime`, and `local-dev` targets via `render.py`.
 - **Build Caching**: Docker layer caching and sccache support
 - **GPU Optimization**: CUDA, EFA, and NIXL support
@@ -221,52 +220,49 @@ Current cache types (as mounted in various Dockerfiles):
 Note: `uv` commands set `UV_CACHE_DIR` per `RUN` so `uv` always uses the same path as the cache mount (instead of relying on `$HOME`).
-**How `dev` / `local-dev` builds work:**
- `dev` and `local-dev` targets are defined in `container/dev/Dockerfile.dev`.
- The framework Dockerfiles (`Dockerfile.vllm`, `Dockerfile.trtllm`, `Dockerfile.sglang`, `Dockerfile`) define shared stages used by `Dockerfile.dev` (e.g. `runtime`, `dynamo_base`, `wheel_builder`).
- To build a single coherent Dockerfile, `build.sh` generates a temporary Dockerfile that is a literal concatenation of:
-  - the selected framework Dockerfile, then
-  - `container/dev/Dockerfile.dev`
-  `build.sh` then continues building normally using the temp Dockerfile path.
-**Requirements and debugging:**
- By default the temp Dockerfile is deleted at the end of `build.sh`. To keep it for inspection, set `KEEP_DEV_DOCKERFILE_TEMP=1`.
 > **💡 Tip**: The `dev` and `local-dev` images have source code baked in, but **using `--mount-workspace` with `run.sh` is recommended for development** to bind mount your local workspace for live editing.
 **Common Usage Examples:**
 ```bash
 # Build vLLM dev image called dynamo:latest-vllm-dev. This runs as root and is for development.
-./build.sh
+python container/render.py --framework=vllm --target=dev --short-output
+docker build -t dynamo:latest-vllm-dev -f container/rendered.Dockerfile .
 # Build a local-dev image. The local-dev image will run as `dynamo` with UID/GID matched to your host user,
 # which is useful when mounting partitions for development.
-./build.sh --framework vllm --target local-dev
+python container/render.py --framework=vllm --target=local-dev --short-output
+docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile -t dynamo:latest-vllm-local-dev .
 # Build TensorRT-LLM runtime image called dynamo:latest-trtllm-runtime
-./build.sh --framework trtllm
+python container/render.py --framework=trtllm --target=runtime --short-output
+docker build -t dynamo:latest-trtllm-runtime -f container/rendered.Dockerfile .
-# Build with custom tag
-./build.sh --framework sglang --tag my-custom-tag
-# Dry run to see commands
-./build.sh --dry-run
-# Build with no cache
-./build.sh --no-cache
-# Build with build arguments
-./build.sh --build-arg CUSTOM_ARG=value
 ```
 ### Building the Frontend Image
 The frontend image is a specialized container that includes the Dynamo components (Dynamo, NIXL, etc) along with the Endpoint Picker (EPP) for Kubernetes Gateway API Inference Extension integration. This image is primarily used for inference gateway deployments.
+**Build EPP Image**
+```bash
+sudo apt-get update && sudo apt-get install -y git build-essential protobuf-compiler libclang-dev
+curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable
+. "$HOME/.cargo/env"
+cargo install cbindgen
+pushd deploy/inference-gateway/epp
+make all
+popd
+EPP_GIT_TAG=$(git describe --tags --dirty --always 2>/dev/null || echo "dev")
+EPP_IMAGE="dynamo/dynamo-epp:${EPP_GIT_TAG}"
+```
+**Build Frontend Image**
 ```bash
 # Build the frontend image (requires the EPP image built in the previous step, passed via EPP_IMAGE)
-./build.sh --framework none --target frontend
+python container/render.py --framework=dynamo --target=frontend --short-output
+docker build -t dynamo:frontend --build-arg EPP_IMAGE=${EPP_IMAGE} -f container/rendered.Dockerfile .
 ```
 The build process automatically:
@@ -313,34 +309,34 @@ The `run.sh` script launches Docker containers with the appropriate configuratio
 ```bash
 # Basic container launch with dev image (runs as root by default, non-interactive)
-./run.sh --image dynamo:latest-vllm -v $HOME/.cache:/root/.cache
+container/run.sh --image dynamo:latest-vllm -v $HOME/.cache:/root/.cache
 # Interactive development with workspace mounted using dev image (runs as root)
-./run.sh --image dynamo:latest-vllm --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
 # Interactive development with local-dev image (runs as dynamo user with matched host UID/GID)
-./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
 # Use specific image and framework for development
-./run.sh --image v0.1.0.dev.08cc44965-vllm-local-dev --framework vllm --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image v0.1.0.dev.08cc44965-vllm-local-dev --framework vllm --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
 # Interactive development shell with workspace mounted (local-dev)
-./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/dynamo/.cache -it -- bash
+container/run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/dynamo/.cache -it -- bash
 # Development with custom environment variables
-./run.sh --image dynamo:latest-vllm-local-dev -e CUDA_VISIBLE_DEVICES=0,1 --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm-local-dev -e CUDA_VISIBLE_DEVICES=0,1 --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
 # Dry run to see docker command
-./run.sh --dry-run
+container/run.sh --dry-run
 # Development with custom volume mounts
-./run.sh --image dynamo:latest-vllm-local-dev -v /host/path:/container/path --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm-local-dev -v /host/path:/container/path --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
 # Run runtime image as non-root dynamo user (for production)
-./run.sh --image dynamo:latest-vllm-runtime -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm-runtime -v $HOME/.cache:/home/dynamo/.cache
 # Run dev image as specific user (override default root)
-./run.sh --image dynamo:latest-vllm --user dynamo -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm --user dynamo -v $HOME/.cache:/home/dynamo/.cache
 ```
 ### Network Configuration Options
@@ -350,8 +346,8 @@ The `run.sh` script supports different networking modes via the `--network` flag
 #### Host Networking (Default)
 ```bash
 # Examples with dynamo user
-./run.sh --image dynamo:latest-vllm-local-dev --network host -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm-local-dev --network host -v $HOME/.cache:/home/dynamo/.cache
-./run.sh --image dynamo:latest-vllm-local-dev -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm-local-dev -v $HOME/.cache:/home/dynamo/.cache
 ```
 **Use cases:**
 - High-performance ML inference (default for GPU workloads)
@@ -364,7 +360,7 @@ The `run.sh` script supports different networking modes via the `--network` flag
 #### Bridge Networking (Isolated)
 ```bash
 # CI/testing with isolated bridge networking and host cache sharing (no -it for automated CI)
-./run.sh --image dynamo:latest-vllm --mount-workspace --network bridge -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm --mount-workspace --network bridge -v $HOME/.cache:/home/dynamo/.cache
 ```
 **Use cases:**
 - Secure isolation from host network
@@ -377,10 +373,10 @@ The `run.sh` script supports different networking modes via the `--network` flag
 #### No Networking ⚠️ **LIMITED FUNCTIONALITY**
 ```bash
 # Complete network isolation - no external connectivity
-./run.sh --image dynamo:latest-vllm --network none --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm --network none --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
 # Same with local-dev image (dynamo user with matched host UID/GID)
-./run.sh --image dynamo:latest-vllm-local-dev --network none --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm-local-dev --network none --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
 ```
 **⚠️ WARNING: `--network none` severely limits Dynamo functionality:**
 - **No model downloads** - HuggingFace models cannot be downloaded
@@ -427,11 +423,12 @@ See Docker documentation for custom network creation and management.
 ### Development Workflow
 ```bash
 # 1. Build local-dev image (builds runtime, then dev as intermediate, then local-dev as final image)
-./build.sh --framework vllm --target local-dev
+python container/render.py --framework=vllm --target=local-dev --short-output
+docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile -t dynamo:latest-vllm-local-dev .
 # 2. Run development container using the local-dev image
 # RECOMMENDED: --mount-workspace for live editing in dev and local-dev images
-./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/dynamo/.cache -it
+container/run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/dynamo/.cache -it
 # 3. Inside container, run inference (requires both frontend and backend)
 # Start frontend
@@ -444,19 +441,21 @@ python -m dynamo.vllm --model Qwen/Qwen3-0.6B --gpu-memory-utilization 0.20 &
 ### Production Workflow
 ```bash
 # 1. Build production runtime image (runs as non-root dynamo user)
-./build.sh --framework vllm --target runtime
+python container/render.py --framework=vllm --target=runtime --short-output
+docker build -t dynamo:latest-vllm-runtime -f container/rendered.Dockerfile .
 # 2. Run production container as non-root dynamo user
-./run.sh --image dynamo:latest-vllm-runtime --gpus all -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm-runtime --gpus all -v $HOME/.cache:/home/dynamo/.cache
 ```
 ### Testing Workflow
 ```bash
 # 1. Build dev image
-./build.sh --framework vllm --no-cache
+python container/render.py --framework=vllm --target=dev --short-output
+docker build -t dynamo:latest-vllm-dev -f container/rendered.Dockerfile .
 # 2. Run tests with network isolation for reproducible results (no -it needed for CI)
-./run.sh --image dynamo:latest-vllm --mount-workspace --network bridge -v $HOME/.cache:/home/dynamo/.cache -- python -m pytest tests/
+container/run.sh --image dynamo:latest-vllm-dev --mount-workspace --network bridge -v $HOME/.cache:/home/dynamo/.cache -- python -m pytest tests/
 # 3. Inside the container with bridge networking, start services
 # Note: Services are only accessible from the same container - no port conflicts with host

--- a/container/build.sh
+++ b/container/build.sh
-#!/usr/bin/env bash
-# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-if [ "${BASH_VERSINFO[0]}" -lt 4 ]; then
-    echo "Error: Bash version 4.0 or higher is required. Current version: ${BASH_VERSINFO[0]}.${BASH_VERSINFO[1]}"
-    exit 1
-fi
-set -e
-TAG=
-PRIMARY_TAG=
-RUN_PREFIX=
-PLATFORM=linux/amd64
-# Get short commit hash
-commit_id=${commit_id:-$(git rev-parse --short HEAD)}
-# if COMMIT_ID matches a TAG use that
-current_tag=${current_tag:-$(git describe --tags --exact-match 2>/dev/null | sed 's/^v//' || true)}
-# Get latest version from release branches or tags
-# Strategy:
-# 1. Check for release/X.Y.Z branches (most reliable for development)
-# 2. Fall back to git tags, excluding test-rc tags
-# 3. Default to 0.0.1 if nothing found
-# Try to find the latest release branch first
-latest_release_branch=$(git branch -r 2>/dev/null | grep -E 'origin/release/[0-9]+\.[0-9]+\.[0-9]+$' | sed 's|.*/||' | sort -V | tail -1 || true)
-if [[ -n ${latest_release_branch} ]]; then
-    latest_tag=${latest_tag:-$latest_release_branch}
-    echo "INFO: Using version from latest release branch: ${latest_tag}"
-else
-    # Fall back to tags, excluding test-rc tags
-    latest_tag=${latest_tag:-$(git tag -l 'v*' --sort=-version:refname | grep -v 'test-rc' | head -1 | sed 's/^v//' || true)}
-fi
-if [[ -z ${latest_tag} ]]; then
-    latest_tag="0.0.1"
-    echo "No git release tag or branch found, setting to unknown version: ${latest_tag}"
-fi
-# Use tag if available, otherwise use latest_tag.dev.commit_id
-VERSION=v${current_tag:-$latest_tag.dev.$commit_id}
-PYTHON_PACKAGE_VERSION=${current_tag:-$latest_tag.dev+$commit_id}
-# Frameworks
-#
-# Each framework has a corresponding base image.  Additional
-# dependencies are specified in the /container/deps folder and
-# installed within framework specific sections of the Dockerfile.
-declare -A FRAMEWORKS=(["VLLM"]=1 ["TRTLLM"]=2 ["NONE"]=3 ["SGLANG"]=4)
-DEFAULT_FRAMEWORK=VLLM
-SOURCE_DIR=$(dirname "$(readlink -f "$0")")
-DOCKERFILE=${SOURCE_DIR}/Dockerfile
-BUILD_CONTEXT=$(dirname "$(readlink -f "$SOURCE_DIR")")
-# Base Images
-TRTLLM_BASE_IMAGE=nvcr.io/nvidia/pytorch
-TRTLLM_BASE_IMAGE_TAG=25.12-py3
-# Important Note: Because of ABI compatibility issues between TensorRT-LLM and NGC PyTorch,
-# we need to build the TensorRT-LLM wheel from source.
-#
-# There are two ways to build the dynamo image with TensorRT-LLM.
-# 1. Use the local TensorRT-LLM wheel directory.
-# 2. Use the TensorRT-LLM wheel on artifactory.
-#
-# If using option 1, the TENSORRTLLM_PIP_WHEEL_DIR must be a path to a directory
-# containing TensorRT-LLM wheel file along with commit.txt file with the
-# <arch>_<commit ID> as contents. If no valid trtllm wheel is found, the script
-# will attempt to build the wheel from source and store the built wheel in the
-# specified directory. TRTLLM_COMMIT from the TensorRT-LLM main branch will be
-# used to build the wheel.
-#
-# If using option 2, the TENSORRTLLM_PIP_WHEEL must be the TensorRT-LLM wheel
-# package that will be installed from the specified TensorRT-LLM PyPI Index URL.
-# This option will ignore the TRTLLM_COMMIT option. As the TensorRT-LLM wheel from PyPI
-# is not ABI compatible with NGC PyTorch, you can use TENSORRTLLM_INDEX_URL to specify
-# a private PyPI index URL which has your pre-built TensorRT-LLM wheel.
-#
-# By default, we will use option 1. If you want to use option 2, you can set
-# TENSORRTLLM_PIP_WHEEL to the TensorRT-LLM wheel on artifactory.
-#
-DEFAULT_TENSORRTLLM_PIP_WHEEL_DIR="/tmp/trtllm_wheel/"
-# TensorRT-LLM commit to use for building the trtllm wheel if not provided.
-# Important Note: This commit is not used in our CI pipeline. See the CI
-# variables to learn how to run a pipeline with a specific commit.
-DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="45d7022cc33903509fd8045bbc577d77dd1d3e2f" # 1.3.0rc1
-TRTLLM_COMMIT=""
-TRTLLM_USE_NIXL_KVCACHE_EXPERIMENTAL="0"
-TRTLLM_GIT_URL=""
-# TensorRT-LLM PyPI index URL
-DEFAULT_TENSORRTLLM_INDEX_URL="https://pypi.nvidia.com/"
-# TODO: Remove the version specification from here and use the ai-dynamo[trtllm] package.
-# Need to update the Dockerfile.trtllm to use the ai-dynamo[trtllm] package.
-DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.3.0rc1"
-# TensorRT-LLM wheels on PyPI might not be compatible with the NGC PyTorch.
-# For incompatible versions, we install the wheel from the NGC image during the Docker build.
-# The following versions are not ABI compatible with the NGC PyTorch.
-TRTLLM_ABI_INCOMPATIBLE_VERSIONS=("1.3.0rc1")
-TENSORRTLLM_PIP_WHEEL=""
-TRTLLM_WHEEL_IMAGE=""
-VLLM_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
-# FIXME: OPS-612 NCCL will hang with 25.03, so use 25.01 for now
-# Please check https://github.com/ai-dynamo/dynamo/pull/1065
-# for details and reproducer to manually test if the image
-# can be updated to later versions.
-VLLM_BASE_IMAGE_TAG="25.06-cuda12.9-devel-ubuntu24.04"
-VLLM_BASE_IMAGE_TAG_CU13="25.11-cuda13.0-devel-ubuntu24.04"
-VLLM_RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
-VLLM_RUNTIME_IMAGE_TAG="12.9.1-runtime-ubuntu24.04"
-VLLM_RUNTIME_IMAGE_TAG_CU13="13.0.2-runtime-ubuntu24.04"
-NONE_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
-NONE_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
-SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
-SGLANG_BASE_IMAGE_TAG="25.06-cuda12.9-devel-ubuntu24.04"
-SGLANG_BASE_IMAGE_TAG_CU13="25.11-cuda13.0-devel-ubuntu24.04"
-SGLANG_CUDA_VERSION="12.9.1"
-SGLANG_CUDA_VERSION_CU13="13.0.1"
-SGLANG_RUNTIME_IMAGE_TAG_CU13="v0.5.8-cu130-runtime"
-PYTHON_VERSION="3.12"
-NIXL_REF=0.9.0
-NIXL_UCX_REF=v1.20.0
-NIXL_GDRCOPY_REF=v2.5.1
-NIXL_LIBFABRIC_REF=v2.3.0
-# AWS EFA installer version
-EFA_VERSION=1.45.1
-NO_CACHE=""
-NO_LOAD=""
-PUSH=""
-# KVBM (KV Cache Block Manager) - default disabled, enabled automatically for VLLM/TRTLLM
-# or can be explicitly enabled via --enable-kvbm flag
-ENABLE_KVBM=false
-# GPU Memory Service - default disabled, enabled automatically for VLLM/SGLANG
-# or can be explicitly enabled via --enable-gpu-memory-service flag
-ENABLE_GPU_MEMORY_SERVICE=false
-# sccache configuration for S3
-USE_SCCACHE=""
-SCCACHE_BUCKET=""
-SCCACHE_REGION=""
-get_options() {
-    while :; do
-        case $1 in
-        -h | -\? | --help)
-            show_help
-            exit
-            ;;
-        --platform)
-            if [ "$2" ]; then
-                PLATFORM=$2
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --framework)
-            if [ "$2" ]; then
-                FRAMEWORK=$2
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --cuda-version)
-            if [ "$2" ]; then
-                echo "INFO: Setting CUDA_VERSION to $2"
-                CUDA_VERSION=$2
-                BUILD_ARGS+=" --build-arg CUDA_VERSION=$2 "
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --nixl-ref)
-            if [ "$2" ]; then
-                NIXL_REF=$2
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --tensorrtllm-pip-wheel-dir)
-            if [ "$2" ]; then
-                TENSORRTLLM_PIP_WHEEL_DIR=$2
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --tensorrtllm-commit)
-            if [ "$2" ]; then
-                TRTLLM_COMMIT=$2
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --tensorrtllm-pip-wheel)
-            if [ "$2" ]; then
-                TENSORRTLLM_PIP_WHEEL=$2
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --tensorrtllm-index-url)
-            if [ "$2" ]; then
-                TENSORRTLLM_INDEX_URL=$2
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --tensorrtllm-git-url)
-            if [ "$2" ]; then
-                TRTLLM_GIT_URL=$2
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --base-image)
-            if [ "$2" ]; then
-                BASE_IMAGE=$2
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --base-image-tag)
-            if [ "$2" ]; then
-                BASE_IMAGE_TAG=$2
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --target)
-            if [ "$2" ]; then
-                TARGET=$2
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --uid)
-            if [ "$2" ]; then
-                CUSTOM_UID=$2
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --gid)
-            if [ "$2" ]; then
-                CUSTOM_GID=$2
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --build-arg)
-            if [ "$2" ]; then
-                BUILD_ARGS+="--build-arg $2 "
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --tag)
-            if [ "$2" ]; then
-                if [ -z "$TAG" ]; then
-                    TAG="--tag $2"
-                    PRIMARY_TAG="$2"
-                else
-                    TAG+=" --tag $2"
-                fi
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --dry-run)
-            RUN_PREFIX="echo"
-            DRY_RUN="true"
-            echo ""
-            echo "=============================="
-            echo "DRY RUN: COMMANDS PRINTED ONLY"
-            echo "=============================="
-            echo ""
-            ;;
-        --no-cache)
-            NO_CACHE=" --no-cache"
-            ;;
-        --no-load)
-            NO_LOAD=true
-            ;;
-        --push)
-            PUSH=" --push"
-            ;;
-        --cache-from)
-            if [ "$2" ]; then
-                CACHE_FROM+="--cache-from $2 "
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --cache-to)
-            if [ "$2" ]; then
-                CACHE_TO+="--cache-to $2 "
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --build-context)
-            if [ "$2" ]; then
-                BUILD_CONTEXT_ARG="--build-context $2"
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --enable-kvbm)
-            ENABLE_KVBM=true
-            ;;
-        --enable-gpu-memory-service)
-            ENABLE_GPU_MEMORY_SERVICE=true
-            ;;
-        --enable-media-nixl)
-            ENABLE_MEDIA_NIXL=true
-            ;;
-        --enable-media-ffmpeg)
-            ENABLE_MEDIA_FFMPEG=true
-            ;;
-        --make-efa)
-            MAKE_EFA=true
-            ;;
-        --use-sccache)
-            USE_SCCACHE=true
-            ;;
-        --sccache-bucket)
-            if [ "$2" ]; then
-                SCCACHE_BUCKET=$2
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --sccache-region)
-            if [ "$2" ]; then
-                SCCACHE_REGION=$2
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --vllm-max-jobs)
-            # Set MAX_JOBS for vLLM compilation (only used by Dockerfile.vllm)
-            if [ "$2" ]; then
-                MAX_JOBS=$2
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --efa-version)
-            if [ "$2" ]; then
-                EFA_VERSION=$2
-                shift
-            else
-                missing_requirement "$1"
-            fi
-            ;;
-        --no-tag-latest)
-            NO_TAG_LATEST=true
-            ;;
-         -?*)
-            error 'ERROR: Unknown option: ' "$1"
-            ;;
-         ?*)
-            error 'ERROR: Unknown option: ' "$1"
-            ;;
-        *)
-            break
-            ;;
-        esac
-        shift
-    done
-    # Validate that --uid and --gid are only used with local-dev target
-    if [[ -n "${CUSTOM_UID:-}" || -n "${CUSTOM_GID:-}" ]]; then
-        if [[ "${TARGET:-}" != "local-dev" && "${TARGET:-}" != "local-dev-aws" ]]; then
-            error "ERROR: --uid and --gid can only be used with --target local-dev or --target local-dev-aws"
-        fi
-    fi
-    if [ -z "$FRAMEWORK" ]; then
-        FRAMEWORK=$DEFAULT_FRAMEWORK
-    fi
-    if [ -n "$FRAMEWORK" ]; then
-        FRAMEWORK=${FRAMEWORK^^}
-        if [[ -z "${FRAMEWORKS[$FRAMEWORK]}" ]]; then
-            error 'ERROR: Unknown framework: ' "$FRAMEWORK"
-        fi
-        if [ -z "$BASE_IMAGE_TAG" ]; then
-            BASE_IMAGE_TAG=${FRAMEWORK}_BASE_IMAGE_TAG
-            BASE_IMAGE_TAG=${!BASE_IMAGE_TAG}
-            echo "INFO: Using default base image tag for $FRAMEWORK: $BASE_IMAGE_TAG"
-        fi
-        if [ -z "$BASE_IMAGE" ]; then
-            BASE_IMAGE=${FRAMEWORK}_BASE_IMAGE
-            BASE_IMAGE=${!BASE_IMAGE}
-        fi
-        if [[ $FRAMEWORK == "VLLM" ]] && [[ $CUDA_VERSION == "13."* ]]; then
-            BASE_IMAGE_TAG=$VLLM_BASE_IMAGE_TAG_CU13
-            BUILD_ARGS+=" --build-arg BASE_IMAGE_TAG=${VLLM_BASE_IMAGE_TAG_CU13} "
-            RUNTIME_IMAGE_TAG=$VLLM_RUNTIME_IMAGE_TAG_CU13
-            BUILD_ARGS+=" --build-arg RUNTIME_IMAGE_TAG=${VLLM_RUNTIME_IMAGE_TAG_CU13} "
-            echo "INFO: Overriding base image tag for vLLM with CUDA 13: $BASE_IMAGE_TAG AND RUNTIME_IMAGE_TAG: $RUNTIME_IMAGE_TAG"
-        fi
-        if [[ $FRAMEWORK == "SGLANG" ]] && [[ $CUDA_VERSION == "13."* ]]; then
-            BASE_IMAGE_TAG=$SGLANG_BASE_IMAGE_TAG_CU13
-            BUILD_ARGS+=" --build-arg BASE_IMAGE_TAG=${SGLANG_BASE_IMAGE_TAG_CU13} "
-            SGLANG_CUDA_VERSION="${SGLANG_CUDA_VERSION_CU13}"
-            RUNTIME_IMAGE_TAG="${SGLANG_RUNTIME_IMAGE_TAG_CU13}"
-            BUILD_ARGS+=" --build-arg RUNTIME_IMAGE_TAG=${RUNTIME_IMAGE_TAG} "
-            echo "INFO: Overriding base image tag for SGLang with CUDA 13: $BASE_IMAGE_TAG AND RUNTIME_IMAGE_TAG: $RUNTIME_IMAGE_TAG"
-        fi
-        if [ -z "$BASE_IMAGE" ]; then
-            error "ERROR: Framework $FRAMEWORK without BASE_IMAGE"
-        fi
-        BASE_VERSION=${FRAMEWORK}_BASE_VERSION
-        BASE_VERSION=${!BASE_VERSION}
-    fi
-    if [ -z "$TAG" ]; then
-        TAG="--tag dynamo:${VERSION}-${FRAMEWORK,,}"
-        PRIMARY_TAG="dynamo:${VERSION}-${FRAMEWORK,,}"
-        if [ -n "${TARGET}" ] && [ "${TARGET}" != "local-dev" ]; then
-            TAG="${TAG}-${TARGET}"
-            PRIMARY_TAG="${PRIMARY_TAG}-${TARGET}"
-        fi
-    fi
-    if [ -n "$PLATFORM" ]; then
-        PLATFORM="--platform ${PLATFORM}"
-    fi
-    if [ -n "$TARGET" ]; then
-        TARGET_STR="--target ${TARGET}"
-    else
-        TARGET_STR="--target dev"
-    fi
-    # Validate sccache configuration
-    if [ "$USE_SCCACHE" = true ]; then
-        if [ -z "$SCCACHE_BUCKET" ]; then
-            error "ERROR: --sccache-bucket is required when --use-sccache is specified"
-        fi
-        if [ -z "$SCCACHE_REGION" ]; then
-            error "ERROR: --sccache-region is required when --use-sccache is specified"
-        fi
-    fi
-}
-show_image_options() {
-    echo ""
-    echo "Building Dynamo Image: '${TAG}'"
-    echo ""
-    echo "   Base: '${BASE_IMAGE}'"
-    echo "   Base_Image_Tag: '${BASE_IMAGE_TAG}'"
-    if [[ $FRAMEWORK == "TRTLLM" ]]; then
-        echo "   Tensorrtllm_Pip_Wheel: '${PRINT_TRTLLM_WHEEL_FILE}'"
-    fi
-    echo "   Build Context: '${BUILD_CONTEXT}'"
-    echo "   Build Arguments: '${BUILD_ARGS}'"
-    echo "   Framework: '${FRAMEWORK}'"
-    if [ "$USE_SCCACHE" = true ]; then
-        echo "   sccache: Enabled"
-        echo "   sccache Bucket: '${SCCACHE_BUCKET}'"
-        echo "   sccache Region: '${SCCACHE_REGION}'"
-        if [ -n "$SCCACHE_S3_KEY_PREFIX" ]; then
-            echo "   sccache S3 Key Prefix: '${SCCACHE_S3_KEY_PREFIX}'"
-        fi
-    fi
-    echo ""
-}
-show_help() {
-    echo "usage: build.sh"
-    echo "  [--base-image base image]"
-    echo "  [--base-image-tag base image tag]"
-    echo "  [--platform platform for docker build]"
-    echo "  [--framework framework one of ${!FRAMEWORKS[*]}]"
-    echo "  [--tensorrtllm-pip-wheel-dir path to tensorrtllm pip wheel directory]"
-    echo "  [--tensorrtllm-commit tensorrtllm commit/tag/branch to use for building the trtllm wheel if the wheel is not provided]"
-    echo "  [--tensorrtllm-pip-wheel tensorrtllm pip wheel on artifactory]"
-    echo "  [--tensorrtllm-index-url tensorrtllm PyPI index URL if providing the wheel from artifactory]"
-    echo "  [--tensorrtllm-git-url tensorrtllm git repository URL for cloning]"
-    echo "  [--build-arg additional build args to pass to docker build]"
-    echo "  [--cache-from cache location to start from]"
-    echo "  [--cache-to location where to cache the build output]"
-    echo "  [--tag tag for image (can be specified multiple times)]"
-    echo "  [--uid user ID for local-dev images (only with --target local-dev)]"
-    echo "  [--gid group ID for local-dev images (only with --target local-dev)]"
-    echo "  [--no-cache disable docker build cache]"
-    echo "  [--no-load do not load the image into docker (disables default --load)]"
-    echo "  [--push push the image to the registry]"
-    echo "  [--dry-run print docker commands without running]"
-    echo "  [--build-context name=path to add build context]"
-    echo "  [--release-build perform a release build]"
-    echo "  [--make-efa Adds AWS EFA layer on top of the built image (works with any target)]"
-    echo "  [--enable-kvbm Enables KVBM support in Python 3.12]"
-    echo "  [--enable-gpu-memory-service Enables GPU Memory Service support]"
-    echo "  [--enable-media-nixl Enable media processing with NIXL support (default: true for frameworks, false for none)]"
-    echo "  [--enable-media-ffmpeg Enable media processing with FFMPEG support (default: true for frameworks, false for none)]"
-    echo "  [--use-sccache enable sccache for Rust/C/C++ compilation caching]"
-    echo "  [--sccache-bucket S3 bucket name for sccache (required with --use-sccache)]"
-    echo "  [--sccache-region S3 region for sccache (required with --use-sccache)]"
-    echo "  [--vllm-max-jobs number of parallel jobs for compilation (only used by vLLM framework)]"
-    echo "  [--efa-version AWS EFA installer version (default: 1.45.1)]"
-    echo "  [--no-tag-latest do not add latest-{framework} tag to built image]"
-    echo ""
-    echo "  Note: When using --use-sccache, AWS credentials must be set:"
-    echo "        export AWS_ACCESS_KEY_ID=your_access_key"
-    echo "        export AWS_SECRET_ACCESS_KEY=your_secret_key"
-    exit 0
-}
-missing_requirement() {
-    error "ERROR: $1 requires an argument."
-}
-error() {
-    printf '%s %s\n' "$1" "$2" >&2
-    exit 1
-}
-get_options "$@"
-# Automatically set ARCH and ARCH_ALT if PLATFORM is linux/arm64
-ARCH="amd64"
-if [[ "$PLATFORM" == *"linux/arm64"* ]]; then
-    ARCH="arm64"
-    BUILD_ARGS+=" --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64 "
-fi
-# Set the commit sha in the container so we can inspect what build this relates to
-DYNAMO_COMMIT_SHA=${DYNAMO_COMMIT_SHA:-$(git rev-parse HEAD)}
-BUILD_ARGS+=" --build-arg DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA "
-# Update DOCKERFILE if framework is VLLM
-if [[ $FRAMEWORK == "VLLM" ]]; then
-    DOCKERFILE=${SOURCE_DIR}/Dockerfile.vllm
-elif [[ $FRAMEWORK == "TRTLLM" ]]; then
-    DOCKERFILE=${SOURCE_DIR}/Dockerfile.trtllm
-elif [[ $FRAMEWORK == "NONE" ]]; then
-    DOCKERFILE=${SOURCE_DIR}/Dockerfile
-elif [[ $FRAMEWORK == "SGLANG" ]]; then
-    DOCKERFILE=${SOURCE_DIR}/Dockerfile.sglang
-fi
-# Add NIXL_REF as a build argument
-BUILD_ARGS+=" --build-arg NIXL_REF=${NIXL_REF} "
-# Add NIXL_LIBFABRIC_REF as a build argument
-BUILD_ARGS+=" --build-arg NIXL_LIBFABRIC_REF=${NIXL_LIBFABRIC_REF} "
-# Add EFA_VERSION as a build argument
-BUILD_ARGS+=" --build-arg EFA_VERSION=${EFA_VERSION} "
-# Function to build AWS EFA images from base runtime or dev images
-build_aws_with_header() {
-    local base_image="$1"
-    local tags="$2"
-    local aws_target="$3"  # runtime-aws or dev-aws
-    local success_msg="$4"
-    DOCKERFILE_AWS="${SOURCE_DIR}/Dockerfile.aws"
-    if [[ ! -f "$DOCKERFILE_AWS" ]]; then
-        echo "ERROR: Dockerfile.aws not found at: $DOCKERFILE_AWS"
-        exit 1
-    fi
-    echo ""
-    echo "Building AWS EFA image from base: $base_image"
-    echo "Target stage: $aws_target"
-    # Show the docker command being executed if not in dry-run mode
-    if [ -z "$RUN_PREFIX" ]; then
-        set -x
-    fi
-    $RUN_PREFIX docker build --progress=plain \
-        --build-arg BASE_IMAGE="$base_image" \
-        --build-arg EFA_VERSION="${EFA_VERSION}" \
-        --target "$aws_target" \
-        --file "$DOCKERFILE_AWS" \
-        $PLATFORM \
-        $tags \
-        "$SOURCE_DIR" || {
-        { set +x; } 2>/dev/null
-        echo "ERROR: Failed to build AWS EFA image"
-        exit 1
-    }
-    { set +x; } 2>/dev/null
-    echo "$success_msg"
-}
-BUILD_ARGS+=" --build-arg BASE_IMAGE=$BASE_IMAGE --build-arg BASE_IMAGE_TAG=$BASE_IMAGE_TAG"
-if [ -n "${GITHUB_TOKEN}" ]; then
-    BUILD_ARGS+=" --build-arg GITHUB_TOKEN=${GITHUB_TOKEN} "
-fi
-if [ -n "${GITLAB_TOKEN}" ]; then
-    BUILD_ARGS+=" --build-arg GITLAB_TOKEN=${GITLAB_TOKEN} "
-fi
-check_wheel_file() {
-    local wheel_dir="$1"
-    # Check if directory exists
-    if [ ! -d "$wheel_dir" ]; then
-        echo "Error: Directory '$wheel_dir' does not exist"
-        return 1
-    fi
-    # Look for .whl files
-    wheel_count=$(find "$wheel_dir" -name "*.whl" | wc -l)
-    if [ "$wheel_count" -eq 0 ]; then
-        echo "WARN: No .whl files found in '$wheel_dir'"
-        return 1
-    elif [ "$wheel_count" -gt 1 ]; then
-        echo "Warning: Multiple wheel files found in '$wheel_dir'. Will use first one found."
-        find "$wheel_dir" -name "*.whl" | head -n 1
-        return 0
-    fi
-    echo "Found $wheel_count wheel in $wheel_dir"
-    return 0
-}
-get_trtllm_version_from_pip_wheel() {
-    local wheel_spec="$1"
-    if [[ "$wheel_spec" =~ == ]]; then
-        local version
-        version=$(echo "$wheel_spec" | sed -n 's/.*==\([0-9a-zA-Z\.\-]*\).*/\1/p')
-        if _is_semver_ref "$version"; then
-            echo "${version#v}"
-            return 0
-        fi
-    fi
-    echo ""
-    return 0
-}
-trtllm_version_incompatible() {
-    local version="$1"
-    for incompatible_version in "${TRTLLM_ABI_INCOMPATIBLE_VERSIONS[@]}"; do
-        if [[ "$version" == "$incompatible_version" ]]; then
-            return 0
-        fi
-    done
-    return 1
-}
-_is_semver_ref() {
-    local ref="$1"
-    local semver_regex='^v?(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)([-+][0-9A-Za-z.-]+|[A-Za-z][0-9A-Za-z.-]+)?$'
-    [[ "$ref" =~ $semver_regex ]]
-}
-get_github_trtllm_ref() {
-    local commit="$1"
-    if _is_semver_ref "$commit"; then
-        if [[ "$commit" =~ ^v ]]; then
-            echo "$commit"
-        else
-            echo "v${commit}"
-        fi
-        return 0
-    fi
-    echo "$commit"
-    return 0
-}
-function determine_user_intention_trtllm() {
-    # The tensorrt llm installation flags are not quite mutually exclusive
-    # since the user should be able to point at a directory of their choosing
-    # for storing a trtllm wheel built from source.
-    #
-    # This function attempts to discern the intention of the user by
-    # applying checks, or rules, for each of the scenarios.
-    #
-    # /return: Calculated intention. One of "download", "install", "build".
-    #
-    # The three different methods of installing TRTLLM with build.sh are:
-    # 1. Download
-    # required: --tensorrtllm-pip-wheel
-    # optional: --tensorrtllm-index-url
-    # optional: --tensorrtllm-commit
-    #
-    # 2. Install from pre-built
-    # required: --tensorrtllm-pip-wheel-dir
-    # optional: --tensorrtllm-commit
-    #
-    # 3. Build from source
-    # required: --tensorrtllm-git-url
-    # optional: --tensorrtllm-commit
-    # optional: --tensorrtllm-pip-wheel-dir
-    local intention_download="false"
-    local intention_install="false"
-    local intention_build="false"
-    local intention_count=0
-    TRTLLM_INTENTION=${TRTLLM_INTENTION}
-    # Install from pre-built
-    if [[ -n "$TENSORRTLLM_PIP_WHEEL_DIR"  && ! -n "$TRTLLM_GIT_URL" ]]; then
-        intention_install="true";
-        intention_count=$((intention_count+1))
-        TRTLLM_INTENTION="install"
-    fi
-    echo "  Intent to Install TRTLLM: $intention_install"
-    # Build from source
-    if [[ -n "$TRTLLM_GIT_URL" ]]; then
-        intention_build="true";
-        intention_count=$((intention_count+1))
-        TRTLLM_INTENTION="build"
-    fi
-    echo "  Intent to Build TRTLLM: $intention_build"
-    # Download from repository
-    if [[ -n "$TENSORRTLLM_INDEX_URL" ]] && [[ -n "$TENSORRTLLM_PIP_WHEEL" ]]; then
-        intention_download="true";
-        intention_count=$((intention_count+1));
-        TRTLLM_INTENTION="download"
-        echo "INFO: Installing $TENSORRTLLM_PIP_WHEEL trtllm version from index: $TENSORRTLLM_INDEX_URL"
-    elif [[ -n "$TENSORRTLLM_PIP_WHEEL" ]]; then
-        intention_download="true";
-        intention_count=$((intention_count+1));
-        TRTLLM_INTENTION="download"
-        echo "INFO: Installing $TENSORRTLLM_PIP_WHEEL trtllm version from default pip index."
-    fi
-    # If nothing is set then we default to downloading the wheel
-    # with the defaults sepcified at the top this file.
-    if [[ -z "${TENSORRTLLM_INDEX_URL}" ]] && [[ -z "${TENSORRTLLM_PIP_WHEEL}" ]] && [[ "${intention_count}" -eq 0 ]]; then
-        intention_download="true";
-        intention_count=$((intention_count+1))
-        TRTLLM_INTENTION="download"
-        echo "INFO: Inferring download because both TENSORRTLLM_PIP_WHEEL and TENSORRTLLM_INDEX_URL are not set."
-    fi
-    echo "  Intent to Download TRTLLM: $intention_download"
-    if [[ ! "$intention_count" -eq 1 ]]; then
-        echo -e "[ERROR] Could not figure out the trtllm installation intent from the current flags. Please check your build.sh command against the following"
-        echo -e "  The grouped flags are mutually exclusive:"
-        echo -e "  To download and install use both: --tensorrtllm-index-url, --tensorrtllm-pip-wheel"
-        echo -e "  To install from a pre-built wheel use: --tensorrtllm-pip-wheel-dir"
-        echo -e "  To build from source and install use both: --tensorrtllm-commit, --tensorrtllm-git-url"
-        exit 1
-    fi
-}
-if [[ $FRAMEWORK == "TRTLLM" ]]; then
-    echo -e "Determining the user's TRTLLM installation intent..."
-    determine_user_intention_trtllm   # From this point forward, can assume correct TRTLLM flags
-    if [[ "$TRTLLM_INTENTION" == "download" ]]; then
-        TENSORRTLLM_INDEX_URL=${TENSORRTLLM_INDEX_URL:-$DEFAULT_TENSORRTLLM_INDEX_URL}
-        TENSORRTLLM_PIP_WHEEL=${TENSORRTLLM_PIP_WHEEL:-$DEFAULT_TENSORRTLLM_PIP_WHEEL}
-        TRTLLM_WHEEL_VERSION=$(get_trtllm_version_from_pip_wheel "${TENSORRTLLM_PIP_WHEEL}")
-        if trtllm_version_incompatible "${TRTLLM_WHEEL_VERSION}"; then
-            TRTLLM_WHEEL_IMAGE="nvcr.io/nvidia/tensorrt-llm/release:${TRTLLM_WHEEL_VERSION}"
-            BUILD_ARGS+=" --build-arg HAS_TRTLLM_CONTEXT=0"
-            BUILD_ARGS+=" --build-arg TRTLLM_WHEEL_IMAGE=${TRTLLM_WHEEL_IMAGE}"
-            PRINT_TRTLLM_WHEEL_FILE=${TRTLLM_WHEEL_IMAGE}
-        else
-            BUILD_ARGS+=" --build-arg HAS_TRTLLM_CONTEXT=0"
-            BUILD_ARGS+=" --build-arg TENSORRTLLM_PIP_WHEEL=${TENSORRTLLM_PIP_WHEEL}"
-            BUILD_ARGS+=" --build-arg TENSORRTLLM_INDEX_URL=${TENSORRTLLM_INDEX_URL}"
-            PRINT_TRTLLM_WHEEL_FILE=${TENSORRTLLM_PIP_WHEEL}
-        fi
-        # Create a dummy directory to satisfy the build context requirement
-        # There is no way to conditionally copy the build context in dockerfile.
-        mkdir -p /tmp/trtllm_wheel_context
-        BUILD_CONTEXT_ARG+=" --build-context trtllm_wheel=/tmp/trtllm_wheel_context"
-    elif [[ "$TRTLLM_INTENTION" == "install" ]]; then
-        echo "Checking for TensorRT-LLM wheel in ${TENSORRTLLM_PIP_WHEEL_DIR}"
-        if ! check_wheel_file "${TENSORRTLLM_PIP_WHEEL_DIR}"; then
-            echo "ERROR: Valid trtllm wheel file not found in ${TENSORRTLLM_PIP_WHEEL_DIR}"
-            echo "      If this is not intended you can try building from source with the following variables set instead:"
-            echo ""
-            echo "      --tensorrtllm-git-url https://github.com/NVIDIA/TensorRT-LLM --tensorrtllm-commit $TRTLLM_COMMIT"
-            exit 1
-        fi
-        echo "Installing TensorRT-LLM from local wheel directory"
-        BUILD_ARGS+=" --build-arg HAS_TRTLLM_CONTEXT=1"
-        BUILD_CONTEXT_ARG+=" --build-context trtllm_wheel=${TENSORRTLLM_PIP_WHEEL_DIR}"
-        PRINT_TRTLLM_WHEEL_FILE=$(find $TENSORRTLLM_PIP_WHEEL_DIR -name "*.whl" | head -n 1)
-    elif [[ "$TRTLLM_INTENTION" == "build" ]]; then
-        TENSORRTLLM_PIP_WHEEL_DIR=${TENSORRTLLM_PIP_WHEEL_DIR:=$DEFAULT_TENSORRTLLM_PIP_WHEEL_DIR}
-        echo "TRTLLM pip wheel output directory is: ${TENSORRTLLM_PIP_WHEEL_DIR}"
-        if [ "$DRY_RUN" != "true" ]; then
-            GIT_URL_ARG=""
-            if [ -n "${TRTLLM_GIT_URL}" ]; then
-                GIT_URL_ARG="-u ${TRTLLM_GIT_URL}"
-            fi
-            if ! env -i ${SOURCE_DIR}/build_trtllm_wheel.sh -o ${TENSORRTLLM_PIP_WHEEL_DIR} -c ${TRTLLM_COMMIT} -a ${ARCH} -n ${NIXL_REF} ${GIT_URL_ARG}; then
-                error "ERROR: Failed to build TensorRT-LLM wheel"
-            fi
-            BUILD_ARGS+=" --build-arg HAS_TRTLLM_CONTEXT=1"
-            BUILD_CONTEXT_ARG+=" --build-context trtllm_wheel=${TENSORRTLLM_PIP_WHEEL_DIR}"
-            PRINT_TRTLLM_WHEEL_FILE=$(find $TENSORRTLLM_PIP_WHEEL_DIR -name "*.whl" | head -n 1)
-        fi
-    else
-        echo 'No intention was set. This error should have been detected in "determine_user_intention_trtllm()". Exiting...'
-        exit 1
-    fi
-    # Need to know the commit of TRTLLM so we can determine the
-    # TensorRT installation associated with TRTLLM.
-    if [[ -z "$TRTLLM_COMMIT" ]]; then
-        # Attempt to default since the commit will work with a hash or a tag/branch
-        if [[ ! -z "$TENSORRTLLM_PIP_WHEEL" ]]; then
-            TRTLLM_COMMIT=$(get_trtllm_version_from_pip_wheel "${TENSORRTLLM_PIP_WHEEL}")
-            if [[ -z "$TRTLLM_COMMIT" ]]; then
-                echo -e "[ERROR] Could not parse a semver version from TENSORRTLLM_PIP_WHEEL: ${TENSORRTLLM_PIP_WHEEL}"
-                exit 1
-            fi
-            echo "Attempting to default TRTLLM_COMMIT to \"$TRTLLM_COMMIT\" for installation of TensorRT."
-        else
-            echo -e "[ERROR] TRTLLM framework was set as a target but the TRTLLM_COMMIT variable was not set."
-            echo -e "  Could not find a suitible default by infering from TENSORRTLLM_PIP_WHEEL."
-            echo -e "  TRTLLM_COMMIT is needed to install the correct version of TensorRT associated with TensorRT-LLM."
-            exit 1
-        fi
-    fi
-    GITHUB_TRTLLM_REF=$(get_github_trtllm_ref "${TRTLLM_COMMIT}")
-    BUILD_ARGS+=" --build-arg GITHUB_TRTLLM_COMMIT=${GITHUB_TRTLLM_REF}"
-fi
-# ENABLE_KVBM: Used in base Dockerfile for block-manager feature.
-#              Declared but not currently used in Dockerfile.{vllm,trtllm}.
-# Force KVBM to be enabled for VLLM and TRTLLM frameworks
-if [[ $FRAMEWORK == "VLLM" ]] || [[ $FRAMEWORK == "TRTLLM" ]]; then
-    echo "Forcing enable_kvbm to true in ${FRAMEWORK} image build"
-    ENABLE_KVBM=true
-fi
-# For other frameworks, ENABLE_KVBM defaults to false unless --enable-kvbm flag was provided
-if [[ ${ENABLE_KVBM} == "true" ]]; then
-    echo "Enabling KVBM in the dynamo image"
-    BUILD_ARGS+=" --build-arg ENABLE_KVBM=${ENABLE_KVBM} "
-fi
-# ENABLE_GPU_MEMORY_SERVICE: Used in Dockerfiles for gpu_memory_service wheel.
-#                            Declared but not currently used in Dockerfile.trtllm.
-# Force GPU Memory Service to be enabled for VLLM and SGLANG frameworks
-if [[ $FRAMEWORK == "VLLM" ]] || [[ $FRAMEWORK == "SGLANG" ]]; then
-    echo "Forcing enable_gpu_memory_service to true in ${FRAMEWORK} image build"
-    ENABLE_GPU_MEMORY_SERVICE=true
-fi
-# For other frameworks, ENABLE_GPU_MEMORY_SERVICE defaults to false unless --enable-gpu-memory-service flag was provided
-if [[ ${ENABLE_GPU_MEMORY_SERVICE} == "true" ]]; then
-    echo "Enabling GPU Memory Service in the dynamo image"
-    BUILD_ARGS+=" --build-arg ENABLE_GPU_MEMORY_SERVICE=${ENABLE_GPU_MEMORY_SERVICE} "
-fi
-# ENABLE_MEDIA_NIXL: Enable media processing with NIXL support
-# Used in base Dockerfile for maturin build feature flag.
-# Can be explicitly overridden with --enable-media-nixl flag
-if [ -z "${ENABLE_MEDIA_NIXL}" ]; then
-    if [[ $FRAMEWORK == "VLLM" ]] || [[ $FRAMEWORK == "TRTLLM" ]] || [[ $FRAMEWORK == "SGLANG" ]]; then
-        ENABLE_MEDIA_NIXL=true
-    else
-        ENABLE_MEDIA_NIXL=false
-    fi
-fi
-BUILD_ARGS+=" --build-arg ENABLE_MEDIA_NIXL=${ENABLE_MEDIA_NIXL} "
-# ENABLE_MEDIA_FFMPEG: Enable media processing with FFMPEG support
-# Used in base Dockerfile for maturin build feature flag.
-# Can be explicitly overridden with --enable-media-ffmpeg flag
-if [ -z "${ENABLE_MEDIA_FFMPEG}" ]; then
-    if [[ $FRAMEWORK == "VLLM" ]] || [[ $FRAMEWORK == "TRTLLM" ]] || [[ $FRAMEWORK == "SGLANG" ]]; then
-        ENABLE_MEDIA_FFMPEG=true
-    else
-        ENABLE_MEDIA_FFMPEG=false
-    fi
-fi
-BUILD_ARGS+=" --build-arg ENABLE_MEDIA_FFMPEG=${ENABLE_MEDIA_FFMPEG} "
-# NIXL_UCX_REF: Used in base Dockerfile only.
-if [ -n "${NIXL_UCX_REF}" ]; then
-    BUILD_ARGS+=" --build-arg NIXL_UCX_REF=${NIXL_UCX_REF} "
-fi
-# NIXL_GDRCOPY_REF: Used in dynamo base stages.
-if [ -n "${NIXL_GDRCOPY_REF}" ]; then
-    BUILD_ARGS+=" --build-arg NIXL_GDRCOPY_REF=${NIXL_GDRCOPY_REF} "
-fi
-# MAX_JOBS is only used by Dockerfile.vllm
-if [ -n "${MAX_JOBS}" ]; then
-    BUILD_ARGS+=" --build-arg MAX_JOBS=${MAX_JOBS} "
-fi
-if [[ $FRAMEWORK == "SGLANG" ]]; then
-    echo "Customizing Python, CUDA, and framework images for sglang images"
-    BUILD_ARGS+=" --build-arg CUDA_VERSION=${SGLANG_CUDA_VERSION}"
-fi
-BUILD_ARGS+=" --build-arg PYTHON_VERSION=${PYTHON_VERSION}"
-# Add sccache build arguments
-if [ "$USE_SCCACHE" = true ]; then
-    BUILD_ARGS+=" --build-arg USE_SCCACHE=true"
-    BUILD_ARGS+=" --build-arg SCCACHE_BUCKET=${SCCACHE_BUCKET}"
-    BUILD_ARGS+=" --build-arg SCCACHE_REGION=${SCCACHE_REGION}"
-    BUILD_ARGS+=" --secret id=aws-key-id,env=AWS_ACCESS_KEY_ID"
-    BUILD_ARGS+=" --secret id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY"
-fi
-if [[ "$PLATFORM" == *"linux/arm64"* && "${FRAMEWORK}" == "SGLANG" ]]; then
-    # Add arguments required for sglang blackwell build
-    BUILD_ARGS+=" --build-arg GRACE_BLACKWELL=true --build-arg BUILD_TYPE=blackwell_aarch64"
-fi
-# Dev/local-dev targets: build from a concatenated Dockerfile:
-#   <framework Dockerfile> + container/dev/Dockerfile.dev
-if [[ -z "${TARGET:-}" || "${TARGET:-}" == "dev" || "${TARGET:-}" == "local-dev" ]]; then
-    _gen_dev_dockerfile_temp() {
-        local fw_df dev_df out
-        fw_df="$1"
-        dev_df="${SOURCE_DIR}/dev/Dockerfile.dev"
-        if [[ ! -f "${fw_df}" ]]; then
-            error "ERROR:" "Framework Dockerfile not found: ${fw_df}"
-        fi
-        if [[ ! -f "${dev_df}" ]]; then
-            error "ERROR:" "Dev Dockerfile not found: ${dev_df}"
-        fi
-        out="$(mktemp -t dynamo-dev-combined.XXXXXX.Dockerfile)"
-        cat "${fw_df}" "${dev_df}" > "${out}"
-        printf '\n' >> "${out}"
-        if [[ ! -s "${out}" ]]; then
-            rm -f "${out}"
-            error "ERROR:" "Temp Dockerfile was generated but is empty"
-        fi
-        printf '%s\n' "${out}"
-    }
-    DOCKERFILE="$(_gen_dev_dockerfile_temp "${DOCKERFILE}")"
-    # Ensure we clean up the temp Dockerfile (opt-out with KEEP_DEV_DOCKERFILE_TEMP=1 for debugging).
-    if [[ "${KEEP_DEV_DOCKERFILE_TEMP:-}" != "1" ]]; then
-        trap 'rm -f "${DOCKERFILE}" 2>/dev/null || true' EXIT
-    fi
-    # Dockerfile.dev expects a lowercase framework string.
-    BUILD_ARGS+=" --build-arg FRAMEWORK=${FRAMEWORK,,} "
-    # Preserve historical tagging behavior for dev/local-dev (build.sh used to delegate out).
-    base="${PRIMARY_TAG}"
-    base="${base%-runtime}"
-    base="${base%-local-dev}"
-    base="${base%-dev}"
-    if [[ -z "${TARGET:-}" || "${TARGET}" == "dev" ]]; then
-        TAG="--tag ${base}-dev"
-    else
-        TAG="--tag ${base}-local-dev"
-        # Default UID/GID behavior: current user if not specified.
-        if [[ -z "${CUSTOM_UID:-}" ]]; then
-            CUSTOM_UID="$(id -u)"
-        fi
-        if [[ -z "${CUSTOM_GID:-}" ]]; then
-            CUSTOM_GID="$(id -g)"
-        fi
-        BUILD_ARGS+=" --build-arg USER_UID=${CUSTOM_UID} --build-arg USER_GID=${CUSTOM_GID} "
-    fi
-fi
-LATEST_TAG=""
-if [ -z "${NO_TAG_LATEST}" ]; then
-    if [[ -z "${TARGET:-}" || "${TARGET}" == "dev" ]]; then
-        LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}"
-    elif [[ "${TARGET}" == "local-dev" ]]; then
-        LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}-local-dev"
-    else
-        LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}"
-        if [ -n "${TARGET}" ] && [ "${TARGET}" != "local-dev" ]; then
-            LATEST_TAG="${LATEST_TAG}-${TARGET}"
-        fi
-    fi
-fi
-show_image_options
-# Handle FRONTEND target: build EPP image first
-if [[ ${TARGET^^} == "FRONTEND" ]]; then
-    echo "Building FRONTEND image - requires EPP image"
-    echo ""
-    echo "Building EPP image for Frontend using Makefile..."
-    # EPP directory with the new self-contained build
-    EPP_DIR="${BUILD_CONTEXT}/deploy/inference-gateway/epp"
-    # Set DOCKER_PROXY from ECR_HOSTNAME if available (for pulling base images through proxy)
-    # This prevents rate-limiting when building in CI across multiple PRs
-    DOCKER_PROXY_ARG=""
-    if [[ -n "${ECR_HOSTNAME}" ]]; then
-        DOCKER_PROXY="${ECR_HOSTNAME}/dockerhub/"
-        DOCKER_PROXY_ARG="DOCKER_PROXY=${DOCKER_PROXY}"
-        echo "Using DOCKER_PROXY: ${DOCKER_PROXY}"
-    fi
-    # Build EPP image using the Makefile
-    # The Makefile handles: building Dynamo library, building Docker image, loading it locally
-    $RUN_PREFIX make -C "${EPP_DIR}" all DYNAMO_DIR="${BUILD_CONTEXT}" ${DOCKER_PROXY_ARG}
-    # Compute EPP image tag (must match Makefile's IMAGE_TAG)
-    # IMAGE_TAG = $(IMAGE_REPO):$(GIT_TAG)
-    # IMAGE_REPO = $(DOCKER_SERVER)/$(IMAGE_NAME)
-    # Image lives in local cache only, not pushed to any registry
-    EPP_DOCKER_SERVER="dynamo"
-    EPP_IMAGE_NAME="dynamo-epp"
-    EPP_GIT_TAG=$(git describe --tags --dirty --always 2>/dev/null || echo "dev")
-    EPP_IMAGE_TAG="${EPP_DOCKER_SERVER}/${EPP_IMAGE_NAME}:${EPP_GIT_TAG}"
-    echo "Successfully built EPP image: ${EPP_IMAGE_TAG}"
-    # Add build args for frontend image
-    BUILD_ARGS+=" --build-arg EPP_IMAGE=${EPP_IMAGE_TAG}"
-fi
-# Always build the main image first
-# Create build log directory for BuildKit reports
-BUILD_LOG_DIR="${BUILD_CONTEXT}/build-logs"
-mkdir -p "${BUILD_LOG_DIR}"
-SINGLE_BUILD_LOG="${BUILD_LOG_DIR}/single-stage-build.log"
-# Determine --load flag (default on unless --no-load or --push specified)
-LOAD_FLAG=""
-if [ "$NO_LOAD" != "true" ] && [ -z "$PUSH" ]; then
-    LOAD_FLAG=" --load"
-fi
-# Use BuildKit for enhanced metadata
-if docker buildx version &>/dev/null; then
-    $RUN_PREFIX docker buildx build --progress=plain ${LOAD_FLAG} ${PUSH} -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE 2>&1 | tee "${SINGLE_BUILD_LOG}"
-    BUILD_EXIT_CODE=${PIPESTATUS[0]}
-else
-    $RUN_PREFIX DOCKER_BUILDKIT=1 docker build --progress=plain -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE 2>&1 | tee "${SINGLE_BUILD_LOG}"
-    BUILD_EXIT_CODE=${PIPESTATUS[0]}
-fi
-if [ ${BUILD_EXIT_CODE} -ne 0 ]; then
-    exit ${BUILD_EXIT_CODE}
-fi
-# Handle --make-efa flag: add AWS EFA layer on top of the built image
-# This runs BEFORE local-dev so the flow is: dev -> dev-aws -> local-dev-aws
-if [[ "${MAKE_EFA:-}" == "true" ]]; then
-    # Get the base image that was just built (use PRIMARY_TAG to avoid parsing issues)
-    BASE_IMAGE_FOR_EFA="${PRIMARY_TAG}"
-    # Determine the EFA stage based on the target
-    # runtime target -> runtime-aws stage
-    # dev/local-dev target -> dev-aws stage
-    if [[ "${TARGET:-dev}" == "runtime" ]]; then
-        EFA_STAGE="runtime-aws"
-    else
-        EFA_STAGE="dev-aws"
-    fi
-    # Build AWS tags by appending -aws to existing tags
-    AWS_TAGS=""
-    if [[ -n "$TAG" ]]; then
-        AWS_TAG=$(echo "$TAG" | sed 's/--tag //')
-        AWS_TAGS+=" --tag ${AWS_TAG}-aws"
-    fi
-    if [[ -n "$LATEST_TAG" ]]; then
-        AWS_LATEST_TAG=$(echo "$LATEST_TAG" | sed 's/--tag //')
-        AWS_TAGS+=" --tag ${AWS_LATEST_TAG}-aws"
-    fi
-    build_aws_with_header "$BASE_IMAGE_FOR_EFA" "$AWS_TAGS" "$EFA_STAGE" "Successfully built ${EFA_STAGE} image"
-fi
-{ set +x; } 2>/dev/null
\ No newline at end of file
--- a/container/context.yaml
+++ b/container/context.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# This file holds the default ARG values for the Dockerfiles generated
+# by render.py. These are the recommended defaults for users and are
+# the source of truth for the values used in our delivered images.
+#
+# Some ARGs have multiple valid values and can be changed for local testing.
+# To do so, either edit this file locally or pass --build-arg to docker build
+# when building.
+# Defaults used when rendering with --framework=dynamo. The args.Dockerfile
+# template also reads python_version, nats_version, etcd_version,
+# ffmpeg_version and nixl_ucx_ref from this section for every framework.
+dynamo:
+  base_image: nvcr.io/nvidia/cuda-dl-base
+  base_image_tag: 25.01-cuda12.8-devel-ubuntu24.04
+  epp_image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v0.5.1
+  frontend_image: nvcr.io/nvidia/base/ubuntu:noble-20250619
+  # Quoted so YAML keeps the version a string instead of parsing it as a float.
+  python_version: "3.12"
+  nats_version: v2.10.28
+  etcd_version: v3.5.21
+  nixl_ref: 0.9.0
+  nixl_ucx_ref: v1.20.0
+  nixl_gdrcopy_ref: v2.5.1
+  nixl_ucx_efa_ref: 9d2b88a1f67faf9876f267658bd077b379b8bb76
+  nixl_libfabric_ref: v2.3.0
+  # Feature flags are quoted so they stay the strings "true"/"false"; an
+  # unquoted YAML boolean would be rendered by Jinja as True/False.
+  enable_kvbm: "false"
+  enable_media_nixl: "false"
+  enable_media_ffmpeg: "false"
+  enable_gpu_memory_service: "false"
+  ffmpeg_version: "7.1"
+  efa_version: 1.45.1
+# Defaults used when rendering with --framework=vllm.
+vllm:
+  base_image: nvcr.io/nvidia/cuda-dl-base
+  runtime_image: nvcr.io/nvidia/cuda
+  # ${CUDA_VERSION} is written into the rendered Dockerfile unexpanded;
+  # presumably Docker resolves it from the CUDA_VERSION ARG - TODO confirm.
+  runtime_image_tag: ${CUDA_VERSION}.0-runtime-ubuntu24.04
+  vllm_ref: v0.14.1
+  # NOTE(review): key is spelled "flashinf_ref"; confirm the templates use the
+  # same spelling before renaming it.
+  flashinf_ref: v0.5.3
+  lmcache_ref: 0.3.12
+  max_jobs: "10"
+  # Feature flags are quoted so they render as lowercase true/false ARG values.
+  enable_media_nixl: "true"
+  enable_media_ffmpeg: "true"
+  enable_gpu_memory_service: "true"
+  enable_kvbm: "true"
+  # Per-CUDA overrides: the args.Dockerfile template builds the key as
+  # "cuda" + --cuda-version (e.g. cuda12.9) and reads base_image_tag from it.
+  cuda12.9:
+    base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
+  cuda13.0:
+    base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
+# Defaults used when rendering with --framework=sglang.
+sglang:
+  base_image: nvcr.io/nvidia/cuda-dl-base
+  runtime_image: lmsysorg/sglang
+  # Per-CUDA overrides: the args.Dockerfile template builds the key as
+  # "cuda" + --cuda-version and reads both base_image_tag and
+  # runtime_image_tag from the matching entry.
+  cuda12.9:
+    base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
+    runtime_image_tag: v0.5.7-runtime
+  cuda13.0:
+    base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
+    runtime_image_tag: v0.5.8-cu130-runtime
+  # Feature flags are quoted so they render as lowercase true/false ARG values.
+  enable_media_nixl: "true"
+  enable_media_ffmpeg: "true"
+  enable_gpu_memory_service: "true"
+  enable_kvbm: "false"
+# Defaults used when rendering with --framework=trtllm. Unlike vllm/sglang,
+# the image tags here are not keyed by CUDA version.
+trtllm:
+  base_image: nvcr.io/nvidia/pytorch
+  base_image_tag: 25.12-py3
+  runtime_image: nvcr.io/nvidia/cuda-dl-base
+  runtime_image_tag: 25.10-cuda13.0-runtime-ubuntu24.04
+  # Feature flags are quoted so they render as lowercase true/false ARG values.
+  enable_media_nixl: "true"
+  enable_media_ffmpeg: "true"
+  enable_gpu_memory_service: "false"
+  enable_kvbm: "true"
+  python_version: "3.12"
+  index_url: https://pypi.nvidia.com/
+  pip_wheel_dir: /tmp/trtllm_wheel/
+  # NOTE(review): pip_wheel pins 1.3.0rc1 while github_trtllm_commit below is
+  # 1.2.0rc6 - confirm the version mismatch is intentional.
+  pip_wheel: tensorrt-llm==1.3.0rc1
+  # The ${VAR#*==} form strips everything up to and including "==", leaving
+  # the wheel version as the image tag; presumably expanded by Docker from the
+  # TENSORRTLLM_PIP_WHEEL ARG - TODO confirm.
+  trtllm_wheel_image: nvcr.io/nvidia/tensorrt-llm/release:${TENSORRTLLM_PIP_WHEEL#*==}
+  github_trtllm_commit: 1.2.0rc6
+  torch_version: 2.10.0a0+b4e4ee81d3.nv25.12
+  torch_tensorrt_version: 2.10.0a0
+  torchvision_version: 0.25.0a0+ca221243
+  torchao_ver: 0.15.0+git01374eb5
+  torchdata_ver: 0.11.0
+  torchtitan_ver: 0.2.0
+  jinja2_version: 3.1.6
+  sympy_version: 1.14.0
+  pytorch_triton_ver: 3.5.1+gitbfeb0668.nv25.12
+  flash_attn_version: 2.7.4.post1+25.12
+  flashinfer_python_ver: 0.6.1
+  # The removed build.sh passed HAS_TRTLLM_CONTEXT=1 only when installing from
+  # a local wheel directory and 0 when downloading the wheel.
+  has_trtllm_context: "0"
--- a/container/render.py
+++ b/container/render.py
+#!/usr/bin/env python3
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+import argparse
+import re
+import sys
+from pathlib import Path
+import yaml
+from jinja2 import Environment, FileSystemLoader
+def parse_args():
+    """Parse the command-line options that select which Dockerfile to render.
+    Returns:
+        argparse.Namespace with the attributes ``framework``, ``target``,
+        ``platform``, ``cuda_version``, ``make_efa``, ``short_output`` and
+        ``show_result``. ``framework`` is always lowercase.
+    """
+    parser = argparse.ArgumentParser(
+        description="Renders dynamo Dockerfiles from templates"
+    )
+    parser.add_argument(
+        "--framework",
+        # Lowercase the value: the docs invoke `--framework=VLLM`, while the
+        # context.yaml keys and the template comparisons are all lowercase.
+        type=str.lower,
+        default="vllm",
+        help="Dockerfile framework to use [dynamo, vllm, sglang, trtllm]",
+    )
+    parser.add_argument(
+        "--target",
+        type=str,
+        default="runtime",
+        help="Dockerfile target to use. Non-exhaustive examples: [runtime, dev, local-dev]",
+    )
+    parser.add_argument(
+        "--platform",
+        type=str,
+        default="amd64",
+        help="Dockerfile platform to use. [amd64, arm64]",
+    )
+    parser.add_argument(
+        "--cuda-version",
+        type=str,
+        default="12.9",
+        help="CUDA version to use. [12.9, 13.0]",
+    )
+    parser.add_argument("--make-efa", action="store_true", help="Enable AWS EFA")
+    parser.add_argument(
+        "--short-output",
+        action="store_true",
+        help="Output filename is just rendered.Dockerfile",
+    )
+    parser.add_argument(
+        "--show-result",
+        action="store_true",
+        help="Prints the rendered Dockerfile to stdout.",
+    )
+    args = parser.parse_args()
+    return args
+def validate_args(args):
+    """Reject option values that the templates cannot render correctly.
+    Args:
+        args: Namespace returned by parse_args(); ``framework`` and
+            ``platform`` are checked.
+    Raises:
+        SystemExit: with an error message when a value is not supported.
+    """
+    valid_frameworks = ("dynamo", "vllm", "sglang", "trtllm")
+    valid_platforms = ("amd64", "arm64")
+    if args.framework not in valid_frameworks:
+        raise SystemExit(
+            f"ERROR: --framework must be one of {', '.join(valid_frameworks)} (got '{args.framework}')"
+        )
+    # The args template maps every platform other than "amd64" to
+    # ARCH_ALT=aarch64, so a typo would silently render an arm64 Dockerfile.
+    if args.platform not in valid_platforms:
+        raise SystemExit(
+            f"ERROR: --platform must be one of {', '.join(valid_platforms)} (got '{args.platform}')"
+        )
+    return
+def render(args, context, script_dir):
+    """Render Dockerfile.template and write the result into script_dir.
+    Args:
+        args: Parsed CLI options; framework, target, platform, cuda_version and
+            make_efa are passed to the template, short_output and show_result
+            control the output file name and echoing.
+        context: Mapping loaded from context.yaml, exposed to the template as
+            ``context``.
+        script_dir: Directory searched for templates and used as the output
+            location.
+    """
+    jinja_env = Environment(
+        loader=FileSystemLoader(script_dir),
+        trim_blocks=False,
+        lstrip_blocks=True,
+    )
+    template_vars = {
+        "context": context,
+        "framework": args.framework,
+        "target": args.target,
+        "platform": args.platform,
+        "cuda_version": args.cuda_version,
+        "make_efa": args.make_efa,
+    }
+    raw_output = jinja_env.get_template("Dockerfile.template").render(**template_vars)
+    # Collapse any run of 3+ newlines down to a single blank line.
+    dockerfile_text = re.sub(r"\n{3,}", "\n\n", raw_output)
+    filename = (
+        "rendered.Dockerfile"
+        if args.short_output
+        else f"{args.framework}-{args.target}-cuda{args.cuda_version}-{args.platform}-rendered.Dockerfile"
+    )
+    with open(f"{script_dir}/{filename}", "w") as out_file:
+        out_file.write(dockerfile_text)
+    if args.show_result:
+        # Frame the echoed Dockerfile with a banner above and a rule below.
+        for line in (
+            "##############",
+            "# Dockerfile #",
+            "##############",
+            dockerfile_text,
+            "##############",
+        ):
+            print(line)
+    print(f"INFO: Generated Dockerfile written to {script_dir}/{filename}")
+    return
+def main():
+    """CLI entry point: parse options, load context.yaml, render the Dockerfile."""
+    args = parse_args()
+    validate_args(args)
+    # context.yaml and the templates are looked up next to the invoked script.
+    script_dir = Path(sys.argv[0]).parent
+    with open(f"{script_dir}/context.yaml", "r") as context_file:
+        context = yaml.safe_load(context_file)
+    render(args, context, script_dir)
+    if args.target != "local-dev":
+        return
+    # local-dev images need the host UID/GID passed at docker build time.
+    print(
+        "INFO: Remember to add --build-arg values for USER_UID and USER_GID when building a local-dev image!"
+    )
+    print(
+        "      Recommendation: --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g)"
+    )
+if __name__ == "__main__":
+    main()
--- a/container/templates/args.Dockerfile
+++ b/container/templates/args.Dockerfile
+{#
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#}
+##########################
+#### Build Arguments #####
+##########################
+# Define general architecture ARGs for supporting both x86 and aarch64 builds.
+#   ARCH: Used for package suffixes (e.g., amd64, arm64)
+#   ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
+#
+# The defaults below are filled in at render time from the platform given to
+# the renderer: amd64 yields ARCH_ALT=x86_64, any other platform yields aarch64.
+#
+# They can still be overridden at build time, e.g. for arm64/aarch64:
+#   --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
+# TODO OPS-592: Leverage uname -m to determine ARCH instead of passing it as an arg
+ARG ARCH={{ platform }}
+ARG ARCH_ALT={{ "x86_64" if platform == "amd64" else "aarch64" }}
+# Python/CUDA configuration
+ARG PYTHON_VERSION={{ context.dynamo.python_version }}
+ARG CUDA_VERSION={{ cuda_version }}
+# CUDA_MAJOR keeps only the part of CUDA_VERSION before the first "."
+ARG CUDA_MAJOR=${CUDA_VERSION%%.*}
+{% if framework in ["vllm", "sglang"] -%}
+{% set cuda_context_key = "cuda" + cuda_version %}
+# Base image configuration
+ARG BASE_IMAGE={{ context[framework].base_image }}
+ARG BASE_IMAGE_TAG={{ context[framework][cuda_context_key].base_image_tag }}
+{% else -%}
+ARG BASE_IMAGE={{ context[framework].base_image }}
+ARG BASE_IMAGE_TAG={{ context[framework].base_image_tag }}
+{%- endif %}
+{% if framework == "sglang" -%}
+{% set cuda_context_key = "cuda" + cuda_version %}
+# Runtime image configuration
+ARG RUNTIME_IMAGE={{ context[framework].runtime_image }}
+ARG RUNTIME_IMAGE_TAG={{ context[framework][cuda_context_key].runtime_image_tag }}
+{% elif framework != "dynamo" -%}
+ARG RUNTIME_IMAGE={{ context[framework].runtime_image }}
+ARG RUNTIME_IMAGE_TAG={{ context[framework].runtime_image_tag }}
+{%- endif %}
+# Build configuration
+ARG ENABLE_KVBM={{ context[framework].enable_kvbm }}
+# CARGO_BUILD_JOBS has no default here; it is only set when passed via --build-arg
+ARG CARGO_BUILD_JOBS
+ARG NATS_VERSION={{ context.dynamo.nats_version }}
+ARG ETCD_VERSION={{ context.dynamo.etcd_version }}
+ARG ENABLE_MEDIA_NIXL={{ context[framework].enable_media_nixl }}
+ARG ENABLE_MEDIA_FFMPEG={{ context[framework].enable_media_ffmpeg }}
+ARG FFMPEG_VERSION={{ context.dynamo.ffmpeg_version }}
+ARG ENABLE_GPU_MEMORY_SERVICE={{ context[framework].enable_gpu_memory_service }}
+# SCCACHE configuration
+# USE_SCCACHE has no default here; bucket and region default to empty strings
+ARG USE_SCCACHE
+ARG SCCACHE_BUCKET=""
+ARG SCCACHE_REGION=""
+# NIXL configuration
+ARG NIXL_UCX_REF={{ context.dynamo.nixl_ucx_ref }}
+ARG NIXL_REF={{ context.dynamo.nixl_ref }}
+ARG NIXL_GDRCOPY_REF={{ context.dynamo.nixl_gdrcopy_ref }}
+ARG NIXL_LIBFABRIC_REF={{ context.dynamo.nixl_libfabric_ref }}
+{% if target in ["dev", "local-dev"] %}
+ARG FRAMEWORK={{ framework }}
+{% endif %}
+{% if "frontend" == target %}
+ARG EPP_IMAGE={{ context.dynamo.epp_image }}
+ARG FRONTEND_IMAGE={{ context.dynamo.frontend_image }}
+{% endif %}
+{% if framework == "vllm" -%}
+# Make sure to update the dependency version in pyproject.toml when updating this
+ARG VLLM_REF={{ context.vllm.vllm_ref }}
+ARG MAX_JOBS={{ context.vllm.max_jobs }}
+# FlashInfer is only respected when building vLLM from source, i.e. when VLLM_REF does not start with 'v', or for arm64 builds
+ARG FLASHINF_REF={{ context.vllm.flashinf_ref }}
+ARG LMCACHE_REF={{ context.vllm.lmcache_ref }}
+# If left blank, we fall back to the vLLM defaults
+ARG DEEPGEMM_REF=""
+{%- endif -%}
+{% if framework == "trtllm" %}
+# TensorRT-LLM specific configuration (values come from the trtllm section of the render context)
+ARG HAS_TRTLLM_CONTEXT={{ context.trtllm.has_trtllm_context }}
+ARG TENSORRTLLM_PIP_WHEEL={{ context.trtllm.pip_wheel }}
+ARG TENSORRTLLM_INDEX_URL={{ context.trtllm.index_url }}
+ARG GITHUB_TRTLLM_COMMIT={{ context.trtllm.github_trtllm_commit }}
+ARG TRTLLM_WHEEL_IMAGE={{ context.trtllm.trtllm_wheel_image }}
+# Package versions for the PyTorch installation copied from NGC PyTorch
+# NOTE(review): purpose taken from the original comment; confirm where these ARGs are consumed
+ARG FLASHINFER_PYTHON_VER={{ context.trtllm.flashinfer_python_ver }}
+ARG PYTORCH_TRITON_VER={{ context.trtllm.pytorch_triton_ver }}
+ARG TORCHAO_VER={{ context.trtllm.torchao_ver }}
+ARG TORCHDATA_VER={{ context.trtllm.torchdata_ver }}
+ARG TORCHTITAN_VER={{ context.trtllm.torchtitan_ver }}
+ARG TORCH_VER={{ context.trtllm.torch_version }}
+ARG TORCH_TENSORRT_VER={{ context.trtllm.torch_tensorrt_version }}
+ARG TORCHVISION_VER={{ context.trtllm.torchvision_version }}
+ARG JINJA2_VER={{ context.trtllm.jinja2_version }}
+ARG SYMPY_VER={{ context.trtllm.sympy_version }}
+ARG FLASH_ATTN_VER={{ context.trtllm.flash_attn_version }}
+# Python configuration: TensorRT-LLM-specific Python version, read from the trtllm context
+ARG TRTLLM_PYTHON_VERSION={{ context[framework].python_version }}
+{%- endif -%}
+{% if make_efa == true %}
+# AWS EFA configuration; EFA_BASE_IMAGE is "runtime" for the runtime target and "dev" for every other target
+ARG EFA_VERSION={{ context.dynamo.efa_version }}
+ARG EFA_BASE_IMAGE={{ "runtime" if target=="runtime" else "dev" }}
+{%- endif -%}
\ No newline at end of file
--- a/container/Dockerfile.aws
+++ b/container/Dockerfile.aws
-# syntax=docker/dockerfile:1.10.0
+{#
-# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
+#}
+#############################
+########## AWS EFA ##########
+#############################
 #
-# PURPOSE: AWS EFA support layer
+# This stage extends the runtime/dev stage with AWS EFA installer
-#
-# This Dockerfile adds AWS EFA (Elastic Fabric Adapter) support on top of
-# the runtime or dev stages from framework Dockerfiles (vllm, sglang, trtllm).
-#
-# Usage (via build.sh with --make-efa flag):
-#   ./build.sh --framework vllm --target runtime --make-efa
-#   ./build.sh --framework vllm --target local-dev --make-efa
-ARG BASE_IMAGE
-ARG EFA_VERSION
-###########################################################
-########## Runtime with AWS EFA ##########################
-###########################################################
-#
-# This stage extends the runtime stage with AWS EFA installer
 # which includes: libfabric and aws-ofi-nccl plugin
 #
 # Use this stage when deploying on AWS infrastructure with EFA support
-FROM ${BASE_IMAGE} AS runtime-aws
+FROM ${EFA_BASE_IMAGE} AS aws
 ARG EFA_VERSION
+{% if target == "runtime" %}
 USER root
+{% endif %}
 # Install AWS EFA installer with bundled libfabric and aws-ofi-nccl
 # Flags explanation:
@@ -48,43 +38,9 @@ RUN mkdir -p /tmp/efa && \
 ENV EFA_VERSION="${EFA_VERSION}"
+{% if target == "runtime" %}
 USER dynamo
+{% endif %}
-ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
-CMD []
-########################################################################
-########## Development with AWS EFA (run.sh, runs as root user) ########
-########################################################################
-#
-# PURPOSE: Development environment with AWS EFA support
-#
-# This stage extends dev stages with development tools for building and
-# debugging on EFA-enabled AWS instances.
-FROM ${BASE_IMAGE} AS dev-aws
-ARG EFA_VERSION
-# Dev stage runs as root, no USER switch needed
-# Install AWS EFA installer with bundled libfabric and aws-ofi-nccl
-# Flags explanation:
-#   --skip-kmod: Skip kernel module installation (handled by host)
-#   --skip-limit-conf: Skip ulimit configuration (handled by container runtime)
-#   --no-verify: Skip GPG verification (optional, can be removed if verification is needed)
-RUN mkdir -p /tmp/efa && \
-    cd /tmp/efa && \
-    curl --retry 3 --retry-delay 2 -fsSL -o aws-efa-installer-${EFA_VERSION}.tar.gz \
-        https://efa-installer.amazonaws.com/aws-efa-installer-${EFA_VERSION}.tar.gz && \
-    tar -xf aws-efa-installer-${EFA_VERSION}.tar.gz && \
-    cd aws-efa-installer && \
-    apt-get update && \
-    ./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify && \
-    rm -rf /tmp/efa && \
-    rm -rf /opt/amazon/aws-ofi-nccl && \
-    ldconfig
-ENV EFA_VERSION="${EFA_VERSION}"
 ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
 CMD []