feat: Dockerfile templating (#5633)

Signed-off-by: Dillon Cullinan <dcullinan@nvidia.com>

feat: Dockerfile templating (#5633)
Signed-off-by: Dillon Cullinan <dcullinan@nvidia.com>
ac020629 · Dillon Cullinan · GitHub · 5755a8de · ac020629 · ac020629
Unverified Commit ac020629 authored Feb 10, 2026 by Dillon Cullinan Committed by GitHub Feb 10, 2026
20 changed files
--- a/.devcontainer/README.md
+++ b/.devcontainer/README.md
@@ -143,23 +143,12 @@ Build the appropriate framework image (e.g., `dynamo:latest-vllm-local-dev`) fro
 ```bash
 # Single command approach (recommended)
 export FRAMEWORK=VLLM         # Note: any of VLLM, SGLANG, TRTLLM can be used
-./container/build.sh --framework $FRAMEWORK --target local-dev
+python container/render.py --framework=${FRAMEWORK} --target=local-dev --short-output
+docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile .
 # Now you've built the local-dev image. The docker build above does not tag it; add `--tag dynamo:latest-vllm-local-dev` to give it the name used in this guide.
 ```
-Alternatively, you can build a development container, then build local-dev:
-```bash
-export FRAMEWORK=VLLM
-./container/build.sh --framework $FRAMEWORK
-# Now you have a development image dynamo:latest-vllm
-./container/build.sh --dev-image dynamo:latest-${FRAMEWORK,,}
-# Now you have a local-dev image dynamo:latest-vllm-local-dev
-```
 The local-dev image will give you local user permissions matching your host user and includes extra developer utilities (debugging tools, text editors, system monitors, etc.).
 ### Step 1: Choose Your Framework
@@ -427,10 +416,8 @@ If you see errors like "container is not running" or "An error occurred setting
   # If missing, build the dev image first, then build local-dev
   export FRAMEWORK=VLLM  # Replace with VLLM, SGLANG, or TRTLLM
-   ./container/build.sh --framework $FRAMEWORK
+   python container/render.py --framework=${FRAMEWORK} --target=local-dev --short-output
-   # change to lower case portable way across shells
+   docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile .
-   ./container/build.sh --dev-image dynamo:latest-$(echo "$FRAMEWORK" | tr '[:upper:]' '[:lower:]') --framework "$FRAMEWORK"
-   # Now you have dynamo:latest-vllm-local-dev
   ```
 2. **Container startup failure:**

--- a/.dockerignore
+++ b/.dockerignore
@@ -42,6 +42,7 @@
 **/target/*
 **/*safetensors
 container/Dockerfile*
+container/*.Dockerfile
 .venv
 .venv-docs

--- a/.github/actions/docker-build/action.yml
+++ b/.github/actions/docker-build/action.yml
 name: 'Docker Build'
 description: 'Build Dynamo container images'
 inputs:
+  # --- Common Docker Inputs
  framework:
    description: 'Framework to build'
    required: true
@@ -13,9 +14,14 @@ inputs:
    description: 'Docker platform to build on, ie. linux/amd64'
    required: false
    default: 'linux/amd64'
+  cuda_version:
+    description: 'CUDA version passed to render.py via --cuda-version (required)'
+    required: true
  image_tag:
    description: 'Custom image tag (optional, defaults to framework:latest)'
    required: false
+  # --- Secret Inputs
  ci_token:
    description: 'CI Token'
    required: false
@@ -34,21 +40,6 @@ inputs:
  aws_secret_access_key:
    description: 'AWS Secret Access Key'
    required: false
-  base_image_tag:
-    description: 'Optional override for base image tag passed to build.sh'
-    required: false
-  runtime_image_tag:
-    description: 'Optional override for RUNTIME_IMAGE_TAG build-arg'
-    required: false
-  cuda_version:
-    description: 'Optional override for CUDA_VERSION build-arg'
-    required: true
-  enable_kvbm:
-    description: 'Enable KVBM support (optional)'
-    required: false
-  dynamo_base_image:
-    description: 'Pre-built Dynamo base image to use instead of building from scratch'
-    required: false
 outputs:
  image_tag:
@@ -70,6 +61,44 @@ runs:
      shell: bash
      run: |
        docker system prune -af
+    - name: Set up Python
+      uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 #v6.2.0
+      with:
+        python-version: '3.12'
+        pip-install: jinja2 pyyaml
+    - name: Generate Dockerfile
+      shell: bash
+      # Inputs are handed to the script as environment variables instead of
+      # being interpolated into the script text, so each value reaches
+      # render.py as a single literal argument.
+      env:
+        RENDER_TARGET: ${{ inputs.target }}
+        RENDER_FRAMEWORK: ${{ inputs.framework }}
+        RENDER_PLATFORM: ${{ inputs.platform }}
+        RENDER_CUDA_VERSION: ${{ inputs.cuda_version }}
+      run: |
+        echo "::group::Generating Dockerfile"
+        echo "Generating Dockerfile for target: ${RENDER_TARGET} and framework: ${RENDER_FRAMEWORK}"
+        # The build step later in this action consumes ./container/rendered.Dockerfile.
+        python ./container/render.py \
+            --target="${RENDER_TARGET}" \
+            --framework="${RENDER_FRAMEWORK}" \
+            --platform="${RENDER_PLATFORM}" \
+            --cuda-version="${RENDER_CUDA_VERSION}" \
+            --show-result \
+            --short-output
+        echo "::endgroup::"
+    - name: Build EPP image
+      # Runs only for the frontend target.
+      if: ${{ inputs.target == 'frontend' }}
+      shell: bash
+      env:
+        ECR_HOSTNAME: ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com
+      run: |
+        # Install build dependencies: system packages, Rust (cargo + rustc) and cbindgen.
+        sudo apt-get update && sudo apt-get install -y git build-essential protobuf-compiler libclang-dev
+        curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable
+        . "$HOME/.cargo/env"
+        # Make cargo available to later steps.
+        echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
+        cargo install cbindgen
+        # DOCKER_PROXY is handed to the EPP Makefile as a make variable.
+        DOCKER_PROXY="${ECR_HOSTNAME}/dockerhub/"
+        pushd deploy/inference-gateway/epp
+        make all DOCKER_PROXY="${DOCKER_PROXY}"
+        popd
+        # Export the image reference so later steps can read it as $EPP_IMAGE.
+        EPP_GIT_TAG=$(git describe --tags --dirty --always 2>/dev/null || echo "dev")
+        EPP_IMAGE="dynamo/dynamo-epp:${EPP_GIT_TAG}"
+        echo "EPP_IMAGE=${EPP_IMAGE}" >> "$GITHUB_ENV"
    - name: Build image
      id: build
      shell: bash
@@ -107,50 +136,34 @@ runs:
        echo "BUILD_LOG_FILE=${BUILD_LOG_FILE}" >> $GITHUB_ENV
        echo "📝 Build log will be saved to: ${BUILD_LOG_FILE}"
-        # Collect optional overrides provided by the workflow
        # Set base cache args and set --cache-to if this is a main commit
        # TODO: Fix this - Skip cache for frontend target - a different docker driver is used for the EPP build, which causes issues with cache export
-        EXTRA_ARGS=""
+        CACHE_ARGS=""
        if [[ "${{ inputs.target }}" != "frontend" ]]; then
-          EXTRA_ARGS="--cache-to type=inline "
+          CACHE_ARGS="--cache-to type=inline "
-          EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
+          CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
-          EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/} "
+          CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/} "
-          EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
+          CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
          if [[ "$GITHUB_REF_NAME" =~ ^release ]]; then
            # Release branches also use release cache
-            EXTRA_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
+            CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
          elif [[ "$GITHUB_REF_NAME" == "main" ]]; then
-            EXTRA_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
+            CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
          fi
        fi
-        echo "$EXTRA_ARGS"
+        # Frontend builds receive the EPP image reference as a build-arg; other targets pass nothing.
+        EPP_IMAGE_ARG=""
-        # Collect optional overrides provided by the workflow
+        if [[ "${{ inputs.target }}" == "frontend" ]]; then
-        if [ -n "${{ inputs.base_image_tag }}" ]; then
+          EPP_IMAGE_ARG="--build-arg EPP_IMAGE=${EPP_IMAGE}"
-          EXTRA_ARGS+="--base-image-tag ${{ inputs.base_image_tag }} "
-        fi
-        if [ -n "${{ inputs.runtime_image_tag }}" ]; then
-          EXTRA_ARGS+="--build-arg RUNTIME_IMAGE_TAG=${{ inputs.runtime_image_tag }} "
-        fi
-        if [ -n "${{ inputs.cuda_version }}" ]; then
-          EXTRA_ARGS+="--build-arg CUDA_VERSION=${{ inputs.cuda_version }} "
-        fi
-        if [ -n "${{ inputs.dynamo_base_image }}" ]; then
-          EXTRA_ARGS+=" --dynamo-base-image ${{ inputs.dynamo_base_image }}"
-        fi
-        if [ -n "${{ inputs.enable_kvbm }}" ]; then
-          EXTRA_ARGS+=" --build-arg ENABLE_KVBM=${{ inputs.enable_kvbm }}"
        fi
-        # Execute build and capture output (show on console AND save to file)
+        docker buildx build \
-        ./container/build.sh --tag "$IMAGE_TAG" \
+          --progress=plain \
-          --target ${{ inputs.target }} \
+          --tag "$IMAGE_TAG" \
-          --vllm-max-jobs 10 \
+          --load \
-          --framework ${{ inputs.framework }} \
+          -f ./container/rendered.Dockerfile \
-          --platform ${{ inputs.platform }} \
+          $CACHE_ARGS \
-          --use-sccache \
+          $EPP_IMAGE_ARG . 2>&1 | tee "${BUILD_LOG_FILE}"
-          --sccache-bucket "$SCCACHE_S3_BUCKET" \
-          --sccache-region "$AWS_DEFAULT_REGION" $EXTRA_ARGS 2>&1 | tee "${BUILD_LOG_FILE}"
        BUILD_EXIT_CODE=${PIPESTATUS[0]}

--- a/.github/actions/docker-remote-build/action.yml
+++ b/.github/actions/docker-remote-build/action.yml
@@ -13,6 +13,9 @@ inputs:
    description: 'Docker platform to build on, ie. linux/amd64'
    required: false
    default: 'linux/amd64'
+  cuda_version:
+    description: 'CUDA version for this build (required)'
+    required: true
  image_tag:
    description: 'Custom image tag'
    required: true
@@ -34,21 +37,6 @@ inputs:
  aws_secret_access_key:
    description: 'AWS Secret Access Key'
    required: false
-  base_image_tag:
-    description: 'Optional override for base image tag passed to build.sh'
-    required: false
-  runtime_image_tag:
-    description: 'Optional override for RUNTIME_IMAGE_TAG build-arg'
-    required: false
-  cuda_version:
-    description: 'Optional override for CUDA_VERSION build-arg'
-    required: true
-  enable_kvbm:
-    description: 'Enable KVBM support (optional)'
-    required: false
-  dynamo_base_image:
-    description: 'Pre-built Dynamo base image to use instead of building from scratch'
-    required: false
  no_cache:
    description: 'Disable Docker build cache'
    required: false
@@ -117,21 +105,20 @@ runs:
        # Collect optional overrides provided by the workflow
        # Set base cache args and set --cache-to if this is a main commit
        # TODO: Fix this - Skip cache for frontend target - a different docker driver is used for the EPP build, which causes issues with cache export
-        EXTRA_ARGS=""
+        CACHE_ARGS=""
        if [[ "${{ inputs.target }}" != "frontend" ]]; then
-          EXTRA_ARGS="--cache-to type=inline "
+          CACHE_ARGS="--cache-to type=inline "
-          EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
+          CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
-          EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/} "
+          CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/} "
-          EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
+          CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
          if [[ "$GITHUB_REF_NAME" =~ ^release ]]; then
            # Release branches also use release cache
-            EXTRA_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
+            CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
          elif [[ "$GITHUB_REF_NAME" == "main" ]]; then
-            EXTRA_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
+            CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
          fi
        fi
+        echo "$CACHE_ARGS"
-        echo "$EXTRA_ARGS"
        # Collect optional overrides provided by the workflow
        if [[ "${{ inputs.ci }}" == "true" ]]; then
@@ -139,21 +126,6 @@ runs:
          EXTRA_ARGS+=" --ci"
        fi
-        if [ -n "${{ inputs.base_image_tag }}" ]; then
-          EXTRA_ARGS+="--base-image-tag ${{ inputs.base_image_tag }} "
-        fi
-        if [ -n "${{ inputs.runtime_image_tag }}" ]; then
-          EXTRA_ARGS+="--build-arg RUNTIME_IMAGE_TAG=${{ inputs.runtime_image_tag }} "
-        fi
-        if [ -n "${{ inputs.cuda_version }}" ]; then
-          EXTRA_ARGS+="--build-arg CUDA_VERSION=${{ inputs.cuda_version }} "
-        fi
-        if [ -n "${{ inputs.dynamo_base_image }}" ]; then
-          EXTRA_ARGS+=" --dynamo-base-image ${{ inputs.dynamo_base_image }}"
-        fi
-        if [ -n "${{ inputs.enable_kvbm }}" ]; then
-          EXTRA_ARGS+=" --build-arg ENABLE_KVBM=${{ inputs.enable_kvbm }}"
-        fi
        if [ "${{ inputs.no_cache }}" == "true" ]; then
          EXTRA_ARGS+=" --no-cache"
        fi
@@ -161,9 +133,9 @@ runs:
          EXTRA_ARGS+=" --build-arg CARGO_BUILD_JOBS=4 --use-sccache"
        fi
        if [ "${{ inputs.push_image }}" == "true" ]; then
-          EXTRA_ARGS+=" --push --no-load"
+          EXTRA_ARGS+=" --push"
-        elif [ "${{ inputs.no_load }}" == "true" ]; then
+        elif [ "${{ inputs.no_load }}" == "false" ]; then
-          EXTRA_ARGS+=" --no-load"
+          EXTRA_ARGS+=" --load"
        fi
        # Add extra tags (each as a separate --tag argument)
@@ -176,16 +148,14 @@ runs:
          done <<< "$EXTRA_TAGS"
        fi
-        # Execute build and capture output (show on console AND save to file)
+        docker buildx build \
-        ./container/build.sh --tag "$IMAGE_TAG" \
+          --progress=plain \
-          --target ${{ inputs.target }} \
+          --tag "$IMAGE_TAG" \
-          --vllm-max-jobs 10 \
+          --platform linux/${{ inputs.platform }} \
-          --no-tag-latest \
+          -f ./container/rendered.Dockerfile \
-          --framework ${{ inputs.framework }} \
+          $CACHE_ARGS \
-          --platform ${{ inputs.platform }} \
+          $EXTRA_ARGS \
-          --sccache-bucket "$SCCACHE_S3_BUCKET" \
+          $EPP_IMAGE_ARG . 2>&1 | tee "${BUILD_LOG_FILE}"
-          --sccache-region "$AWS_DEFAULT_REGION" \
-          $EXTRA_ARGS 2>&1 | tee "${BUILD_LOG_FILE}"
        BUILD_EXIT_CODE=${PIPESTATUS[0]}

--- a/.github/filters.yaml
+++ b/.github/filters.yaml
@@ -63,6 +63,9 @@ ignore:
  - 'container/run.sh'
  - 'container/use-sccache.sh'
  - 'container/dev/**'
+  - 'container/templates/aws.Dockerfile'
+  - 'container/templates/local_dev.Dockerfile'
+  - 'container/templates/dev.Dockerfile'
 ci: &ci
  - '.github/workflows/**'
@@ -78,6 +81,12 @@ core:
  - *ci
  - 'container/build.sh'
  - 'container/Dockerfile'
+  - 'container/render.py'
+  - 'container/Dockerfile.template'
+  - 'container/context.yaml'
+  - 'container/templates/args.Dockerfile'
+  - 'container/templates/dynamo_*'
+  - 'container/templates/wheel_builder.Dockerfile'
  - '.dockerignore'
  - 'container/deps/*'
  - '.cargo/config.toml'
@@ -120,6 +129,7 @@ vllm:
  - 'container/deps/vllm/**'
  - 'examples/backends/vllm/**'
  - 'components/src/dynamo/vllm/**'
+  - 'container/templates/vllm_*'
 sglang:
  - '!**/*.md'
@@ -127,6 +137,7 @@ sglang:
  - 'container/Dockerfile.sglang'
  - 'examples/backends/sglang/**'
  - 'components/src/dynamo/sglang/**'
+  - 'container/templates/sglang_*'
 trtllm:
  - '!**/*.md'
@@ -136,6 +147,7 @@ trtllm:
  - 'examples/backends/trtllm/**'
  - 'components/src/dynamo/trtllm/**'
  - 'container/build_trtllm_wheel.sh'
+  - 'container/templates/trtllm_*'
 frontend:
  - '!**/*.md'
@@ -153,6 +165,7 @@ frontend:
  - 'components/src/dynamo/frontend/**'
  - 'components/src/dynamo/common/**'
  - 'deploy/inference-gateway/**'
+  - 'container/templates/frontend.Dockerfile'
 rust:
  - '.github/workflows/pre-merge.yml'

--- a/.github/workflows/build-frontend-image.yaml
+++ b/.github/workflows/build-frontend-image.yaml
@@ -68,21 +68,6 @@ jobs:
        uses: actions/setup-go@v5
        with:
          go-version: '1.24'
-      - name: Install dependencies
-        shell: bash
-        run: |
-          set -euo pipefail
-          # Install system dependencies from apt
-          sudo apt-get update && sudo apt-get install -y git build-essential protobuf-compiler libclang-dev
-          # Install Rust (cargo + rustc)
-          curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable
-          # Make cargo available to later steps
-          echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
-      - name: Install cbindgen
-        shell: bash
-        run: |
-          set -euo pipefail
-          cargo install cbindgen
      - name: Docker Login
        uses: ./.github/actions/docker-login
        with:
@@ -95,10 +80,10 @@ jobs:
        id: build-image
        uses: ./.github/actions/docker-build
        env:
-          PLATFORMS: linux/${{ matrix.platform.arch }}
+          PLATFORMS: ${{ matrix.platform.arch }}
          TARGETARCH: ${{ matrix.platform.arch }}
        with:
-          framework: none
+          framework: dynamo
          target: frontend
          platform: ${{ env.PLATFORMS }}
          ci_token: ${{ secrets.CI_TOKEN }}

--- a/.github/workflows/build-test-distribute-flavor-matrix.yml
+++ b/.github/workflows/build-test-distribute-flavor-matrix.yml
@@ -10,6 +10,10 @@ on:
        description: 'Framework name (vllm, sglang, trtllm)'
        required: true
        type: string
+      target:
+        description: 'Target stage for Docker rendering'
+        required: true
+        type: string
      platforms:
        description: 'Platforms to build (JSON array, e.g., ["amd64", "arm64"])'
        required: true
@@ -86,6 +90,7 @@ jobs:
    with:
      framework: ${{ inputs.framework }}
      platform: ${{ matrix.platform }}
+      target: ${{ inputs.target }}
      cuda_version: ${{ matrix.cuda_version }}
      extra_tags: ${{ inputs.extra_tags }}
      no_cache: ${{ inputs.no_cache }}

--- a/.github/workflows/build-test-distribute-flavor.yml
+++ b/.github/workflows/build-test-distribute-flavor.yml
@@ -10,6 +10,10 @@ on:
        description: 'Framework name (vllm, sglang, trtllm)'
        required: true
        type: string
+      target:
+        description: 'Target stage for Docker rendering'
+        required: true
+        type: string
      platform:
        description: 'Platform to build (amd64 or arm64)'
        required: true
@@ -105,13 +109,17 @@ jobs:
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
        with:
          lfs: true
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 #v6.2.0
+        with:
+          python-version: '3.12'
+          pip-install: jinja2 pyyaml
      - name: Calculate extra tags with platform suffix # will get redundant upon multi arch builds support
        id: extra-tags
        shell: bash
        env:
          EXTRA_TAGS: ${{ inputs.extra_tags }}
-          PLATFORM: ${{ inputs.platform }}
+          PLATFORM: linux/${{ inputs.platform }}
        run: |
          if [ -n "$EXTRA_TAGS" ]; then
            RESULT=""
@@ -134,7 +142,6 @@ jobs:
          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
          azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
          azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
      - name: Calculate target tag
        id: calculate-target-tag
        shell: bash
@@ -146,7 +153,6 @@ jobs:
          echo "default_target_image_uri=${DEFAULT_TARGET_IMAGE_URI}" >> $GITHUB_OUTPUT
          echo "target_tag_plain=${TARGET_TAG_PLAIN}" >> $GITHUB_OUTPUT
          echo "cuda_version_plain=${CUDA_VERSION}" >> $GITHUB_OUTPUT
      - name: Initialize Dynamo Builder
        uses: ./.github/actions/init-dynamo-builder
        with:
@@ -154,28 +160,37 @@ jobs:
          flavor: ${{ inputs.framework }}
          arch: ${{ inputs.platform }}
          cuda_version: ${{ inputs.cuda_version }}
      - name: Print Build Container inputs
        run: |
          echo "=== Build Container Inputs ==="
          echo "image_tag: ${{ steps.calculate-target-tag.outputs.default_target_image_uri }}"
          echo "framework: ${{ inputs.framework }}"
          echo "target: runtime"
-          echo "platform: linux/${{ inputs.platform }}"
+          echo "platform: ${{ inputs.platform }}"
          echo "cuda_version: ${{ inputs.cuda_version }}"
          echo "no_cache: ${{ inputs.no_cache }}"
          echo "extra_tags: ${{ steps.extra-tags.outputs.tags }}"
          echo "push_image: ${{ inputs.push_image }}"
          echo "no_load: ${{ inputs.no_load }}"
+      - name: Generate Dockerfile
+        shell: bash
+        # Inputs are handed to the script as environment variables instead of
+        # being interpolated into the script text, so each value reaches
+        # render.py as a single literal argument.
+        env:
+          RENDER_TARGET: ${{ inputs.target }}
+          RENDER_FRAMEWORK: ${{ inputs.framework }}
+          RENDER_PLATFORM: ${{ inputs.platform }}
+          RENDER_CUDA_VERSION: ${{ inputs.cuda_version }}
+        run: |
+          echo "Generating Dockerfile for target: ${RENDER_TARGET} and framework: ${RENDER_FRAMEWORK}"
+          python ./container/render.py \
+              --target="${RENDER_TARGET}" \
+              --framework="${RENDER_FRAMEWORK}" \
+              --platform="${RENDER_PLATFORM}" \
+              --cuda-version="${RENDER_CUDA_VERSION}" \
+              --show-result \
+              --short-output
      - name: Build Container
        id: build-image
        uses: ./.github/actions/docker-remote-build
        with:
          image_tag: ${{ steps.calculate-target-tag.outputs.default_target_image_uri }}
          framework: ${{ inputs.framework }}
-          target: runtime
+          target: ${{ inputs.target }}
-          platform: linux/${{ inputs.platform }}
+          platform: ${{ inputs.platform }}
          cuda_version: ${{ inputs.cuda_version }}
          ci_token: ${{ secrets.CI_TOKEN }}
          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}

--- a/.github/workflows/ci-test-suite.yml
+++ b/.github/workflows/ci-test-suite.yml
@@ -99,17 +99,15 @@ jobs:
        with:
          framework: ${{ matrix.framework }}
          target: runtime
-          platform: linux/amd64
+          platform: amd64
-          base_image_tag: ''
+          cuda_version: '12.9'
-          runtime_image_tag: ''
+          image_tag: runtime-${{ matrix.framework }}-amd64:${{ github.run_id }}
-          cuda_version: ''
          ci_token: ${{ secrets.CI_TOKEN }}
          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
          sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          image_tag: runtime-${{ matrix.framework }}-amd64:${{ github.run_id }}
      - name: Tag and Push Runtime Images
        uses: ./.github/actions/docker-tag-push
        with:
@@ -132,13 +130,9 @@ jobs:
      matrix:
        include:
          - framework: vllm
-            base_image_tag: '25.06-cuda12.9-devel-ubuntu24.04'
-            runtime_image_tag: '12.9.0-runtime-ubuntu24.04'
            cuda_version: '12.9'
          - framework: sglang
-            base_image_tag: ''
+            cuda_version: '12.9'
-            runtime_image_tag: ''
-            cuda_version: ''
    env:
      ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
    steps:
@@ -158,17 +152,15 @@ jobs:
        with:
          framework: ${{ matrix.framework }}
          target: runtime
-          platform: linux/arm64
+          platform: arm64
-          base_image_tag: ${{ matrix.base_image_tag }}
-          runtime_image_tag: ${{ matrix.runtime_image_tag }}
          cuda_version: ${{ matrix.cuda_version }}
+          image_tag: runtime-${{ matrix.framework }}-arm64:${{ github.run_id }}
          ci_token: ${{ secrets.CI_TOKEN }}
          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
          sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          image_tag: runtime-${{ matrix.framework }}-arm64:${{ github.run_id }}
      - name: Tag and Push Runtime Images
        uses: ./.github/actions/docker-tag-push
        with:
@@ -210,17 +202,15 @@ jobs:
        with:
          framework: ${{ matrix.framework }}
          target: runtime
-          platform: linux/amd64
+          platform: amd64
-          base_image_tag: ''
-          runtime_image_tag: ''
          cuda_version: ${{ matrix.framework == 'trtllm' && '13.1' || '13.0' }}
+          image_tag: runtime-${{ matrix.framework }}-cuda13-amd64:${{ github.run_id }}
          ci_token: ${{ secrets.CI_TOKEN }}
          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
          sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          image_tag: runtime-${{ matrix.framework }}-cuda13-amd64:${{ github.run_id }}
      - name: Tag and Push CUDA 13 Runtime Images
        uses: ./.github/actions/docker-tag-push
        with:
@@ -262,17 +252,15 @@ jobs:
        with:
          framework: ${{ matrix.framework }}
          target: runtime
-          platform: linux/arm64
+          platform: arm64
-          base_image_tag: ''
-          runtime_image_tag: ''
          cuda_version: ${{ matrix.framework == 'trtllm' && '13.1' || '13.0' }}
+          image_tag: runtime-${{ matrix.framework }}-cuda13-arm64:${{ github.run_id }}
          ci_token: ${{ secrets.CI_TOKEN }}
          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
          sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          image_tag: runtime-${{ matrix.framework }}-cuda13-arm64:${{ github.run_id }}
      - name: Tag and Push CUDA 13 Runtime Images
        uses: ./.github/actions/docker-tag-push
        with:

--- a/.github/workflows/container-validation-dynamo.yml
+++ b/.github/workflows/container-validation-dynamo.yml
@@ -66,11 +66,27 @@ jobs:
        id: define_image_tag
        run: |
          echo "image_tag=dynamo:latest-dev" >> $GITHUB_OUTPUT
+      - name: Generate Dockerfile
+        shell: bash
+        run: |
+          # Target, framework and platform are fixed for this validation job,
+          # so the log line reports the literal values passed to render.py.
+          echo "Generating Dockerfile for target: dev and framework: dynamo"
+          python ./container/render.py \
+              --target=dev \
+              --framework=dynamo \
+              --platform=amd64 \
+              --show-result \
+              --short-output
      - name: Build image
        env:
          GITHUB_TOKEN: ${{ secrets.CI_TOKEN }}
        run: |
-          ./container/build.sh --tag ${{ steps.define_image_tag.outputs.image_tag }} --target dev --framework none --enable-kvbm --enable-media-ffmpeg
+          docker buildx build \
+            --progress=plain \
+            --tag ${{ steps.define_image_tag.outputs.image_tag }} \
+            -f ./container/rendered.Dockerfile \
+            --build-arg ENABLE_MEDIA_FFMPEG=true \
+            --build-arg ENABLE_KVBM=true \
+            --load .
      - name: Start services with docker-compose
        working-directory: ./deploy
        run: |

--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -177,6 +177,7 @@ jobs:
    uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
    with:
      framework: vllm
+      target: runtime
      platforms: '["amd64", "arm64"]'
      cuda_versions: '["12.9", "13.0"]'
      extra_tags: |
@@ -194,6 +195,7 @@ jobs:
    uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
    with:
      framework: sglang
+      target: runtime
      platforms: '["amd64", "arm64"]'
      cuda_versions: '["12.9", "13.0"]'
      extra_tags: |
@@ -211,11 +213,12 @@ jobs:
    uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
    with:
      framework: trtllm
+      target: runtime
      platforms: '["amd64", "arm64"]'
      cuda_versions: '["13.1"]'
      extra_tags: |
        ${{ github.ref_name == 'main' && 'main-trtllm' || '' }}
-        ${{ github.ref_name == 'main' && format('main-trt-llm-{0}', github.sha) || '' }}
+        ${{ github.ref_name == 'main' && format('main-trtllm-{0}', github.sha) || '' }}
      builder_name: ${{ needs.changed-files.outputs.builder_name }}
    secrets: inherit

--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 .vs/
 .vscode/
 .helix
+*rendered.Dockerfile
 [Bb]inlog/
 [Bb][Uu][Ii][Ll][Dd]/
 [Oo][Bb][Jj]/

--- a/container/Dockerfile
+++ b/container/Dockerfile
--- a/container/Dockerfile.template
+++ b/container/Dockerfile.template
+# syntax=docker/dockerfile:1.10.0-labs
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Jinja template that assembles one Dockerfile from the fragments under
+# templates/. Which fragments are included depends on three template variables:
+#   framework - "dynamo", or a framework name such as "sglang"
+#   target    - requested stage: base, wheel_builder, framework, runtime,
+#               frontend, dev or local-dev
+#   make_efa  - when true, the AWS fragment is added for runtime, dev and
+#               local-dev targets
+{% include "templates/args.Dockerfile" %}
+# --- Base Image Stages
+{% if framework != "dynamo" %}
+    # Framework builds always include the Dynamo base and wheel-builder stages.
+    {% include "templates/dynamo_base.Dockerfile" %}
+    {% include "templates/wheel_builder.Dockerfile" %}
+{% elif framework == "dynamo" %}
+    # Dynamo-only builds include just the stages leading up to the requested target.
+    {% if target == "frontend" %}
+        {% include "templates/dynamo_base.Dockerfile" %}
+        {% include "templates/wheel_builder.Dockerfile" %}
+        {% include "templates/dynamo_runtime.Dockerfile" %}
+        {% include "templates/frontend.Dockerfile" %}
+    {% elif target == "runtime" or target == "dev" or target == "local-dev" %}
+        {% include "templates/dynamo_base.Dockerfile" %}
+        {% include "templates/wheel_builder.Dockerfile" %}
+        {% include "templates/dynamo_runtime.Dockerfile" %}
+    {% elif target == "wheel_builder" %}
+        {% include "templates/dynamo_base.Dockerfile" %}
+        {% include "templates/wheel_builder.Dockerfile" %}
+    {% elif target == "base" %}
+        {% include "templates/dynamo_base.Dockerfile" %}
+    {% endif %}
+{% endif %}
+# --- Framework Stages
+{% if framework == "sglang" %}
+    # SGLang is the only framework without a `framework` target currently, needs special treatment
+    {% include "templates/sglang_runtime.Dockerfile" %}
+{% elif target == "framework" and framework != "dynamo" %}
+    # Only the framework stage was requested.
+    {% include "templates/" ~ framework ~ "_framework.Dockerfile" %}
+{% elif ( target == "runtime" or target == "dev" or target == "local-dev" ) and framework != "dynamo" %}
+    # Runtime-based targets include the framework stage followed by its runtime stage.
+    {% include "templates/" ~ framework ~ "_framework.Dockerfile" %}
+    {% include "templates/" ~ framework ~ "_runtime.Dockerfile" %}
+{% else %}
+# --- No framework stages included
+{% endif %}
+{% if make_efa == true and target == "runtime" %}
+    # For the runtime target the AWS fragment follows the runtime stages directly;
+    # for dev targets it is included after the dev fragment further down.
+    {% include "templates/aws.Dockerfile" %}
+{% endif %}
+# --- Development Stages
+{% if target == "dev" or target == "local-dev" %}
+    # dev and local-dev both include the dev fragment; local-dev appends its own fragment last.
+    {% include "templates/dev.Dockerfile" %}
+    {% if make_efa == true %}
+        {% include "templates/aws.Dockerfile" %}
+    {% endif %}
+    {% if target == "local-dev" %}
+        {% include "templates/local_dev.Dockerfile" %}
+    {% endif %}
+{% else %}
+# --- No development stages included
+{% endif %}
--- a/container/README.md
+++ b/container/README.md
@@ -6,22 +6,21 @@ The NVIDIA Dynamo project uses containerized development and deployment to maint
 ### Core Components
- **`build.sh`** - A Docker image builder that creates containers for different AI inference frameworks (vLLM, TensorRT-LLM, SGLang). It handles framework-specific dependencies, multi-stage builds, and development vs production configurations.
+- **`render.py`** - A render script used to generate Dockerfiles for AI inference frameworks (vLLM, TensorRT-LLM, SGLang). The generated Dockerfile includes the needed multi-stage steps for development vs production configurations.
 - **`run.sh`** - A container runtime manager that launches Docker containers with proper GPU access, volume mounts, and environment configurations. It supports different development workflows from root-based legacy setups to user-based development environments.
- **Multiple Dockerfiles** - Framework-specific Dockerfiles that define the container images:
+## Rendering Requirements:
-  - `Dockerfile.vllm` - For vLLM inference backend
+- Python
-  - `Dockerfile.trtllm` - For TensorRT-LLM inference backend
+- Python Packages:
-  - `Dockerfile.sglang` - For SGLang inference backend
+  - pyyaml
-  - `Dockerfile` - Base/standalone configuration
+  - jinja2
-  - `Dockerfile.epp` - For building the Endpoint Picker (EPP) image
 ### Stage Summary for Frameworks
 <details>
 <summary>Show Stage Summary Table</summary>
-Dockerfile.${FRAMEWORK} General Structure
+Dockerfile General Structure
 Below is a summary of the general file structure for the framework Dockerfile stages. Some exceptions exist.
@@ -80,14 +79,13 @@ The scripts in this directory abstract away the complexity of Docker commands wh
 ### Convenience Scripts vs Direct Docker Commands
-The `build.sh` and `run.sh` scripts are convenience wrappers that simplify common Docker operations. They automatically handle:
+The `run.sh` script and rendering scripts are convenience wrappers that simplify common Docker operations. They automatically handle:
- Framework-specific image selection and tagging
 - GPU access configuration and runtime selection
 - Volume mount setup for development workflows
 - Environment variable management
 - Build argument construction for multi-stage builds
-**You can always use Docker commands directly** if you prefer more control or want to customize beyond what the scripts provide. The scripts use `--dry-run` flags to show you the exact Docker commands they would execute, making it easy to understand and modify the underlying operations.
+**You can always use Docker commands directly** if you prefer more control or want to customize beyond what the scripts provide. The `run.sh` script provides a `--dry-run` flag that shows you the exact commands it would execute, making it easy to understand and modify the underlying operations.
 ## Development Targets Feature Matrix
@@ -117,10 +115,11 @@ The `build.sh` and `run.sh` scripts are convenience wrappers that simplify commo
 ### 1. runtime target (runs as non-root dynamo user):
 ```bash
 # Build runtime image
-./build.sh --framework vllm --target runtime
+python container/render.py --framework vllm --target runtime --short-output
+docker build -t dynamo:latest-vllm-runtime -f container/rendered.Dockerfile .
 # Run runtime container
-./run.sh --image dynamo:latest-vllm-runtime -it
+container/run.sh --image dynamo:latest-vllm-runtime -it
 ```
 ### 2. local-dev + `run.sh` (runs as dynamo user with matched host UID/GID):
@@ -133,20 +132,20 @@ Use VS Code/Cursor Dev Container Extension with devcontainer.json configuration.
 ## Build and Run Scripts Overview
-### build.sh - Docker Image Builder
+### render.py - Docker Image Generator
-The `build.sh` script is responsible for building Docker images for different AI inference frameworks. It supports multiple frameworks and configurations:
+The `render.py` script is responsible for generating Dockerfiles for different AI inference frameworks. It supports multiple frameworks and configurations:
 **Purpose:**
- Builds Docker images for NVIDIA Dynamo with support for vLLM, TensorRT-LLM, SGLang, or standalone configurations
+- Generates Dockerfiles for NVIDIA Dynamo with support for vLLM, TensorRT-LLM, SGLang, or standalone configurations
 - Handles framework-specific dependencies and optimizations
 - Manages build contexts, caching, and multi-stage builds
 - Configures development vs production targets
 **Key Features:**
- **Framework Support**: vLLM (default when --framework not specified), TensorRT-LLM, SGLang, or NONE
+- **Framework Support**: vLLM (default when --framework not specified), TensorRT-LLM, SGLang, or NONE (standalone Dynamo)
 - **Multi-stage Builds**: Build process with base images
- **Development Targets**: Supports `dev`, `runtime`, and `local-dev` targets via `build.sh`.
+- **Development Targets**: Supports `dev`, `runtime`, and `local-dev` targets via `render.py`.
 - **Build Caching**: Docker layer caching and sccache support
 - **GPU Optimization**: CUDA, EFA, and NIXL support
@@ -221,52 +220,49 @@ Current cache types (as mounted in various Dockerfiles):
 Note: `uv` commands set `UV_CACHE_DIR` per `RUN` so `uv` always uses the same path as the cache mount (instead of relying on `$HOME`).
-**How `dev` / `local-dev` builds work:**
- `dev` and `local-dev` targets are defined in `container/dev/Dockerfile.dev`.
- The framework Dockerfiles (`Dockerfile.vllm`, `Dockerfile.trtllm`, `Dockerfile.sglang`, `Dockerfile`) define shared stages used by `Dockerfile.dev` (e.g. `runtime`, `dynamo_base`, `wheel_builder`).
- To build a single coherent Dockerfile, `build.sh` generates a temporary Dockerfile that is a literal concatenation of:
-  - the selected framework Dockerfile, then
-  - `container/dev/Dockerfile.dev`
-  `build.sh` then continues building normally using the temp Dockerfile path.
-**Requirements and debugging:**
- By default the temp Dockerfile is deleted at the end of `build.sh`. To keep it for inspection, set `KEEP_DEV_DOCKERFILE_TEMP=1`.
 > **💡 Tip**: The `dev` and `local-dev` images have source code baked in, but **using `--mount-workspace` with `run.sh` is recommended for development** to bind mount your local workspace for live editing.
 **Common Usage Examples:**
 ```bash
 # Build vLLM dev image called dynamo:latest-vllm-dev. This runs as root and is for development.
-./build.sh
+python container/render.py --framework=vllm --target=dev --short-output
+docker build -t dynamo:latest-vllm-dev -f container/rendered.Dockerfile .
 # Build a local-dev image. The local-dev image will run as `dynamo` with UID/GID matched to your host user,
 # which is useful when mounting partitions for development.
-./build.sh --framework vllm --target local-dev
+python container/render.py --framework=vllm --target=local-dev --short-output
+docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile -t dynamo:latest-vllm-local-dev .
 # Build TensorRT-LLM runtime image called dynamo:latest-trtllm-runtime
-./build.sh --framework trtllm
+python container/render.py --framework=trtllm --target=runtime --short-output
+docker build -t dynamo:latest-trtllm-runtime -f container/rendered.Dockerfile .
-# Build with custom tag
-./build.sh --framework sglang --tag my-custom-tag
-# Dry run to see commands
-./build.sh --dry-run
-# Build with no cache
-./build.sh --no-cache
-# Build with build arguments
-./build.sh --build-arg CUSTOM_ARG=value
 ```
 ### Building the Frontend Image
 The frontend image is a specialized container that includes the Dynamo components (Dynamo, NIXL, etc) along with the Endpoint Picker (EPP) for Kubernetes Gateway API Inference Extension integration. This image is primarily used for inference gateway deployments.
+**Build EPP Image**
+```bash
+sudo apt-get update && sudo apt-get install -y git build-essential protobuf-compiler libclang-dev
+curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable
+. "$HOME/.cargo/env"
+cargo install cbindgen
+pushd deploy/inference-gateway/epp
+make all
+popd
+EPP_GIT_TAG=$(git describe --tags --dirty --always 2>/dev/null || echo "dev")
+EPP_IMAGE="dynamo/dynamo-epp:${EPP_GIT_TAG}"
+```
+**Build Frontend Image**
 ```bash
 # Build the frontend image (uses the EPP image built in the previous step, passed via EPP_IMAGE)
-./build.sh --framework none --target frontend
+python container/render.py --framework=dynamo --target=frontend --short-output
+docker build -t dynamo:frontend --build-arg EPP_IMAGE=${EPP_IMAGE} -f container/rendered.Dockerfile .
 ```
 The build process automatically:
@@ -313,34 +309,34 @@ The `run.sh` script launches Docker containers with the appropriate configuratio
 ```bash
 # Basic container launch with dev image (runs as root by default, non-interactive)
-./run.sh --image dynamo:latest-vllm -v $HOME/.cache:/root/.cache
+container/run.sh --image dynamo:latest-vllm -v $HOME/.cache:/root/.cache
 # Interactive development with workspace mounted using dev image (runs as root)
-./run.sh --image dynamo:latest-vllm --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
 # Interactive development with local-dev image (runs as dynamo user with matched host UID/GID)
-./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
 # Use specific image and framework for development
-./run.sh --image v0.1.0.dev.08cc44965-vllm-local-dev --framework vllm --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image v0.1.0.dev.08cc44965-vllm-local-dev --framework vllm --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
 # Interactive development shell with workspace mounted (local-dev)
-./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/dynamo/.cache -it -- bash
+container/run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/dynamo/.cache -it -- bash
 # Development with custom environment variables
-./run.sh --image dynamo:latest-vllm-local-dev -e CUDA_VISIBLE_DEVICES=0,1 --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm-local-dev -e CUDA_VISIBLE_DEVICES=0,1 --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
 # Dry run to see docker command
-./run.sh --dry-run
+container/run.sh --dry-run
 # Development with custom volume mounts
-./run.sh --image dynamo:latest-vllm-local-dev -v /host/path:/container/path --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm-local-dev -v /host/path:/container/path --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
 # Run runtime image as non-root dynamo user (for production)
-./run.sh --image dynamo:latest-vllm-runtime -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm-runtime -v $HOME/.cache:/home/dynamo/.cache
 # Run dev image as specific user (override default root)
-./run.sh --image dynamo:latest-vllm --user dynamo -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm --user dynamo -v $HOME/.cache:/home/dynamo/.cache
 ```
 ### Network Configuration Options
@@ -350,8 +346,8 @@ The `run.sh` script supports different networking modes via the `--network` flag
 #### Host Networking (Default)
 ```bash
 # Examples with dynamo user
-./run.sh --image dynamo:latest-vllm-local-dev --network host -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm-local-dev --network host -v $HOME/.cache:/home/dynamo/.cache
-./run.sh --image dynamo:latest-vllm-local-dev -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm-local-dev -v $HOME/.cache:/home/dynamo/.cache
 ```
 **Use cases:**
 - High-performance ML inference (default for GPU workloads)
@@ -364,7 +360,7 @@ The `run.sh` script supports different networking modes via the `--network` flag
 #### Bridge Networking (Isolated)
 ```bash
 # CI/testing with isolated bridge networking and host cache sharing (no -it for automated CI)
-./run.sh --image dynamo:latest-vllm --mount-workspace --network bridge -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm --mount-workspace --network bridge -v $HOME/.cache:/home/dynamo/.cache
 ```
 **Use cases:**
 - Secure isolation from host network
@@ -377,10 +373,10 @@ The `run.sh` script supports different networking modes via the `--network` flag
 #### No Networking ⚠️ **LIMITED FUNCTIONALITY**
 ```bash
 # Complete network isolation - no external connectivity
-./run.sh --image dynamo:latest-vllm --network none --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm --network none --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
 # Same with local-dev image (dynamo user with matched host UID/GID)
-./run.sh --image dynamo:latest-vllm-local-dev --network none --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm-local-dev --network none --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
 ```
 **⚠️ WARNING: `--network none` severely limits Dynamo functionality:**
 - **No model downloads** - HuggingFace models cannot be downloaded
@@ -427,11 +423,12 @@ See Docker documentation for custom network creation and management.
 ### Development Workflow
 ```bash
 # 1. Build local-dev image (builds runtime, then dev as intermediate, then local-dev as final image)
-./build.sh --framework vllm --target local-dev
+python container/render.py --framework=vllm --target=local-dev --short-output
+docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile -t dynamo:latest-vllm-local-dev .
 # 2. Run development container using the local-dev image
 # RECOMMENDED: --mount-workspace for live editing in dev and local-dev images
-./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/dynamo/.cache -it
+container/run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/dynamo/.cache -it
 # 3. Inside container, run inference (requires both frontend and backend)
 # Start frontend
@@ -444,19 +441,21 @@ python -m dynamo.vllm --model Qwen/Qwen3-0.6B --gpu-memory-utilization 0.20 &
 ### Production Workflow
 ```bash
 # 1. Build production runtime image (runs as non-root dynamo user)
-./build.sh --framework vllm --target runtime
+python container/render.py --framework=vllm --target=runtime --short-output
+docker build -t dynamo:latest-vllm-runtime -f container/rendered.Dockerfile .
 # 2. Run production container as non-root dynamo user
-./run.sh --image dynamo:latest-vllm-runtime --gpus all -v $HOME/.cache:/home/dynamo/.cache
+container/run.sh --image dynamo:latest-vllm-runtime --gpus all -v $HOME/.cache:/home/dynamo/.cache
 ```
 ### Testing Workflow
 ```bash
 # 1. Build dev image
-./build.sh --framework vllm --no-cache
+python container/render.py --framework=vllm --target=dev --short-output
+docker build -t dynamo:latest-vllm-dev -f container/rendered.Dockerfile .
 # 2. Run tests with network isolation for reproducible results (no -it needed for CI)
-./run.sh --image dynamo:latest-vllm --mount-workspace --network bridge -v $HOME/.cache:/home/dynamo/.cache -- python -m pytest tests/
+container/run.sh --image dynamo:latest-vllm --mount-workspace --network bridge -v $HOME/.cache:/home/dynamo/.cache -- python -m pytest tests/
 # 3. Inside the container with bridge networking, start services
 # Note: Services are only accessible from the same container - no port conflicts with host

--- a/container/build.sh
+++ b/container/build.sh
--- a/container/context.yaml
+++ b/container/context.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# This file represents the default ARG values of Dockerfiles generated
+# by render.py. These are the recommended default values for users and
+# are the source of truth for the values used in our delivered images.
+#
+# Some ARGs have multiple valid values and can be changed for local testing.
+# You can either edit them locally in this file or pass them with --build-arg
+# to docker build when building.
+# Defaults shared across frameworks (Python, NATS, etcd, FFmpeg and NIXL values are
+# read from here for every render). This section also serves as the per-framework
+# section when rendering with --framework=dynamo.
+dynamo:
+  base_image: nvcr.io/nvidia/cuda-dl-base
+  base_image_tag: 25.01-cuda12.8-devel-ubuntu24.04
+  # epp_image and frontend_image are only emitted as ARGs when --target=frontend.
+  epp_image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v0.5.1
+  frontend_image: nvcr.io/nvidia/base/ubuntu:noble-20250619
+  # Quoted so the value stays the string "3.12" instead of being parsed as a float.
+  python_version: "3.12"
+  nats_version: v2.10.28
+  etcd_version: v3.5.21
+  nixl_ref: 0.9.0
+  nixl_ucx_ref: v1.20.0
+  nixl_gdrcopy_ref: v2.5.1
+  nixl_ucx_efa_ref: 9d2b88a1f67faf9876f267658bd077b379b8bb76
+  nixl_libfabric_ref: v2.3.0
+  # Feature toggles are quoted so they are rendered as the literal strings
+  # "true"/"false" rather than being parsed as YAML booleans.
+  enable_kvbm: "false"
+  enable_media_nixl: "false"
+  enable_media_ffmpeg: "false"
+  enable_gpu_memory_service: "false"
+  ffmpeg_version: "7.1"
+  # Only emitted (as EFA_VERSION) when render.py is run with --make-efa.
+  efa_version: 1.45.1
+# Defaults used when rendering with --framework=vllm.
+vllm:
+  base_image: nvcr.io/nvidia/cuda-dl-base
+  runtime_image: nvcr.io/nvidia/cuda
+  # ${CUDA_VERSION} is written verbatim into the rendered ARG line; render.py does not
+  # substitute it. Presumably Docker expands it from ARG CUDA_VERSION at build time - confirm.
+  runtime_image_tag: ${CUDA_VERSION}.0-runtime-ubuntu24.04
+  vllm_ref: v0.14.1
+  flashinf_ref: v0.5.3
+  lmcache_ref: 0.3.12
+  max_jobs: "10"
+  enable_media_nixl: "true"
+  enable_media_ffmpeg: "true"
+  enable_gpu_memory_service: "true"
+  enable_kvbm: "true"
+  # Per-CUDA overrides: the args template builds the key as "cuda" + --cuda-version
+  # and reads base_image_tag from the matching entry.
+  cuda12.9:
+    base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
+  cuda13.0:
+    base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
+# Defaults used when rendering with --framework=sglang.
+sglang:
+  base_image: nvcr.io/nvidia/cuda-dl-base
+  runtime_image: lmsysorg/sglang
+  # Per-CUDA overrides: the args template builds the key as "cuda" + --cuda-version
+  # and reads both base_image_tag and runtime_image_tag from the matching entry.
+  cuda12.9:
+    base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
+    runtime_image_tag: v0.5.7-runtime
+  cuda13.0:
+    base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
+    runtime_image_tag: v0.5.8-cu130-runtime
+  enable_media_nixl: "true"
+  enable_media_ffmpeg: "true"
+  enable_gpu_memory_service: "true"
+  enable_kvbm: "false"
+# Defaults used when rendering with --framework=trtllm. Base and runtime image tags
+# are fixed here; there are no per-CUDA override entries for this framework.
+trtllm:
+  base_image: nvcr.io/nvidia/pytorch
+  base_image_tag: 25.12-py3
+  runtime_image: nvcr.io/nvidia/cuda-dl-base
+  runtime_image_tag: 25.10-cuda13.0-runtime-ubuntu24.04
+  enable_media_nixl: "true"
+  enable_media_ffmpeg: "true"
+  enable_gpu_memory_service: "false"
+  enable_kvbm: "true"
+  # Rendered as ARG TRTLLM_PYTHON_VERSION.
+  python_version: "3.12"
+  index_url: https://pypi.nvidia.com/
+  pip_wheel_dir: /tmp/trtllm_wheel/
+  # Rendered as ARG TENSORRTLLM_PIP_WHEEL.
+  pip_wheel: tensorrt-llm==1.3.0rc1
+  # ${TENSORRTLLM_PIP_WHEEL#*==} is written verbatim into the rendered ARG line.
+  # Presumably it is expanded at docker build time to the version after "==" in
+  # pip_wheel - confirm.
+  trtllm_wheel_image: nvcr.io/nvidia/tensorrt-llm/release:${TENSORRTLLM_PIP_WHEEL#*==}
+  # NOTE(review): differs from the pip_wheel version above (1.3.0rc1) - confirm this is intentional.
+  github_trtllm_commit: 1.2.0rc6
+  torch_version: 2.10.0a0+b4e4ee81d3.nv25.12
+  torch_tensorrt_version: 2.10.0a0
+  torchvision_version: 0.25.0a0+ca221243
+  torchao_ver: 0.15.0+git01374eb5
+  torchdata_ver: 0.11.0
+  torchtitan_ver: 0.2.0
+  jinja2_version: 3.1.6
+  sympy_version: 1.14.0
+  pytorch_triton_ver: 3.5.1+gitbfeb0668.nv25.12
+  flash_attn_version: 2.7.4.post1+25.12
+  flashinfer_python_ver: 0.6.1
+  has_trtllm_context: "0"
--- a/container/render.py
+++ b/container/render.py
+#!/usr/bin/env python3
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+import argparse
+import re
+import sys
+from pathlib import Path
+import yaml
+from jinja2 import Environment, FileSystemLoader
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description="Renders dynamo Dockerfiles from templates"
+    )
+    parser.add_argument(
+        "--framework",
+        type=str,
+        default="vllm",
+        help="Dockerfile framework to use [dynamo, vllm, sglang, trtllm]",
+    )
+    parser.add_argument(
+        "--target",
+        type=str,
+        default="runtime",
+        help="Dockerfile target to use. Non-exhaustive examples: [runtime, dev, local-dev]",
+    )
+    parser.add_argument(
+        "--platform",
+        type=str,
+        default="amd64",
+        help="Dockerfile platform to use. [amdg64, arm64]",
+    )
+    parser.add_argument(
+        "--cuda-version",
+        type=str,
+        default="12.9",
+        help="CUDA version to use. [12.9, 13.0]",
+    )
+    parser.add_argument("--make-efa", action="store_true", help="Enable AWS EFA")
+    parser.add_argument(
+        "--short-output",
+        action="store_true",
+        help="Output filename is just rendered.Dockerfile",
+    )
+    parser.add_argument(
+        "--show-result",
+        action="store_true",
+        help="Prints the rendered Dockerfile to stdout.",
+    )
+    args = parser.parse_args()
+    return args
+def validate_args(args):
+    # Placeholder hook: main() calls this right after parse_args(), but no checks are
+    # implemented yet, so the parsed arguments are accepted unchanged.
+    # TODO: Add validation logic
+    return
+def render(args, context, script_dir):
+    env = Environment(
+        loader=FileSystemLoader(script_dir), trim_blocks=False, lstrip_blocks=True
+    )
+    template = env.get_template("Dockerfile.template")
+    rendered = template.render(
+        context=context,
+        framework=args.framework,
+        target=args.target,
+        platform=args.platform,
+        cuda_version=args.cuda_version,
+        make_efa=args.make_efa,
+    )
+    # Replace all instances of 3+ newlines with 2 newlines
+    cleaned = re.sub(r"\n{3,}", "\n\n", rendered)
+    if args.short_output:
+        filename = "rendered.Dockerfile"
+    else:
+        filename = f"{args.framework}-{args.target}-cuda{args.cuda_version}-{args.platform}-rendered.Dockerfile"
+    with open(f"{script_dir}/{filename}", "w") as f:
+        f.write(cleaned)
+    if args.show_result:
+        print("##############")
+        print("# Dockerfile #")
+        print("##############")
+        print(cleaned)
+        print("##############")
+    print(f"INFO: Generated Dockerfile written to {script_dir}/{filename}")
+    return
+def main():
+    args = parse_args()
+    validate_args(args)
+    script_dir = Path(sys.argv[0]).parent
+    with open(f"{script_dir}/context.yaml", "r") as f:
+        context = yaml.safe_load(f)
+    render(args, context, script_dir)
+    if args.target == "local-dev":
+        print(
+            "INFO: Remember to add --build-arg values for USER_UID and USER_GID when building a local-dev image!"
+        )
+        print(
+            "      Recommendation: --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g)"
+        )
+if __name__ == "__main__":
+    main()
--- a/container/templates/args.Dockerfile
+++ b/container/templates/args.Dockerfile
+{#
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#}
+##########################
+#### Build Arguments #####
+##########################
+# Define general architecture ARGs for supporting both x86 and aarch64 builds.
+#   ARCH: Used for package suffixes (e.g., amd64, arm64)
+#   ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
+#
+# Default values are for x86/amd64:
+#   --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
+#
+# For arm64/aarch64, build with:
+#   --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
+#TODO OPS-592: Leverage uname -m to determine ARCH instead of passing it as an arg
+ARG ARCH={{ platform }}
+ARG ARCH_ALT={{ "x86_64" if platform == "amd64" else "aarch64" }}
+# Python/CUDA configuration
+ARG PYTHON_VERSION={{ context.dynamo.python_version }}
+ARG CUDA_VERSION={{ cuda_version }}
+ARG CUDA_MAJOR=${CUDA_VERSION%%.*}
+{% if framework in ["vllm", "sglang"] -%}
+{% set cuda_context_key = "cuda" ~ cuda_version %}
+# Base image configuration
+ARG BASE_IMAGE={{ context[framework].base_image }}
+ARG BASE_IMAGE_TAG={{ context[framework][cuda_context_key].base_image_tag }}
+{% else -%}
+ARG BASE_IMAGE={{ context[framework].base_image }}
+ARG BASE_IMAGE_TAG={{ context[framework].base_image_tag }}
+{%- endif %}
+{% if framework == "sglang" -%}
+{% set cuda_context_key = "cuda" + cuda_version %}
+# Runtime image configuration
+ARG RUNTIME_IMAGE={{ context[framework].runtime_image }}
+ARG RUNTIME_IMAGE_TAG={{ context[framework][cuda_context_key].runtime_image_tag }}
+{% elif framework != "dynamo" -%}
+ARG RUNTIME_IMAGE={{ context[framework].runtime_image }}
+ARG RUNTIME_IMAGE_TAG={{ context[framework].runtime_image_tag }}
+{%- endif %}
+# Build configuration
+ARG ENABLE_KVBM={{ context[framework].enable_kvbm }}
+ARG CARGO_BUILD_JOBS
+ARG NATS_VERSION={{ context.dynamo.nats_version }}
+ARG ETCD_VERSION={{ context.dynamo.etcd_version }}
+ARG ENABLE_MEDIA_NIXL={{ context[framework].enable_media_nixl }}
+ARG ENABLE_MEDIA_FFMPEG={{ context[framework].enable_media_ffmpeg }}
+ARG FFMPEG_VERSION={{ context.dynamo.ffmpeg_version }}
+ARG ENABLE_GPU_MEMORY_SERVICE={{ context[framework].enable_gpu_memory_service }}
+# SCCACHE configuration
+ARG USE_SCCACHE
+ARG SCCACHE_BUCKET=""
+ARG SCCACHE_REGION=""
+# NIXL configuration
+ARG NIXL_UCX_REF={{ context.dynamo.nixl_ucx_ref }}
+ARG NIXL_REF={{ context.dynamo.nixl_ref }}
+ARG NIXL_GDRCOPY_REF={{ context.dynamo.nixl_gdrcopy_ref }}
+ARG NIXL_LIBFABRIC_REF={{ context.dynamo.nixl_libfabric_ref }}
+{% if target == "dev" or target == "local-dev" %}
+ARG FRAMEWORK={{ framework }}
+{% endif %}
+{% if target == "frontend" %}
+ARG EPP_IMAGE={{ context.dynamo.epp_image }}
+ARG FRONTEND_IMAGE={{ context.dynamo.frontend_image }}
+{% endif %}
+{% if framework == "vllm" -%}
+# Make sure to update the dependency version in pyproject.toml when updating this
+ARG VLLM_REF={{ context.vllm.vllm_ref }}
+ARG MAX_JOBS={{ context.vllm.max_jobs }}
+# FlashInfer only respected when building vLLM from source, ie when VLLM_REF does not start with 'v' or for arm64 builds
+ARG FLASHINF_REF={{ context.vllm.flashinf_ref }}
+ARG LMCACHE_REF={{ context.vllm.lmcache_ref }}
+# If left blank, then we will fallback to vLLM defaults
+ARG DEEPGEMM_REF=""
+{%- endif -%}
+{% if framework == "trtllm" %}
+# TensorRT-LLM specific configuration
+ARG HAS_TRTLLM_CONTEXT={{ context.trtllm.has_trtllm_context }}
+ARG TENSORRTLLM_PIP_WHEEL={{ context.trtllm.pip_wheel }}
+ARG TENSORRTLLM_INDEX_URL={{ context.trtllm.index_url }}
+ARG GITHUB_TRTLLM_COMMIT={{ context.trtllm.github_trtllm_commit }}
+ARG TRTLLM_WHEEL_IMAGE={{ context.trtllm.trtllm_wheel_image }}
+# Copy pytorch installation from NGC PyTorch
+ARG FLASHINFER_PYTHON_VER={{ context.trtllm.flashinfer_python_ver }}
+ARG PYTORCH_TRITON_VER={{ context.trtllm.pytorch_triton_ver }}
+ARG TORCHAO_VER={{ context.trtllm.torchao_ver }}
+ARG TORCHDATA_VER={{ context.trtllm.torchdata_ver }}
+ARG TORCHTITAN_VER={{ context.trtllm.torchtitan_ver }}
+ARG TORCH_VER={{ context.trtllm.torch_version }}
+ARG TORCH_TENSORRT_VER={{ context.trtllm.torch_tensorrt_version }}
+ARG TORCHVISION_VER={{ context.trtllm.torchvision_version }}
+ARG JINJA2_VER={{ context.trtllm.jinja2_version }}
+ARG SYMPY_VER={{ context.trtllm.sympy_version }}
+ARG FLASH_ATTN_VER={{ context.trtllm.flash_attn_version }}
+# Python configuration
+ARG TRTLLM_PYTHON_VERSION={{ context[framework].python_version }}
+{%- endif -%}
+{% if make_efa == true %}
+ARG EFA_VERSION={{ context.dynamo.efa_version }}
+ARG EFA_BASE_IMAGE={{ "runtime" if target=="runtime" else "dev" }}
+{%- endif -%}
\ No newline at end of file
--- a/container/Dockerfile.aws
+++ b/container/Dockerfile.aws
-# syntax=docker/dockerfile:1.10.0
+{#
-# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
+#}
+#############################
+########## AWS EFA ##########
+#############################
 #
-# PURPOSE: AWS EFA support layer
+# This stage extends the runtime/dev stage with AWS EFA installer
-#
-# This Dockerfile adds AWS EFA (Elastic Fabric Adapter) support on top of
-# the runtime or dev stages from framework Dockerfiles (vllm, sglang, trtllm).
-#
-# Usage (via build.sh with --make-efa flag):
-#   ./build.sh --framework vllm --target runtime --make-efa
-#   ./build.sh --framework vllm --target local-dev --make-efa
-ARG BASE_IMAGE
-ARG EFA_VERSION
-###########################################################
-########## Runtime with AWS EFA ##########################
-###########################################################
-#
-# This stage extends the runtime stage with AWS EFA installer
 # which includes: libfabric and aws-ofi-nccl plugin
 #
 # Use this stage when deploying on AWS infrastructure with EFA support
-FROM ${BASE_IMAGE} AS runtime-aws
+FROM ${EFA_BASE_IMAGE} AS aws
 ARG EFA_VERSION
+{% if target == "runtime" %}
 USER root
+{% endif %}
 # Install AWS EFA installer with bundled libfabric and aws-ofi-nccl
 # Flags explanation:
@@ -48,43 +38,9 @@ RUN mkdir -p /tmp/efa && \
 ENV EFA_VERSION="${EFA_VERSION}"
+{% if target == "runtime" %}
 USER dynamo
+{% endif %}
-ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
-CMD []
-########################################################################
-########## Development with AWS EFA (run.sh, runs as root user) ########
-########################################################################
-#
-# PURPOSE: Development environment with AWS EFA support
-#
-# This stage extends dev stages with development tools for building and
-# debugging on EFA-enabled AWS instances.
-FROM ${BASE_IMAGE} AS dev-aws
-ARG EFA_VERSION
-# Dev stage runs as root, no USER switch needed
-# Install AWS EFA installer with bundled libfabric and aws-ofi-nccl
-# Flags explanation:
-#   --skip-kmod: Skip kernel module installation (handled by host)
-#   --skip-limit-conf: Skip ulimit configuration (handled by container runtime)
-#   --no-verify: Skip GPG verification (optional, can be removed if verification is needed)
-RUN mkdir -p /tmp/efa && \
-    cd /tmp/efa && \
-    curl --retry 3 --retry-delay 2 -fsSL -o aws-efa-installer-${EFA_VERSION}.tar.gz \
-        https://efa-installer.amazonaws.com/aws-efa-installer-${EFA_VERSION}.tar.gz && \
-    tar -xf aws-efa-installer-${EFA_VERSION}.tar.gz && \
-    cd aws-efa-installer && \
-    apt-get update && \
-    ./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify && \
-    rm -rf /tmp/efa && \
-    rm -rf /opt/amazon/aws-ofi-nccl && \
-    ldconfig
-ENV EFA_VERSION="${EFA_VERSION}"
 ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
 CMD []