ci: automate dynamo rcs (#6572)

Signed-off-by: pvijayakrish <pvijayakrish@nvidia.com>

ci: automate dynamo rcs (#6572)
Signed-off-by: pvijayakrish <pvijayakrish@nvidia.com>
3dcc53d5 · Pavithra Vijayakrishnan · GitHub · 96fc1ab6 · 3dcc53d5 · 96fc1ab6
Unverified Commit 3dcc53d5 authored Feb 25, 2026 by Pavithra Vijayakrishnan Committed by GitHub Feb 25, 2026
3 changed files
--- a/.github/workflows/build-frontend-image.yaml
+++ b/.github/workflows/build-frontend-image.yaml
@@ -10,6 +10,17 @@ on:
    - "pull-request/[0-9]+"
    # Note: release/* branches are handled by release.yml which calls this workflow
  workflow_call:
+    inputs:
+      skip_change_detection:
+        description: 'Skip changed-files detection and always build (used by release pipeline)'
+        required: false
+        type: boolean
+        default: false
+      image_prefix:
+        description: 'Optional prefix for image tags (e.g., release-0.9.0). When set, images are also tagged as {prefix}-frontend-{arch}.'
+        required: false
+        type: string
+        default: ''
    secrets:
      AWS_ACCOUNT_ID:
        required: true
@@ -25,6 +36,8 @@ on:
        required: true
      AZURE_ACR_PASSWORD:
        required: true
+      CI_TOKEN:
+        required: false
      SCCACHE_S3_BUCKET:
        required: true
@@ -59,7 +72,7 @@ jobs:
  build-epp-image:
    name: Build EPP Image
    needs: changed-files
-    if: needs.changed-files.outputs.frontend == 'true'
+    if: needs.changed-files.outputs.frontend == 'true' || inputs.skip_change_detection == true
    runs-on: prod-builder-v3
    outputs:
      epp_image_ref: ${{ steps.build-epp-image.outputs.epp_image_ref }}
@@ -111,7 +124,7 @@ jobs:
  build-frontend-image:
    name: Build Frontend Image
    needs: [changed-files, build-epp-image]
-    if: needs.changed-files.outputs.frontend == 'true'
+    if: needs.changed-files.outputs.frontend == 'true' || inputs.skip_change_detection == true
    strategy:
      fail-fast: false
      matrix:
@@ -196,6 +209,7 @@ jobs:
            ${{ matrix.arch == 'amd64' && steps.calculate-target-tag.outputs.azure_target_image_uri || '' }}
            ${{ github.ref_name == 'main' && format('{0}:main-frontend-{1}', steps.calculate-target-tag.outputs.ecr_image_base, matrix.arch) || '' }}
            ${{ github.ref_name == 'main' && format('{0}:main-frontend-{1}-{2}', steps.calculate-target-tag.outputs.ecr_image_base, github.sha, matrix.arch) || '' }}
+            ${{ inputs.image_prefix != '' && format('{0}:{1}-frontend-{2}', steps.calculate-target-tag.outputs.ecr_image_base, inputs.image_prefix, matrix.arch) || '' }}
      - name: Show summary
        shell: bash

--- a/.github/workflows/ci-test-suite.yml
+++ b/.github/workflows/ci-test-suite.yml
-# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-# Reusable CI Test Suite Workflow
-# This workflow is called by nightly-ci.yml and post-merge-ci.yml
-# to run the full test suite with configurable parameters.
-name: CI Test Suite
-# Reusable-workflow entry point: workflow_call is the only trigger declared
-# here, so this file runs only when another workflow invokes it.
-on:
-  workflow_call:
-    inputs:
-      pipeline_type:
-        description: 'Type of pipeline: nightly or post_merge'
-        required: true
-        type: string
-      # Switches the unit/integration pytest mark expressions between the
-      # variant that includes `nightly` and the one that does not.
-      include_nightly_marks:
-        description: 'Include nightly pytest marks in test selection'
-        required: true
-        type: boolean
-      # Exported as env.IMAGE_PREFIX and used as the leading component of
-      # every image tag that is pushed or pulled by the jobs.
-      image_prefix:
-        description: 'Prefix for image tags (nightly or main)'
-        required: true
-        type: string
-      enable_slack_notification:
-        description: 'Enable Slack notifications on completion'
-        required: false
-        type: boolean
-        default: false
-    secrets:
-      # Secrets every caller must provide.
-      AWS_ACCOUNT_ID:
-        required: true
-      AWS_DEFAULT_REGION:
-        required: true
-      AWS_ACCESS_KEY_ID:
-        required: true
-      AWS_SECRET_ACCESS_KEY:
-        required: true
-      NGC_CI_ACCESS_TOKEN:
-        required: true
-      CI_TOKEN:
-        required: true
-      SCCACHE_S3_BUCKET:
-        required: true
-      AZURE_ACR_HOSTNAME:
-        required: true
-      AZURE_ACR_USER:
-        required: true
-      AZURE_ACR_PASSWORD:
-        required: true
-      # Secrets a caller may omit.
-      SLACK_NOTIFY_NIGHTLY_WEBHOOK_URL:
-        required: false
-      SLACK_OPS_SUPPORT_GROUP_ID:
-        required: false
-      AZURE_AKS_CI_KUBECONFIG_B64:
-        required: false
-      HF_TOKEN:
-        required: false
-      DYNAMO_INGRESS_SUFFIX:
-        required: false
-# Workflow-level GITHUB_TOKEN scope: read-only access to repository contents.
-permissions:
-  contents: read
-# Default shell for `run` steps: bash without profile/rc files, with -e (stop
-# at the first failing command) and -o pipefail (a pipeline fails if any stage
-# of it fails).
-defaults:
-  run:
-    shell: bash --noprofile --norc -eo pipefail {0}
-# Workflow-wide values used by the jobs to compose image references.
-env:
-  REGISTRY_IMAGE: ai-dynamo/dynamo
-  IMAGE_PREFIX: ${{ inputs.image_prefix }}
-############################## BUILD JOBS ##############################
-jobs:
-  # Builds the `runtime` image target for each matrix framework on amd64 with
-  # CUDA 12.9, then pushes it under two tags.
-  build-amd64:
-    # The test jobs' build-status check matches build jobs by this display
-    # name ("Build <framework> (<arch>)"); keep the two in sync.
-    name: Build ${{ matrix.framework }} (amd64)
-    runs-on: prod-builder-amd-v1
-    timeout-minutes: 120
-    strategy:
-      # A failure for one framework does not cancel the other matrix entries.
-      fail-fast: false
-      matrix:
-        framework: [vllm, sglang]
-    env:
-      ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
-    steps:
-      - uses: actions/checkout@v4
-      - name: Login to Container Registries
-        uses: ./.github/actions/docker-login
-        with:
-          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
-          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
-          ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
-          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
-          azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
-          azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
-      # The local image tag embeds the run id, so it is unique to this run.
-      - name: Build Runtime Image
-        id: build_runtime
-        uses: ./.github/actions/docker-build
-        with:
-          framework: ${{ matrix.framework }}
-          target: runtime
-          platform: amd64
-          cuda_version: '12.9'
-          image_tag: runtime-${{ matrix.framework }}-amd64:${{ github.run_id }}
-          ci_token: ${{ secrets.CI_TOKEN }}
-          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
-          sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
-          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
-          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-      # Two tags per image: `<prefix>-<framework>-amd64` has no run-specific
-      # part (it is the tag the test jobs pull), and `...-run-<run_id>`
-      # identifies this particular run.
-      # NOTE(review): aws_push/azure_push presumably select ECR and ACR as
-      # push targets; confirm in the docker-tag-push action.
-      - name: Tag and Push Runtime Images
-        uses: ./.github/actions/docker-tag-push
-        with:
-          local_image: runtime-${{ matrix.framework }}-amd64:${{ github.run_id }}
-          push_tags: |
-            ${{ env.REGISTRY_IMAGE }}:${{ env.IMAGE_PREFIX }}-${{ matrix.framework }}-amd64
-            ${{ env.REGISTRY_IMAGE }}:${{ env.IMAGE_PREFIX }}-${{ matrix.framework }}-amd64-run-${{ github.run_id }}
-          aws_push: 'true'
-          azure_push: 'true'
-          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
-          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
-          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
-  # arm64 counterpart of build-amd64: builds the `runtime` target for each
-  # matrix entry and pushes it under two tags.
-  build-arm64:
-    # The test jobs' build-status check matches build jobs by this display
-    # name ("Build <framework> (<arch>)"); keep the two in sync.
-    name: Build ${{ matrix.framework }} (arm64)
-    runs-on: prod-builder-arm-v1
-    timeout-minutes: 120
-    strategy:
-      # A failure for one framework does not cancel the other matrix entries.
-      fail-fast: false
-      matrix:
-        # Each entry carries its own CUDA version (currently 12.9 for both).
-        include:
-          - framework: vllm
-            cuda_version: '12.9'
-          - framework: sglang
-            cuda_version: '12.9'
-    env:
-      ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
-    steps:
-      - uses: actions/checkout@v4
-      - name: Login to Container Registries
-        uses: ./.github/actions/docker-login
-        with:
-          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
-          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
-          ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
-          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
-          azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
-          azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
-      # The local image tag embeds the run id, so it is unique to this run.
-      - name: Build Runtime Image
-        id: build_runtime
-        uses: ./.github/actions/docker-build
-        with:
-          framework: ${{ matrix.framework }}
-          target: runtime
-          platform: arm64
-          cuda_version: ${{ matrix.cuda_version }}
-          image_tag: runtime-${{ matrix.framework }}-arm64:${{ github.run_id }}
-          ci_token: ${{ secrets.CI_TOKEN }}
-          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
-          sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
-          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
-          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-      # Two tags per image: `<prefix>-<framework>-arm64` has no run-specific
-      # part (it is the tag the test jobs pull), and `...-run-<run_id>`
-      # identifies this particular run.
-      - name: Tag and Push Runtime Images
-        uses: ./.github/actions/docker-tag-push
-        with:
-          local_image: runtime-${{ matrix.framework }}-arm64:${{ github.run_id }}
-          push_tags: |
-            ${{ env.REGISTRY_IMAGE }}:${{ env.IMAGE_PREFIX }}-${{ matrix.framework }}-arm64
-            ${{ env.REGISTRY_IMAGE }}:${{ env.IMAGE_PREFIX }}-${{ matrix.framework }}-arm64-run-${{ github.run_id }}
-          aws_push: 'true'
-          azure_push: 'true'
-          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
-          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
-          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
-  # CUDA 13 builds (vllm, trtllm, and sglang; both architectures)
-  # Builds the `runtime` target against CUDA 13 for vllm, trtllm and sglang
-  # on amd64 and pushes the result under `-cuda13-` tags.
-  build-cuda13-amd64:
-    # The test jobs' build-status check looks up the trtllm build by this
-    # display name ("Build trtllm CUDA13 (<arch>)"); keep the two in sync.
-    name: Build ${{ matrix.framework }} CUDA13 (amd64)
-    runs-on: prod-builder-amd-v1
-    timeout-minutes: 120
-    strategy:
-      # A failure for one framework does not cancel the other matrix entries.
-      fail-fast: false
-      matrix:
-        framework: [vllm, trtllm, sglang]
-    env:
-      ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
-    steps:
-      - uses: actions/checkout@v4
-      - name: Login to Container Registries
-        uses: ./.github/actions/docker-login
-        with:
-          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
-          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
-          ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
-          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
-          azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
-          azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
-      # trtllm is built with CUDA 13.1; the other frameworks use 13.0.
-      - name: Build CUDA 13 Runtime Image
-        id: build_runtime
-        uses: ./.github/actions/docker-build
-        with:
-          framework: ${{ matrix.framework }}
-          target: runtime
-          platform: amd64
-          cuda_version: ${{ matrix.framework == 'trtllm' && '13.1' || '13.0' }}
-          image_tag: runtime-${{ matrix.framework }}-cuda13-amd64:${{ github.run_id }}
-          ci_token: ${{ secrets.CI_TOKEN }}
-          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
-          sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
-          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
-          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-      # Every framework gets the `-cuda13-amd64` tag and its per-run variant.
-      # trtllm additionally gets the plain `<prefix>-trtllm-amd64` tag: the
-      # CUDA 12.9 build job has no trtllm entry, and the test jobs pull
-      # `<prefix>-<framework>-<arch>`. For other frameworks the third
-      # expression evaluates to an empty string.
-      - name: Tag and Push CUDA 13 Runtime Images
-        uses: ./.github/actions/docker-tag-push
-        with:
-          local_image: runtime-${{ matrix.framework }}-cuda13-amd64:${{ github.run_id }}
-          push_tags: |
-            ${{ env.REGISTRY_IMAGE }}:${{ env.IMAGE_PREFIX }}-${{ matrix.framework }}-cuda13-amd64
-            ${{ env.REGISTRY_IMAGE }}:${{ env.IMAGE_PREFIX }}-${{ matrix.framework }}-cuda13-amd64-run-${{ github.run_id }}
-            ${{ matrix.framework == 'trtllm' && format('{0}:{1}-{2}-amd64', env.REGISTRY_IMAGE, env.IMAGE_PREFIX, matrix.framework) || '' }}
-          aws_push: 'true'
-          azure_push: 'true'
-          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
-          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
-          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
-  # arm64 counterpart of build-cuda13-amd64: builds the `runtime` target
-  # against CUDA 13 for vllm, trtllm and sglang and pushes `-cuda13-` tags.
-  build-cuda13-arm64:
-    # The test jobs' build-status check looks up the trtllm build by this
-    # display name ("Build trtllm CUDA13 (<arch>)"); keep the two in sync.
-    name: Build ${{ matrix.framework }} CUDA13 (arm64)
-    runs-on: prod-builder-arm-v1
-    timeout-minutes: 120
-    strategy:
-      # A failure for one framework does not cancel the other matrix entries.
-      fail-fast: false
-      matrix:
-        framework: [vllm, trtllm, sglang]
-    env:
-      ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
-    steps:
-      - uses: actions/checkout@v4
-      - name: Login to Container Registries
-        uses: ./.github/actions/docker-login
-        with:
-          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
-          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
-          ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
-          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
-          azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
-          azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
-      # trtllm is built with CUDA 13.1; the other frameworks use 13.0.
-      - name: Build CUDA 13 Runtime Image
-        id: build_runtime
-        uses: ./.github/actions/docker-build
-        with:
-          framework: ${{ matrix.framework }}
-          target: runtime
-          platform: arm64
-          cuda_version: ${{ matrix.framework == 'trtllm' && '13.1' || '13.0' }}
-          image_tag: runtime-${{ matrix.framework }}-cuda13-arm64:${{ github.run_id }}
-          ci_token: ${{ secrets.CI_TOKEN }}
-          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
-          sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
-          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
-          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-      # Every framework gets the `-cuda13-arm64` tag and its per-run variant.
-      # trtllm additionally gets the plain `<prefix>-trtllm-arm64` tag: the
-      # CUDA 12.9 build job has no trtllm entry, and the test jobs pull
-      # `<prefix>-<framework>-<arch>`. For other frameworks the third
-      # expression evaluates to an empty string.
-      - name: Tag and Push CUDA 13 Runtime Images
-        uses: ./.github/actions/docker-tag-push
-        with:
-          local_image: runtime-${{ matrix.framework }}-cuda13-arm64:${{ github.run_id }}
-          push_tags: |
-            ${{ env.REGISTRY_IMAGE }}:${{ env.IMAGE_PREFIX }}-${{ matrix.framework }}-cuda13-arm64
-            ${{ env.REGISTRY_IMAGE }}:${{ env.IMAGE_PREFIX }}-${{ matrix.framework }}-cuda13-arm64-run-${{ github.run_id }}
-            ${{ matrix.framework == 'trtllm' && format('{0}:{1}-{2}-arm64', env.REGISTRY_IMAGE, env.IMAGE_PREFIX, matrix.framework) || '' }}
-          aws_push: 'true'
-          azure_push: 'true'
-          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
-          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
-          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
-############################## TEST JOBS ##############################
-  # Unit tests for every framework/arch pair. The job waits for all four
-  # build jobs; `if: always()` starts it even when some of them failed, and
-  # the first step then verifies the one build this matrix entry needs.
-  unit-tests:
-    name: ${{ matrix.framework }}-${{ matrix.arch.arch }}-unit
-    needs: [build-amd64, build-arm64, build-cuda13-arm64, build-cuda13-amd64]
-    # The condition previously also tested `inputs.skip_tests`, an input that
-    # workflow_call does not declare; that term was always true, so plain
-    # always() (as used by the other test jobs) is equivalent.
-    if: always()
-    runs-on: ${{ matrix.arch.runner }}
-    timeout-minutes: 45
-    strategy:
-      # A failing matrix entry does not cancel the others.
-      fail-fast: false
-      matrix:
-        framework: [vllm, trtllm, sglang]
-        arch:
-          - arch: amd64
-            runner: prod-builder-amd-gpu-v1
-          - arch: arm64
-            runner: prod-builder-arm-v1
-    steps:
-      - uses: actions/checkout@v4
-      # Looks up, through the GitHub REST API, the conclusion of the build job
-      # for this framework/arch and fails the step unless it is `success`.
-      # Jobs are matched by display name; trtllm is matched against its
-      # CUDA13 build because the CUDA 12.9 build jobs have no trtllm entry.
-      - name: Check if build succeeded
-        id: check_build
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          set +x
-          echo "==========================================="
-          echo "DEBUG: Checking build status"
-          echo "==========================================="
-          echo "Framework: ${{ matrix.framework }}"
-          echo "Architecture: ${{ matrix.arch.arch }}"
-          echo "Repository: ${{ github.repository }}"
-          echo "Run ID: ${{ github.run_id }}"
-          BUILD_JOB_PATTERN="Build ${{ matrix.framework }} ${{ matrix.framework == 'trtllm' && 'CUDA13 ' || '' }}(${{ matrix.arch.arch }})"
-          echo "Looking for job pattern: '$BUILD_JOB_PATTERN'"
-          # Query GitHub API for job status
-          echo ""
-          echo "Querying GitHub API..."
-          JOBS_RESPONSE=$(curl -s -S -L -w "\n%{http_code}" \
-            -H "Authorization: Bearer ${GITHUB_TOKEN}" \
-            -H "Accept: application/vnd.github.v3+json" \
-            "https://api.github.com/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/jobs?per_page=100")
-          HTTP_CODE=$(echo "$JOBS_RESPONSE" | tail -n1)
-          JOBS=$(echo "$JOBS_RESPONSE" | sed '$d')
-          echo "HTTP Response Code: $HTTP_CODE"
-          if [ "$HTTP_CODE" != "200" ]; then
-            echo "Error: GitHub API returned non-200 status code"
-            echo "Response: $JOBS"
-            exit 1
-          fi
-          # Debug: Show total jobs and all job names
-          TOTAL_JOBS=$(echo "$JOBS" | jq '.jobs | length')
-          echo ""
-          echo "Total jobs found: $TOTAL_JOBS"
-          echo ""
-          echo "All job names in this workflow run:"
-          echo "$JOBS" | jq -r '.jobs[] | "  - \(.name) [status: \(.status), conclusion: \(.conclusion)]"'
-          echo ""
-          # Try exact endswith match
-          echo "Searching for jobs ending with: '$BUILD_JOB_PATTERN'"
-          MATCHING_JOBS=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '[.jobs[] | select(.name | endswith($pattern))] | length')
-          echo "Jobs matching endswith pattern: $MATCHING_JOBS"
-          if [ "$MATCHING_JOBS" -eq 0 ]; then
-            echo ""
-            echo "WARNING: No jobs found with endswith pattern"
-            echo "Trying contains pattern instead..."
-            MATCHING_JOBS=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '[.jobs[] | select(.name | contains($pattern))] | length')
-            echo "Jobs matching contains pattern: $MATCHING_JOBS"
-            if [ "$MATCHING_JOBS" -gt 0 ]; then
-              BUILD_STATUS=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '.jobs[] | select(.name | contains($pattern)) | .conclusion' | head -1)
-              MATCHED_JOB_NAME=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '.jobs[] | select(.name | contains($pattern)) | .name' | head -1)
-            fi
-          else
-            BUILD_STATUS=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '.jobs[] | select(.name | endswith($pattern)) | .conclusion' | head -1)
-            MATCHED_JOB_NAME=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '.jobs[] | select(.name | endswith($pattern)) | .name' | head -1)
-          fi
-          echo ""
-          echo "==========================================="
-          echo "RESULT:"
-          echo "  Matched job: ${MATCHED_JOB_NAME:-none}"
-          echo "  Build status: ${BUILD_STATUS:-not found}"
-          echo "==========================================="
-          # Handle various status cases
-          if [ -z "$BUILD_STATUS" ] || [ "$BUILD_STATUS" = "null" ] || [ "$BUILD_STATUS" = "" ]; then
-            echo ""
-            echo "ERROR: Could not determine build status"
-            echo "This could mean:"
-            echo "  1. The build job is still running"
-            echo "  2. The job name pattern doesn't match"
-            echo "  3. The API response doesn't include this job yet"
-            exit 1
-          fi
-          if [ "$BUILD_STATUS" != "success" ]; then
-            echo ""
-            echo "ERROR: Build did not succeed (status: $BUILD_STATUS)"
-            exit 1
-          fi
-          echo ""
-          echo "✅ Build succeeded. Proceeding with tests."
-      - name: Login to Container Registries
-        uses: ./.github/actions/docker-login
-        with:
-          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
-          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
-      # Pulls `<prefix>-<framework>-<arch>` from ECR and re-tags it locally
-      # without the registry host; that short tag is what the pytest action
-      # below receives as image_tag.
-      - name: Pull image
-        shell: bash
-        env:
-          ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
-          IMAGE_TAG: ${{ env.IMAGE_PREFIX }}-${{ matrix.framework }}-${{ matrix.arch.arch }}
-        run: |
-          docker pull ${ECR_HOSTNAME}/${{ env.REGISTRY_IMAGE }}:${IMAGE_TAG}
-          docker tag ${ECR_HOSTNAME}/${{ env.REGISTRY_IMAGE }}:${IMAGE_TAG} ${IMAGE_TAG}
-      # The mark expression adds `nightly` only when include_nightly_marks is
-      # true. arm64 entries pass dry_run='true'.
-      # NOTE(review): dry_run presumably makes the pytest action skip real
-      # test execution; confirm in the action.
-      - name: Run Unit Tests
-        uses: ./.github/actions/pytest
-        with:
-          image_tag: ${{ env.IMAGE_PREFIX }}-${{ matrix.framework }}-${{ matrix.arch.arch }}
-          pytest_marks: ${{ inputs.include_nightly_marks && 'unit and (nightly or post_merge or pre_merge)' || 'unit and (post_merge or pre_merge)' }}
-          framework: ${{ matrix.framework }}
-          test_type: unit
-          platform_arch: ${{ matrix.arch.arch }}
-          cpu_limit: '8'
-          dry_run: ${{ matrix.arch.arch == 'arm64' && 'true' || 'false' }}
-  # Integration tests for every framework/arch pair. The job waits for all
-  # four build jobs; `if: always()` starts it even when some of them failed,
-  # and the first step then verifies the one build this matrix entry needs.
-  integration-tests:
-    name: ${{ matrix.framework }}-${{ matrix.arch.arch }}-integ
-    needs: [build-amd64, build-arm64, build-cuda13-arm64, build-cuda13-amd64]
-    if: always()
-    runs-on: ${{ matrix.arch.runner }}
-    # The timeout is taken from the matrix entry (90 minutes for both archs).
-    timeout-minutes: ${{ matrix.arch.timeout }}
-    strategy:
-      # A failing matrix entry does not cancel the others.
-      fail-fast: false
-      matrix:
-        framework: [vllm, trtllm, sglang]
-        arch:
-          - arch: amd64
-            runner: prod-builder-amd-gpu-v1
-            timeout: 90
-          - arch: arm64
-            runner: prod-builder-arm-v1
-            timeout: 90
-    steps:
-      - uses: actions/checkout@v4
-      # Looks up, through the GitHub REST API, the conclusion of the build job
-      # for this framework/arch and fails the step unless it is `success`.
-      # Jobs are matched by display name; trtllm is matched against its
-      # CUDA13 build because the CUDA 12.9 build jobs have no trtllm entry.
-      - name: Check if build succeeded
-        id: check_build
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          set +x
-          echo "==========================================="
-          echo "DEBUG: Checking build status"
-          echo "==========================================="
-          echo "Framework: ${{ matrix.framework }}"
-          echo "Architecture: ${{ matrix.arch.arch }}"
-          echo "Repository: ${{ github.repository }}"
-          echo "Run ID: ${{ github.run_id }}"
-          BUILD_JOB_PATTERN="Build ${{ matrix.framework }} ${{ matrix.framework == 'trtllm' && 'CUDA13 ' || '' }}(${{ matrix.arch.arch }})"
-          echo "Looking for job pattern: '$BUILD_JOB_PATTERN'"
-          # Query GitHub API for job status
-          echo ""
-          echo "Querying GitHub API..."
-          JOBS_RESPONSE=$(curl -s -S -L -w "\n%{http_code}" \
-            -H "Authorization: Bearer ${GITHUB_TOKEN}" \
-            -H "Accept: application/vnd.github.v3+json" \
-            "https://api.github.com/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/jobs?per_page=100")
-          HTTP_CODE=$(echo "$JOBS_RESPONSE" | tail -n1)
-          JOBS=$(echo "$JOBS_RESPONSE" | sed '$d')
-          echo "HTTP Response Code: $HTTP_CODE"
-          if [ "$HTTP_CODE" != "200" ]; then
-            echo "Error: GitHub API returned non-200 status code"
-            echo "Response: $JOBS"
-            exit 1
-          fi
-          # Debug: Show total jobs and all job names
-          TOTAL_JOBS=$(echo "$JOBS" | jq '.jobs | length')
-          echo ""
-          echo "Total jobs found: $TOTAL_JOBS"
-          echo ""
-          echo "All job names in this workflow run:"
-          echo "$JOBS" | jq -r '.jobs[] | "  - \(.name) [status: \(.status), conclusion: \(.conclusion)]"'
-          echo ""
-          # Try exact endswith match
-          echo "Searching for jobs ending with: '$BUILD_JOB_PATTERN'"
-          MATCHING_JOBS=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '[.jobs[] | select(.name | endswith($pattern))] | length')
-          echo "Jobs matching endswith pattern: $MATCHING_JOBS"
-          if [ "$MATCHING_JOBS" -eq 0 ]; then
-            echo ""
-            echo "WARNING: No jobs found with endswith pattern"
-            echo "Trying contains pattern instead..."
-            MATCHING_JOBS=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '[.jobs[] | select(.name | contains($pattern))] | length')
-            echo "Jobs matching contains pattern: $MATCHING_JOBS"
-            if [ "$MATCHING_JOBS" -gt 0 ]; then
-              BUILD_STATUS=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '.jobs[] | select(.name | contains($pattern)) | .conclusion' | head -1)
-              MATCHED_JOB_NAME=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '.jobs[] | select(.name | contains($pattern)) | .name' | head -1)
-            fi
-          else
-            BUILD_STATUS=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '.jobs[] | select(.name | endswith($pattern)) | .conclusion' | head -1)
-            MATCHED_JOB_NAME=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '.jobs[] | select(.name | endswith($pattern)) | .name' | head -1)
-          fi
-          echo ""
-          echo "==========================================="
-          echo "RESULT:"
-          echo "  Matched job: ${MATCHED_JOB_NAME:-none}"
-          echo "  Build status: ${BUILD_STATUS:-not found}"
-          echo "==========================================="
-          # Handle various status cases
-          if [ -z "$BUILD_STATUS" ] || [ "$BUILD_STATUS" = "null" ] || [ "$BUILD_STATUS" = "" ]; then
-            echo ""
-            echo "ERROR: Could not determine build status"
-            echo "This could mean:"
-            echo "  1. The build job is still running"
-            echo "  2. The job name pattern doesn't match"
-            echo "  3. The API response doesn't include this job yet"
-            exit 1
-          fi
-          if [ "$BUILD_STATUS" != "success" ]; then
-            echo ""
-            echo "ERROR: Build did not succeed (status: $BUILD_STATUS)"
-            exit 1
-          fi
-          echo ""
-          echo "✅ Build succeeded. Proceeding with tests."
-      - name: Login to Container Registries
-        uses: ./.github/actions/docker-login
-        with:
-          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
-          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
-      # Pulls `<prefix>-<framework>-<arch>` from ECR and re-tags it locally
-      # without the registry host; that short tag is what the pytest action
-      # below receives as image_tag.
-      - name: Pull image
-        shell: bash
-        env:
-          ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
-          IMAGE_TAG: ${{ env.IMAGE_PREFIX }}-${{ matrix.framework }}-${{ matrix.arch.arch }}
-        run: |
-          docker pull ${ECR_HOSTNAME}/${{ env.REGISTRY_IMAGE }}:${IMAGE_TAG}
-          docker tag ${ECR_HOSTNAME}/${{ env.REGISTRY_IMAGE }}:${IMAGE_TAG} ${IMAGE_TAG}
-      # The mark expression adds `nightly` only when include_nightly_marks is
-      # true. arm64 entries pass dry_run='true'.
-      # NOTE(review): dry_run presumably makes the pytest action skip real
-      # test execution; confirm in the action.
-      - name: Run Integration Tests
-        uses: ./.github/actions/pytest
-        with:
-          image_tag: ${{ env.IMAGE_PREFIX }}-${{ matrix.framework }}-${{ matrix.arch.arch }}
-          pytest_marks: ${{ inputs.include_nightly_marks && 'integration and (nightly or post_merge or pre_merge)' || 'integration and (post_merge or pre_merge)' }}
-          framework: ${{ matrix.framework }}
-          test_type: integration
-          platform_arch: ${{ matrix.arch.arch }}
-          dry_run: ${{ matrix.arch.arch == 'arm64' && 'true' || 'false' }}
-  # Single-GPU end-to-end tests for every framework/arch pair. The job waits
-  # for all four build jobs; `if: always()` starts it even when some of them
-  # failed, and a later step verifies the one build this matrix entry needs.
-  e2e-single-gpu-tests:
-    name: ${{ matrix.framework }}-${{ matrix.arch.arch }}-1gpu-e2e
-    needs: [build-amd64, build-arm64, build-cuda13-arm64, build-cuda13-amd64]
-    if: always()
-    runs-on: ${{ matrix.arch.runner }}
-    # The timeout is taken from the matrix entry (180 minutes for both archs).
-    timeout-minutes: ${{ matrix.arch.timeout }}
-    strategy:
-      # A failing matrix entry does not cancel the others.
-      fail-fast: false
-      matrix:
-        framework: [vllm, trtllm, sglang]
-        arch:
-          - arch: amd64
-            runner: prod-builder-amd-gpu-v1
-            timeout: 180
-          - arch: arm64
-            runner: prod-builder-arm-v1
-            timeout: 180
-    steps:
-      # Unlike the unit and integration jobs, this checkout also fetches
-      # Git LFS objects.
-      - uses: actions/checkout@v4
-        with:
-          lfs: true
-      # Looks up, through the GitHub REST API, the conclusion of the build job
-      # for this framework/arch and fails the step unless it is `success`.
-      # Jobs are matched by display name; trtllm is matched against its
-      # CUDA13 build because the CUDA 12.9 build jobs have no trtllm entry.
-      - name: Check if build succeeded
-        id: check_build
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          set +x
-          echo "==========================================="
-          echo "DEBUG: Checking build status"
-          echo "==========================================="
-          echo "Framework: ${{ matrix.framework }}"
-          echo "Architecture: ${{ matrix.arch.arch }}"
-          echo "Repository: ${{ github.repository }}"
-          echo "Run ID: ${{ github.run_id }}"
-          BUILD_JOB_PATTERN="Build ${{ matrix.framework }} ${{ matrix.framework == 'trtllm' && 'CUDA13 ' || '' }}(${{ matrix.arch.arch }})"
-          echo "Looking for job pattern: '$BUILD_JOB_PATTERN'"
-          # Query GitHub API for job status
-          echo ""
-          echo "Querying GitHub API..."
-          JOBS_RESPONSE=$(curl -s -S -L -w "\n%{http_code}" \
-            -H "Authorization: Bearer ${GITHUB_TOKEN}" \
-            -H "Accept: application/vnd.github.v3+json" \
-            "https://api.github.com/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/jobs?per_page=100")
-          HTTP_CODE=$(echo "$JOBS_RESPONSE" | tail -n1)
-          JOBS=$(echo "$JOBS_RESPONSE" | sed '$d')
-          echo "HTTP Response Code: $HTTP_CODE"
-          if [ "$HTTP_CODE" != "200" ]; then
-            echo "Error: GitHub API returned non-200 status code"
-            echo "Response: $JOBS"
-            exit 1
-          fi
-          # Debug: Show total jobs and all job names
-          TOTAL_JOBS=$(echo "$JOBS" | jq '.jobs | length')
-          echo ""
-          echo "Total jobs found: $TOTAL_JOBS"
-          echo ""
-          echo "All job names in this workflow run:"
-          echo "$JOBS" | jq -r '.jobs[] | "  - \(.name) [status: \(.status), conclusion: \(.conclusion)]"'
-          echo ""
-          # Try exact endswith match
-          echo "Searching for jobs ending with: '$BUILD_JOB_PATTERN'"
-          MATCHING_JOBS=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '[.jobs[] | select(.name | endswith($pattern))] | length')
-          echo "Jobs matching endswith pattern: $MATCHING_JOBS"
-          if [ "$MATCHING_JOBS" -eq 0 ]; then
-            echo ""
-            echo "WARNING: No jobs found with endswith pattern"
-            echo "Trying contains pattern instead..."
-            MATCHING_JOBS=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '[.jobs[] | select(.name | contains($pattern))] | length')
-            echo "Jobs matching contains pattern: $MATCHING_JOBS"
-            if [ "$MATCHING_JOBS" -gt 0 ]; then
-              BUILD_STATUS=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '.jobs[] | select(.name | contains($pattern)) | .conclusion' | head -1)
-              MATCHED_JOB_NAME=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '.jobs[] | select(.name | contains($pattern)) | .name' | head -1)
-            fi
-          else
-            BUILD_STATUS=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '.jobs[] | select(.name | endswith($pattern)) | .conclusion' | head -1)
-            MATCHED_JOB_NAME=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '.jobs[] | select(.name | endswith($pattern)) | .name' | head -1)
-          fi
-          echo ""
-          echo "==========================================="
-          echo "RESULT:"
-          echo "  Matched job: ${MATCHED_JOB_NAME:-none}"
-          echo "  Build status: ${BUILD_STATUS:-not found}"
-          echo "==========================================="
-          # Handle various status cases
-          if [ -z "$BUILD_STATUS" ] || [ "$BUILD_STATUS" = "null" ] || [ "$BUILD_STATUS" = "" ]; then
-            echo ""
-            echo "ERROR: Could not determine build status"
-            echo "This could mean:"
-            echo "  1. The build job is still running"
-            echo "  2. The job name pattern doesn't match"
-            echo "  3. The API response doesn't include this job yet"
-            exit 1
-          fi
-          if [ "$BUILD_STATUS" != "success" ]; then
-            echo ""
-            echo "ERROR: Build did not succeed (status: $BUILD_STATUS)"
-            exit 1
-          fi
-          echo ""
-          echo "✅ Build succeeded. Proceeding with tests."
-      - name: Login to Container Registries
-        uses: ./.github/actions/docker-login
-        with:
-          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
-          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
-      # Pulls `<prefix>-<framework>-<arch>` from ECR and re-tags it locally
-      # without the registry host; that short tag is what the pytest action
-      # below receives as image_tag.
-      - name: Pull image
-        shell: bash
-        env:
-          ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
-          IMAGE_TAG: ${{ env.IMAGE_PREFIX }}-${{ matrix.framework }}-${{ matrix.arch.arch }}
-        run: |
-          docker pull ${ECR_HOSTNAME}/${{ env.REGISTRY_IMAGE }}:${IMAGE_TAG}
-          docker tag ${ECR_HOSTNAME}/${{ env.REGISTRY_IMAGE }}:${IMAGE_TAG} ${IMAGE_TAG}
-      # The mark expression is fixed to `<framework> and e2e and gpu_1`; it
-      # does not depend on include_nightly_marks. arm64 entries pass
-      # dry_run='true'.
-      # NOTE(review): dry_run presumably makes the pytest action skip real
-      # test execution; confirm in the action.
-      - name: Run E2E Tests (gpu_1)
-        uses: ./.github/actions/pytest
-        with:
-          image_tag: ${{ env.IMAGE_PREFIX }}-${{ matrix.framework }}-${{ matrix.arch.arch }}
-          pytest_marks: "${{ matrix.framework }} and e2e and gpu_1"
-          framework: ${{ matrix.framework }}
-          test_type: e2e-single-gpu
-          platform_arch: ${{ matrix.arch.arch }}
-          dry_run: ${{ matrix.arch.arch == 'arm64' && 'true' || 'false' }}
-  e2e-multi-gpu-tests:
-    name: ${{ matrix.framework }}-${{ matrix.arch.arch }}-2gpu-e2e
-    needs: [build-amd64, build-arm64, build-cuda13-arm64, build-cuda13-amd64]
-    if: always()
-    runs-on: ${{ matrix.arch.runner }}
-    timeout-minutes: ${{ matrix.arch.timeout }}
-    strategy:
-      fail-fast: false
-      matrix:
-        framework: [vllm, trtllm, sglang]
-        arch:
-          - arch: amd64
-            runner: prod-builder-amd-gpu-v1
-            timeout: 150
-          - arch: arm64
-            runner: prod-builder-arm-v1
-            timeout: 150
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          lfs: true
-      - name: Check if build succeeded
-        id: check_build
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          set +x
-          echo "==========================================="
-          echo "DEBUG: Checking build status"
-          echo "==========================================="
-          echo "Framework: ${{ matrix.framework }}"
-          echo "Architecture: ${{ matrix.arch.arch }}"
-          echo "Repository: ${{ github.repository }}"
-          echo "Run ID: ${{ github.run_id }}"
-          BUILD_JOB_PATTERN="Build ${{ matrix.framework }} ${{ matrix.framework == 'trtllm' && 'CUDA13 ' || '' }}(${{ matrix.arch.arch }})"
-          echo "Looking for job pattern: '$BUILD_JOB_PATTERN'"
-          # Query GitHub API for job status
-          echo ""
-          echo "Querying GitHub API..."
-          JOBS_RESPONSE=$(curl -s -S -L -w "\n%{http_code}" \
-            -H "Authorization: Bearer ${GITHUB_TOKEN}" \
-            -H "Accept: application/vnd.github.v3+json" \
-            "https://api.github.com/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/jobs?per_page=100")
-          HTTP_CODE=$(echo "$JOBS_RESPONSE" | tail -n1)
-          JOBS=$(echo "$JOBS_RESPONSE" | sed '$d')
-          echo "HTTP Response Code: $HTTP_CODE"
-          if [ "$HTTP_CODE" != "200" ]; then
-            echo "Error: GitHub API returned non-200 status code"
-            echo "Response: $JOBS"
-            exit 1
-          fi
-          # Debug: Show total jobs and all job names
-          TOTAL_JOBS=$(echo "$JOBS" | jq '.jobs | length')
-          echo ""
-          echo "Total jobs found: $TOTAL_JOBS"
-          echo ""
-          echo "All job names in this workflow run:"
-          echo "$JOBS" | jq -r '.jobs[] | "  - \(.name) [status: \(.status), conclusion: \(.conclusion)]"'
-          echo ""
-          # Try exact endswith match
-          echo "Searching for jobs ending with: '$BUILD_JOB_PATTERN'"
-          MATCHING_JOBS=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '[.jobs[] | select(.name | endswith($pattern))] | length')
-          echo "Jobs matching endswith pattern: $MATCHING_JOBS"
-          if [ "$MATCHING_JOBS" -eq 0 ]; then
-            echo ""
-            echo "WARNING: No jobs found with endswith pattern"
-            echo "Trying contains pattern instead..."
-            MATCHING_JOBS=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '[.jobs[] | select(.name | contains($pattern))] | length')
-            echo "Jobs matching contains pattern: $MATCHING_JOBS"
-            if [ "$MATCHING_JOBS" -gt 0 ]; then
-              BUILD_STATUS=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '.jobs[] | select(.name | contains($pattern)) | .conclusion' | head -1)
-              MATCHED_JOB_NAME=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '.jobs[] | select(.name | contains($pattern)) | .name' | head -1)
-            fi
-          else
-            BUILD_STATUS=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '.jobs[] | select(.name | endswith($pattern)) | .conclusion' | head -1)
-            MATCHED_JOB_NAME=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '.jobs[] | select(.name | endswith($pattern)) | .name' | head -1)
-          fi
-          echo ""
-          echo "==========================================="
-          echo "RESULT:"
-          echo "  Matched job: ${MATCHED_JOB_NAME:-none}"
-          echo "  Build status: ${BUILD_STATUS:-not found}"
-          echo "==========================================="
-          # Handle various status cases
-          if [ -z "$BUILD_STATUS" ] || [ "$BUILD_STATUS" = "null" ] || [ "$BUILD_STATUS" = "" ]; then
-            echo ""
-            echo "ERROR: Could not determine build status"
-            echo "This could mean:"
-            echo "  1. The build job is still running"
-            echo "  2. The job name pattern doesn't match"
-            echo "  3. The API response doesn't include this job yet"
-            exit 1
-          fi
-          if [ "$BUILD_STATUS" != "success" ]; then
-            echo ""
-            echo "ERROR: Build did not succeed (status: $BUILD_STATUS)"
-            exit 1
-          fi
-          echo ""
-          echo "✅ Build succeeded. Proceeding with tests."
-      - name: Login to Container Registries
-        uses: ./.github/actions/docker-login
-        with:
-          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
-          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
-      - name: Pull image
-        shell: bash
-        env:
-          ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
-          IMAGE_TAG: ${{ env.IMAGE_PREFIX }}-${{ matrix.framework }}-${{ matrix.arch.arch }}
-        run: |
-          docker pull ${ECR_HOSTNAME}/${{ env.REGISTRY_IMAGE }}:${IMAGE_TAG}
-          docker tag ${ECR_HOSTNAME}/${{ env.REGISTRY_IMAGE }}:${IMAGE_TAG} ${IMAGE_TAG}
-      - name: Run E2E Tests (gpu_2)
-        uses: ./.github/actions/pytest
-        with:
-          image_tag: ${{ env.IMAGE_PREFIX }}-${{ matrix.framework }}-${{ matrix.arch.arch }}
-          pytest_marks: ${{ inputs.include_nightly_marks && '(nightly or post_merge or pre_merge) and e2e and gpu_2' || '(post_merge or pre_merge) and e2e and gpu_2' }}
-          framework: ${{ matrix.framework }}
-          test_type: e2e-multi-gpu
-          platform_arch: ${{ matrix.arch.arch }}
-          dry_run: 'true'
-  ############################## FAULT TOLERANCE TESTS ##############################
-  fault-tolerance-tests:
-    name: ${{ matrix.framework.name }}-ft-k8s
-    needs: [build-amd64]
-    if: always() && inputs.pipeline_type == 'nightly'
-    runs-on: prod-builder-amd-v1
-    timeout-minutes: 60
-    strategy:
-      fail-fast: false
-      matrix:
-        framework:
-          - name: vllm
-            test_scenario: "vllm-agg"
-          - name: trtllm
-            test_scenario: "trtllm-agg"
-          - name: sglang
-            test_scenario: "sglang-agg"
-    env:
-      ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
-      NIGHTLY_IMAGE_PREFIX: ${{ inputs.image_prefix }}
-      NAMESPACE: ft-${{ matrix.framework.name }}-${{ github.run_id }}-${{ github.run_attempt }}
-      DYNAMO_INGRESS_SUFFIX: ${{ secrets.DYNAMO_INGRESS_SUFFIX }}
-    steps:
-      - uses: actions/checkout@v4
-      - name: Check if build succeeded
-        id: check_build
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          set +x
-          BUILD_JOB_PATTERN="Build ${{ matrix.framework.name }} (amd64)"
-          JOBS_RESPONSE=$(curl -s -S -L -w "\n%{http_code}" \
-            -H "Authorization: Bearer ${GITHUB_TOKEN}" \
-            -H "Accept: application/vnd.github.v3+json" \
-            "https://api.github.com/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/jobs?per_page=100")
-          HTTP_CODE=$(echo "$JOBS_RESPONSE" | tail -n1)
-          JOBS=$(echo "$JOBS_RESPONSE" | sed '$d')
-          if [ "$HTTP_CODE" != "200" ]; then
-            echo "Error: GitHub API returned non-200 status code"
-            exit 1
-          fi
-          BUILD_STATUS=$(echo "$JOBS" | jq -r --arg pattern "$BUILD_JOB_PATTERN" '.jobs[] | select(.name | contains($pattern)) | .conclusion' | head -1)
-          if [ -z "$BUILD_STATUS" ] || [ "$BUILD_STATUS" = "null" ] || [ "$BUILD_STATUS" != "success" ]; then
-            echo "ERROR: Build did not succeed (status: $BUILD_STATUS)"
-            exit 1
-          fi
-          echo "✅ Build succeeded. Proceeding with fault tolerance tests."
-      - name: Login to Container Registries
-        uses: ./.github/actions/docker-login
-        with:
-          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
-          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
-          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
-          azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
-          azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
-      - name: Setup Kubernetes
-        run: |
-          echo "${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}" | base64 -d > .kubeconfig
-          chmod 600 .kubeconfig
-          export KUBECONFIG=$(pwd)/.kubeconfig
-          kubectl cluster-info
-      - name: Deploy Operator
-        run: |
-          set -x
-          export KUBECONFIG=$(pwd)/.kubeconfig
-          # Create a namespace for this job
-          echo "Creating an ephemeral namespace..."
-          kubectl delete namespace $NAMESPACE || true
-          kubectl create namespace $NAMESPACE || true
-          echo "Attaching the labels for secrets and cleanup"
-          kubectl label namespaces ${NAMESPACE} nscleanup/enabled=true nscleanup/ttl=7200 gitlab-imagepull=enabled ngc-api=enabled nvcr-imagepull=enabled --overwrite=true
-          # Set the namespace as default
-          kubectl config set-context --current --namespace=$NAMESPACE
-          # Check if Istio is installed
-          kubectl get pods -n istio-system
-          # Check if default storage class exists
-          kubectl get storageclass
-          # Install Helm chart
-          export VIRTUAL_ENV=/opt/dynamo/venv
-          export KUBE_NS=$NAMESPACE
-          export ISTIO_ENABLED=true
-          export ISTIO_GATEWAY=istio-system/ingress-alb
-          export VIRTUAL_SERVICE_SUPPORTS_HTTPS=true
-          # Install dynamo env secrets
-          kubectl create secret generic hf-token-secret --from-literal=HF_TOKEN=${{ secrets.HF_TOKEN }} -n $KUBE_NS || true
-          # Create docker pull secret for operator image
-          kubectl create secret docker-registry docker-imagepullsecret --docker-server=${{ secrets.AZURE_ACR_HOSTNAME }} --docker-username=${{ secrets.AZURE_ACR_USER }} --docker-password=${{ secrets.AZURE_ACR_PASSWORD }} --namespace=${NAMESPACE}
-          # Pull operator image (using nightly tag for operator too)
-          export ECR_HOSTNAME=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
-          docker pull ${ECR_HOSTNAME}/${{ env.REGISTRY_IMAGE }}:${{ env.NIGHTLY_IMAGE_PREFIX }}-operator-amd64 || echo "Operator image not found, will use SHA-based tag"
-          # Install helm dependencies
-          helm repo add bitnami https://charts.bitnami.com/bitnami
-          cd deploy/helm/charts/platform/
-          helm dep build .
-          # Install platform with namespace restriction
-          helm upgrade --install dynamo-platform . --namespace ${NAMESPACE} \
-            --set dynamo-operator.namespaceRestriction.enabled=true \
-            --set dynamo-operator.namespaceRestriction.allowedNamespaces[0]=${NAMESPACE} \
-            --set dynamo-operator.controllerManager.manager.image.repository=${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo \
-            --set dynamo-operator.controllerManager.manager.image.tag=${{ github.sha }}-operator-amd64 \
-            --set dynamo-operator.imagePullSecrets[0].name=docker-imagepullsecret \
-            --timeout 10m --wait
-          # Wait for all deployments to be ready
-          timeout 300s kubectl rollout status deployment -n $NAMESPACE --watch
-          cd -
-          export KUBECONFIG=$(pwd)/.kubeconfig
-          kubectl config set-context --current --namespace=$NAMESPACE
-      - name: Run Fault Tolerance Tests
-        id: run-ft-tests
-        run: |
-          set -x
-          export KUBECONFIG=$(pwd)/.kubeconfig
-          export NAMESPACE=$NAMESPACE
-          export FRAMEWORK=${{ matrix.framework.name }}
-          export ECR_HOSTNAME=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
-          export IMAGE="${ECR_HOSTNAME}/${{ env.REGISTRY_IMAGE }}:${{ env.NIGHTLY_IMAGE_PREFIX }}-${FRAMEWORK}-amd64"
-          echo "Running fault tolerance test: ${{ matrix.framework.test_scenario }}"
-          echo "Using namespace: $NAMESPACE"
-          echo "Using image tag: ${{ env.NIGHTLY_IMAGE_PREFIX }}-${FRAMEWORK}-amd64"
-          # Install python3-venv package if not already installed
-          sudo apt-get update && sudo apt-get install -y python3-venv
-          # Set up Python virtual environment and install test dependencies
-          python3 -m venv venv
-          source venv/bin/activate
-          pip install --upgrade pip
-          pip install -r container/deps/requirements.test.txt
-          pip install kubernetes==32.0.1 kubernetes_asyncio kr8s pyyaml requests tabulate pydantic
-          # Create test-results directory
-          mkdir -p test-results
-          # Run the pytest command with JUnit XML output
-          set +e  # Don't exit on test failures
-          pytest tests/fault_tolerance/deploy/test_deployment.py \
-            -m 'k8s and fault_tolerance' \
-            -k '${{ matrix.framework.test_scenario }}' \
-            -s -v \
-            --namespace ${NAMESPACE} \
-            --image ${IMAGE} \
-            --client-type legacy \
-            --junitxml=test-results/pytest_ft_report.xml \
-            --tb=short
-          TEST_EXIT_CODE=$?
-          echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> $GITHUB_ENV
-          echo "🧪 Fault tolerance tests completed with exit code: ${TEST_EXIT_CODE}"
-          exit ${TEST_EXIT_CODE}
-        continue-on-error: true
-      - name: Process Fault Tolerance Test Results
-        if: always()
-        run: |
-          set -x
-          # Rename JUnit XML with unique naming if it exists
-          if [ -f "test-results/pytest_ft_report.xml" ]; then
-            mv "test-results/pytest_ft_report.xml" "test-results/pytest_ft_report_${{ matrix.framework.name }}_amd64_${{ github.run_id }}_${{ job.check_run_id }}.xml"
-            echo "✅ JUnit XML report renamed with unique identifier"
-          else
-            echo "⚠️  JUnit XML report not found"
-          fi
-      - name: Upload Fault Tolerance Test Results
-        uses: actions/upload-artifact@v4
-        if: always()
-        with:
-          name: test-results-${{ matrix.framework.name }}-fault_tolerance-amd64-${{ github.run_id }}-${{ job.check_run_id }}
-          path: test-results/pytest_ft_report_${{ matrix.framework.name }}_amd64_${{ github.run_id }}_${{ job.check_run_id }}.xml
-          retention-days: 7
-      - name: Cleanup
-        if: always()
-        timeout-minutes: 5
-        run: |
-          echo "${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}" | base64 -d > .kubeconfig
-          chmod 600 .kubeconfig
-          export KUBECONFIG=$(pwd)/.kubeconfig
-          kubectl config set-context --current --namespace=$NAMESPACE --kubeconfig "${KUBECONFIG}"
-          # For debugging purposes, list all the resources before we uninstall
-          kubectl get all
-          echo "Deleting all DynamoGraphDeployments in namespace $NAMESPACE..."
-          kubectl delete dynamographdeployments --all -n $NAMESPACE || true
-          # Uninstall the helm chart
-          helm ls
-          helm uninstall dynamo-platform || true
-          echo "Namespace $NAMESPACE deletion initiated, proceeding with cleanup..."
-          kubectl delete namespace $NAMESPACE || true
-          echo "Namespace $NAMESPACE completed."
-  ############################## RESULTS SUMMARY ##############################
-  results-summary:
-    name: Results Summary
-    runs-on: ubuntu-latest
-    if: always()
-    needs: [build-amd64, build-arm64, build-cuda13-amd64, build-cuda13-arm64, unit-tests, integration-tests, e2e-single-gpu-tests, e2e-multi-gpu-tests, fault-tolerance-tests]
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-      - name: Gather job metadata
-        id: gather
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          PIPELINE_TYPE: ${{ inputs.pipeline_type }}
-        run: |
-          set +x -e
-          echo "# ${PIPELINE_TYPE^} CI Results Summary" > results.md
-          echo "" >> results.md
-          echo "| Stage | Status | Runner | Duration (min) | Artifacts |" >> results.md
-          echo "|-------|--------|--------|----------------|-----------|" >> results.md
-          curl -s -S -L --fail-with-body \
-            -H "Authorization: Bearer ${GITHUB_TOKEN}" \
-            -H "Accept: application/vnd.github.v3+json" \
-            "https://api.github.com/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/jobs?per_page=100" \
-            2>/dev/null | jq -c '.jobs[] | {id, name, runner_name, conclusion, started_at, completed_at}' > jobs.jsonl
-          while read job_entry; do
-            job_id=$(echo "$job_entry" | jq -r '.id')
-            name=$(echo "$job_entry" | jq -r '.name')
-            runner=$(echo "$job_entry" | jq -r '.runner_name')
-            status=$(echo "$job_entry" | jq -r '.conclusion')
-            started=$(echo "$job_entry" | jq -r '.started_at')
-            completed=$(echo "$job_entry" | jq -r '.completed_at')
-            minutes="N/A"
-            if [[ "$started" != "null" && "$completed" != "null" ]]; then
-              start_epoch=$(date -d "$started" +%s)
-              end_epoch=$(date -d "$completed" +%s)
-              minutes=$(( (end_epoch - start_epoch)/60 ))
-            fi
-            artifact_link="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}#job-$job_id"
-            printf "| %s | %s | %s | %s | [Log & Artifacts](%s) |\n" "$name" "$status" "$runner" "$minutes" "$artifact_link" >> results.md
-          done < jobs.jsonl
-          echo "" >> results.md
-          echo "---" >> results.md
-      - name: Display workflow summary
-        run: cat results.md
-      - name: Upload results summary as job summary
-        run: cat results.md >> $GITHUB_STEP_SUMMARY
-      - name: Upload results as artifact
-        uses: actions/upload-artifact@v4
-        if: always()
-        with:
-          name: ${{ inputs.pipeline_type }}-results-summary
-          path: results.md
-          retention-days: 7
-  ############################## SLACK NOTIFICATION ##############################
-  notify-slack:
-    name: Notify Slack
-    runs-on: prod-builder-amd-v1
-    if: always() && inputs.enable_slack_notification && !github.event.repository.fork
-    needs: results-summary
-    permissions:
-      contents: read
-    env:
-      HAS_SLACK_WEBHOOK: ${{ secrets.SLACK_NOTIFY_NIGHTLY_WEBHOOK_URL != '' }}
-    steps:
-      - name: Send Slack notification
-        if: env.HAS_SLACK_WEBHOOK == 'true'
-        continue-on-error: true
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_NOTIFY_NIGHTLY_WEBHOOK_URL }}
-          SLACK_OPS_GROUP_ID: ${{ secrets.SLACK_OPS_SUPPORT_GROUP_ID }}
-          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-          PIPELINE_TYPE: ${{ inputs.pipeline_type }}
-        run: |
-          set -euo pipefail
-          JOBS_JSON=$(mktemp)
-          trap 'rm -f "$JOBS_JSON"' EXIT
-          if ! curl -sSL \
-            -H "Authorization: Bearer ${GITHUB_TOKEN}" \
-            -H "Accept: application/vnd.github+json" \
-            "https://api.github.com/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/jobs?per_page=100" \
-            > "$JOBS_JSON"; then
-            echo "Error: Failed to fetch job data from GitHub API"
-            exit 1
-          fi
-          if [ ! -s "$JOBS_JSON" ]; then
-            echo "Error: No job data received"
-            exit 1
-          fi
-          TOTAL_JOBS=$(jq '[.jobs[]] | length' "$JOBS_JSON")
-          SUCCESS_COUNT=$(jq '[.jobs[] | select(.conclusion == "success")] | length' "$JOBS_JSON")
-          FAILED_COUNT=$(jq '[.jobs[] | select(.conclusion == "failure")] | length' "$JOBS_JSON")
-          if [ "$FAILED_COUNT" -eq 0 ]; then
-            STATUS="Success ✅"
-            STATUS_EMOJI=":white_check_mark:"
-          else
-            STATUS="Failed ❌"
-            STATUS_EMOJI=":x:"
-          fi
-          # Capitalize pipeline type for display
-          DISPLAY_TYPE="${PIPELINE_TYPE^}"
-          # Main message with summary
-          SUMMARY_TEXT="*${DISPLAY_TYPE} CI Pipeline - ${STATUS}*"$'\n'"Summary: ${SUCCESS_COUNT}/${TOTAL_JOBS} jobs passed"$'\n'"<${RUN_URL}|View Workflow Summary>"
-          if [ "$FAILED_COUNT" -eq 0 ]; then
-            # Success - simple message
-            PAYLOAD=$(jq -n \
-              --arg text "$SUMMARY_TEXT" \
-              '{text: $text}')
-          else
-            # Failed - message with blocks
-            FAILED_JOBS=$(jq -r '.jobs[] | select(.conclusion == "failure") | "• " + .name' "$JOBS_JSON")
-            FAILED_JOBS_TEXT="*Failed Jobs (${FAILED_COUNT}):*"$'\n'"${FAILED_JOBS}"
-            # Build ops-support mention (use group ID if available, otherwise plain text)
-            if [ -n "${SLACK_OPS_GROUP_ID:-}" ]; then
-              OPS_MENTION="<!subteam^${SLACK_OPS_GROUP_ID}|@ops-support>"
-            else
-              OPS_MENTION="@ops-support"
-            fi
-            ACTION_TEXT=":rotating_light: cc ${OPS_MENTION} - Please investigate the failures above."
-            PAYLOAD=$(jq -n \
-              --arg summary "$SUMMARY_TEXT" \
-              --arg failed "$FAILED_JOBS_TEXT" \
-              --arg action "$ACTION_TEXT" \
-              '{
-                text: $summary,
-                blocks: [
-                  {
-                    type: "section",
-                    text: {
-                      type: "mrkdwn",
-                      text: $summary
-                    }
-                  },
-                  {
-                    type: "section",
-                    text: {
-                      type: "mrkdwn",
-                      text: $failed
-                    }
-                  },
-                  {
-                    type: "divider"
-                  },
-                  {
-                    type: "context",
-                    elements: [
-                      {
-                        type: "mrkdwn",
-                        text: $action
-                      }
-                    ]
-                  }
-                ]
-              }')
-          fi
-          if curl -sSf -X POST -H "Content-Type: application/json" -d "$PAYLOAD" "$SLACK_WEBHOOK_URL"; then
-            echo "Slack notification sent successfully"
-          else
-            echo "Warning: Failed to send Slack notification"
-            exit 1
-          fi
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -20,8 +20,15 @@ on:
 permissions:
  contents: write
+env:
+  # Image repository path (namespace/name) with no registry host or tag.
+  # NOTE(review): no use of this value is visible in this part of the file;
+  # presumably jobs prepend the registry host and append a tag - confirm.
+  REGISTRY_IMAGE: ai-dynamo/dynamo
+  # Builder name derived from the run id and attempt, so each run attempt
+  # gets its own name. operator-build passes it to init-dynamo-builder.
+  BUILDER_NAME: b-${{ github.run_id }}-${{ github.run_attempt }}
 jobs:
-  # Gate job for manual triggers - requires automated-release approval
+  # ============================================================================
+  # GATE: Approval + Version Extraction
+  # ============================================================================
  manual-approval:
    name: Approve Manual Run
    if: github.event_name == 'workflow_dispatch'
@@ -29,9 +36,8 @@ jobs:
    environment: automated-release
    steps:
      - name: Manual run approved
-        run: echo "✅ Manual workflow run approved via automated-release environment"
+        run: echo "Manual workflow run approved via automated-release environment"
-  # Extract version from branch name for downstream jobs
  prepare-release:
    name: Prepare Release
    runs-on: ubuntu-latest
@@ -42,11 +48,9 @@ jobs:
      - name: Extract version from branch
        id: extract
        run: |
-          # Extract version from branch name (e.g., release/0.7.0 -> 0.7.0)
          BRANCH_NAME="${GITHUB_REF#refs/heads/}"
          VERSION="${BRANCH_NAME#release/}"
-          # Enforce workflow_dispatch only runs on release/* branches
          if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
            if [[ ! "$BRANCH_NAME" =~ ^release/[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
              echo "Error: workflow_dispatch can only be triggered from release/* branches"
@@ -66,37 +70,153 @@ jobs:
          echo "image_prefix=release-${VERSION}" >> $GITHUB_OUTPUT
          echo "Detected version: ${VERSION}"
-  # Run the CI test suite (builds + tests)
+  # ============================================================================
-  ci-pipeline:
+  # FRAMEWORK PIPELINES (Build + Test + Distribute)
-    name: Release CI
+  # Builds amd64+arm64 images, runs tests, copies amd64 to ACR.
+  # release-publish then copies both architectures from ECR to NGC.
+  #
+  # NOTE: Each job directly depends on [prepare-release, manual-approval] with
+  # always() instead of going through an intermediate gate job. This avoids a
+  # GitHub Actions quirk where a skipped ancestor (manual-approval on push
+  # events) taints the entire dependency chain, causing downstream jobs to skip
+  # even when the intermediate gate succeeds.
+  # ============================================================================
+  # vLLM: invokes build-test-distribute-flavor-matrix.yml with framework=vllm,
+  # target=runtime, platforms amd64+arm64 and CUDA versions 12.9 and 13.0.
+  vllm-pipeline:
+    name: vllm builds
    needs: [prepare-release, manual-approval]
-    # Run if: prepare-release succeeded AND (push event OR manual-approval succeeded)
    if: |
      always() &&
      needs.prepare-release.result == 'success' &&
      (github.event_name == 'push' || needs.manual-approval.result == 'success')
-    uses: ./.github/workflows/ci-test-suite.yml
+    uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
    with:
-      pipeline_type: release
+      framework: vllm
-      include_nightly_marks: false
+      target: runtime
-      image_prefix: ${{ needs.prepare-release.outputs.image_prefix }}
+      platforms: '["amd64", "arm64"]'
-      enable_slack_notification: false
+      cuda_versions: '["12.9", "13.0"]'
-    secrets:
+      # Extra tag built from the release prefix (release-<version>) that
+      # prepare-release emits as its image_prefix output.
+      extra_tags: |
-      AWS_ACCOUNT_ID: ${{ secrets.AWS_ACCOUNT_ID }}
+        ${{ needs.prepare-release.outputs.image_prefix }}-vllm
-      AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
+      # Same expression as the workflow-level BUILDER_NAME and the builder
+      # removed by clean-k8s-builder. It is written out because the env
+      # context is not available in `with:` of a reusable-workflow call.
+      builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
-      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+      build_timeout_minutes: 120
-      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+      cpu_only_test_markers: '(pre_merge or post_merge) and vllm and gpu_0'
-      NGC_CI_ACCESS_TOKEN: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
+      cpu_only_test_timeout_minutes: 60
-      CI_TOKEN: ${{ secrets.CI_TOKEN }}
+      single_gpu_test_markers: '(pre_merge or post_merge) and vllm and gpu_1'
-      SCCACHE_S3_BUCKET: ${{ secrets.SCCACHE_S3_BUCKET }}
+      single_gpu_test_timeout_minutes: 60
-      AZURE_ACR_HOSTNAME: ${{ secrets.AZURE_ACR_HOSTNAME }}
+      multi_gpu_test_markers: '(pre_merge or post_merge) and vllm and (gpu_2 or gpu_4)'
-      AZURE_ACR_USER: ${{ secrets.AZURE_ACR_USER }}
+      multi_gpu_test_timeout_minutes: 60
-      AZURE_ACR_PASSWORD: ${{ secrets.AZURE_ACR_PASSWORD }}
+    # Forward all of the caller's secrets to the called workflow.
+    secrets: inherit
-      AZURE_AKS_CI_KUBECONFIG_B64: ${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}
-      HF_TOKEN: ${{ secrets.HF_TOKEN }}
+  # SGLang: invokes build-test-distribute-flavor-matrix.yml with
+  # framework=sglang, target=runtime, platforms amd64+arm64 and CUDA versions
+  # 12.9 and 13.0.
+  sglang-pipeline:
-      DYNAMO_INGRESS_SUFFIX: ${{ secrets.DYNAMO_INGRESS_SUFFIX }}
+    name: sglang builds
+    needs: [prepare-release, manual-approval]
-  # Build frontend images (needed for NGC publish)
+    if: |
+      always() &&
+      needs.prepare-release.result == 'success' &&
+      (github.event_name == 'push' || needs.manual-approval.result == 'success')
+    uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
+    with:
+      framework: sglang
+      target: runtime
+      platforms: '["amd64", "arm64"]'
+      cuda_versions: '["12.9", "13.0"]'
+      # Extra tag built from the release prefix (release-<version>) that
+      # prepare-release emits as its image_prefix output.
+      extra_tags: |
+        ${{ needs.prepare-release.outputs.image_prefix }}-sglang
+      # Same expression as the workflow-level BUILDER_NAME and the builder
+      # removed by clean-k8s-builder. It is written out because the env
+      # context is not available in `with:` of a reusable-workflow call.
+      builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
+      build_timeout_minutes: 120
+      cpu_only_test_markers: '(pre_merge or post_merge) and sglang and gpu_0'
+      cpu_only_test_timeout_minutes: 60
+      single_gpu_test_markers: '(pre_merge or post_merge) and sglang and gpu_1'
+      single_gpu_test_timeout_minutes: 60
+      multi_gpu_test_markers: '(pre_merge or post_merge) and sglang and (gpu_2 or gpu_4)'
+      multi_gpu_test_timeout_minutes: 60
+    # Forward all of the caller's secrets to the called workflow.
+    secrets: inherit
+  # TensorRT-LLM: invokes build-test-distribute-flavor-matrix.yml with
+  # framework=trtllm, target=runtime and platforms amd64+arm64. Unlike the
+  # vllm and sglang jobs, it requests a single CUDA version (13.1).
+  trtllm-pipeline:
+    name: trtllm builds
+    needs: [prepare-release, manual-approval]
+    if: |
+      always() &&
+      needs.prepare-release.result == 'success' &&
+      (github.event_name == 'push' || needs.manual-approval.result == 'success')
+    uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
+    with:
+      framework: trtllm
+      target: runtime
+      platforms: '["amd64", "arm64"]'
+      cuda_versions: '["13.1"]'
+      # Extra tag built from the release prefix (release-<version>) that
+      # prepare-release emits as its image_prefix output.
+      extra_tags: |
+        ${{ needs.prepare-release.outputs.image_prefix }}-trtllm
+      # Same expression as the workflow-level BUILDER_NAME and the builder
+      # removed by clean-k8s-builder. It is written out because the env
+      # context is not available in `with:` of a reusable-workflow call.
+      builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}
+      build_timeout_minutes: 120
+      cpu_only_test_markers: '(pre_merge or post_merge) and trtllm and gpu_0'
+      cpu_only_test_timeout_minutes: 60
+      single_gpu_test_markers: '(pre_merge or post_merge) and trtllm and gpu_1'
+      single_gpu_test_timeout_minutes: 60
+      multi_gpu_test_markers: '(pre_merge or post_merge) and trtllm and (gpu_2 or gpu_4)'
+      multi_gpu_test_timeout_minutes: 60
+    # Forward all of the caller's secrets to the called workflow.
+    secrets: inherit
+  # ============================================================================
+  # RELEASE-SPECIFIC BUILDS
+  # ============================================================================
+  # Builds the operator image from deploy/operator for amd64 and arm64 and
+  # pushes it to ECR and ACR under two tags: <commit-sha>-operator and
+  # <image_prefix>-operator. The prefix tag is exposed as the job output
+  # `operator_tag`. Gated like the framework pipelines: prepare-release must
+  # succeed and the run must be a push or an approved manual dispatch.
+  operator-build:
+    name: Build Operator Image
+    needs: [prepare-release, manual-approval]
+    if: |
+      always() &&
+      needs.prepare-release.result == 'success' &&
+      (github.event_name == 'push' || needs.manual-approval.result == 'success')
+    runs-on: prod-default-v2
+    env:
+      IMAGE_REGISTRY: ai-dynamo
+      IMAGE_REPOSITORY: dynamo
+      ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
+    outputs:
+      operator_tag: ${{ steps.build-and-push.outputs.operator_tag }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Initialize Dynamo Builder
+        uses: ./.github/actions/init-dynamo-builder
+        with:
+          builder_name: ${{ env.BUILDER_NAME }}
+          flavor: general
+          all_arch: 'true'
+      - name: Docker Login
+        uses: ./.github/actions/docker-login
+        with:
+          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
+          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
+          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
+          azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
+          azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
+      # The linter and tester steps build the Dockerfile targets of the same
+      # name for linux/arm64 only; neither step pushes an image.
+      - name: Linter
+        working-directory: ./deploy/operator
+        run: docker buildx build --platform linux/arm64 --target linter --progress=plain --build-arg "DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/" .
+      - name: Tester
+        working-directory: ./deploy/operator
+        run: docker buildx build --platform linux/arm64 --target tester --progress=plain --build-arg "DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/" .
+      - name: Build and push Container
+        id: build-and-push
+        working-directory: ./deploy/operator
+        # Expression values reach the script through environment variables
+        # rather than being pasted into the shell source, so a value that
+        # contains shell metacharacters (image_prefix is derived from the
+        # branch name) cannot be interpreted as code.
+        env:
+          ACR_HOSTNAME: ${{ secrets.AZURE_ACR_HOSTNAME }}
+          COMMIT_SHA: ${{ github.sha }}
+          IMAGE_PREFIX: ${{ needs.prepare-release.outputs.image_prefix }}
+        run: |
+          ECR_BASE="${ECR_HOSTNAME}/${IMAGE_REGISTRY}/${IMAGE_REPOSITORY}"
+          ACR_BASE="${ACR_HOSTNAME}/${IMAGE_REGISTRY}/${IMAGE_REPOSITORY}"
+          SHA_TAG="${COMMIT_SHA}-operator"
+          PREFIX_TAG="${IMAGE_PREFIX}-operator"
+          IMAGE_URIS=(
+            "${ECR_BASE}:${SHA_TAG}"
+            "${ECR_BASE}:${PREFIX_TAG}"
+            "${ACR_BASE}:${SHA_TAG}"
+            "${ACR_BASE}:${PREFIX_TAG}"
+          )
+          echo "operator_tag=${PREFIX_TAG}" >> "$GITHUB_OUTPUT"
+          # One "-t <uri>" pair per image, kept in an array so every URI is
+          # passed as a single argument with no reliance on word splitting.
+          TAG_ARGS=()
+          for uri in "${IMAGE_URIS[@]}"; do
+            TAG_ARGS+=(-t "${uri}")
+          done
+          docker buildx build --push --platform linux/amd64,linux/arm64 \
+              --build-arg "DOCKER_PROXY=${ECR_HOSTNAME}/dockerhub/" \
+              "${TAG_ARGS[@]}" -f Dockerfile .
  frontend-build:
    name: Build Frontend Images
    needs: [prepare-release, manual-approval]
@@ -105,30 +225,193 @@ jobs:
      needs.prepare-release.result == 'success' &&
      (github.event_name == 'push' || needs.manual-approval.result == 'success')
    uses: ./.github/workflows/build-frontend-image.yaml
-    secrets:
+    with:
-      AWS_ACCOUNT_ID: ${{ secrets.AWS_ACCOUNT_ID }}
+      skip_change_detection: true
-      AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
+      image_prefix: ${{ needs.prepare-release.outputs.image_prefix }}
-      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+    secrets: inherit
-      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-      AZURE_ACR_HOSTNAME: ${{ secrets.AZURE_ACR_HOSTNAME }}
+  # ============================================================================
-      AZURE_ACR_USER: ${{ secrets.AZURE_ACR_USER }}
+  # BUILDER CLEANUP
-      AZURE_ACR_PASSWORD: ${{ secrets.AZURE_ACR_PASSWORD }}
+  # ============================================================================
-      CI_TOKEN: ${{ secrets.CI_TOKEN }}
-      SCCACHE_S3_BUCKET: ${{ secrets.SCCACHE_S3_BUCKET }}
+  clean-k8s-builder:  # removes this run's buildx builder once the framework pipelines are done
+    name: Clean K8s builder if exists
-  # Tag the commit as release candidate and publish to NGC
+    runs-on: prod-default-small-v2
-  # This job uses the automated-release environment for sensitive secrets
+    if: always()  # run no matter how the needed pipelines ended
-  # Runs after ci-pipeline and frontend-build complete - requires builds to succeed
+    needs: [vllm-pipeline, sglang-pipeline, trtllm-pipeline]
-  # Note: Tests may fail but builds must succeed for publishing
+    steps:
+    - uses: actions/checkout@v4
+    - name: Create K8s builders (skip bootstrap)
+      uses: ./.github/actions/bootstrap-buildkit
+      continue-on-error: true  # a failure here must not prevent the removal step below
+      with:
+        builder_name: b-${{ github.run_id }}-${{ github.run_attempt }}  # same name the removal step deletes
+        buildkit_worker_addresses: ''
+        skip_bootstrap: true
+    - name: Builder Cleanup  # `|| true` below: an already-missing builder is not an error
+      run: docker buildx rm b-${{ github.run_id }}-${{ github.run_attempt }} || true
+  # ============================================================================
+  # DEPLOYMENT TESTS
+  # ============================================================================
+  deploy-operator:
+    # Installs the dynamo-platform Helm chart, pinned to this release's
+    # operator image, into a fresh per-run namespace and exports the
+    # namespace name as the NAMESPACE job output.
+    name: Deploy Operator
+    runs-on: prod-default-small-v2
+    needs: [prepare-release, vllm-pipeline, sglang-pipeline, trtllm-pipeline, operator-build]
+    # always() keeps the job eligible whatever the framework pipelines did;
+    # only a successful operator image build is required.
+    if: |
+      always() &&
+      needs.operator-build.result == 'success'
+    outputs:
+      NAMESPACE: ${{ steps.deploy.outputs.namespace }}
+    steps:
+    - uses: actions/checkout@v4
+    - name: Deploy Operator
+      id: deploy
+      # Secrets and the branch name reach the script through the environment
+      # instead of being expanded into the script text, so shell
+      # metacharacters in them cannot be re-interpreted as code or split
+      # into extra arguments.
+      env:
+        BRANCH: ${{ github.ref_name }}
+        KUBECONFIG_B64: ${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        ACR_HOSTNAME: ${{ secrets.AZURE_ACR_HOSTNAME }}
+        ACR_USER: ${{ secrets.AZURE_ACR_USER }}
+        ACR_PASSWORD: ${{ secrets.AZURE_ACR_PASSWORD }}
+        OPERATOR_TAG: ${{ needs.prepare-release.outputs.image_prefix }}-operator
+      run: |
+        # Same strict mode as the other scripted steps in this workflow,
+        # plus -x tracing.
+        set -euxo pipefail
+        # Namespace name: gh-ci-<run id>-<branch, "/" and "." -> "-", first 10 chars>-dt
+        BRANCH_SANITIZED="${BRANCH//\//-}"
+        BRANCH_SANITIZED="${BRANCH_SANITIZED//./-}"
+        BRANCH_SANITIZED="${BRANCH_SANITIZED:0:10}"
+        NAMESPACE="gh-ci-${{ github.run_id }}-${BRANCH_SANITIZED}-dt"
+        echo "namespace=${NAMESPACE}" >> "$GITHUB_OUTPUT"
+        echo "${KUBECONFIG_B64}" | base64 -d > .kubeconfig
+        chmod 600 .kubeconfig
+        # Absolute path: the script changes directory before running helm.
+        export KUBECONFIG="$(pwd)/.kubeconfig"
+        kubectl create namespace "${NAMESPACE}"
+        kubectl label namespaces "${NAMESPACE}" nscleanup/enabled=true nscleanup/ttl=7200 gitlab-imagepull=enabled ngc-api=enabled nvcr-imagepull=enabled --overwrite=true
+        kubectl config set-context --current --namespace="${NAMESPACE}"
+        # Best effort: a failure to create the HF token secret does not stop the deploy.
+        kubectl create secret generic hf-token-secret --from-literal=HF_TOKEN="${HF_TOKEN}" -n "${NAMESPACE}" || true
+        kubectl create secret docker-registry docker-imagepullsecret \
+          --docker-server="${ACR_HOSTNAME}" \
+          --docker-username="${ACR_USER}" \
+          --docker-password="${ACR_PASSWORD}" \
+          --namespace="${NAMESPACE}"
+        helm repo add bitnami https://charts.bitnami.com/bitnami
+        cd deploy/helm/charts/platform/
+        helm dep build .
+        # The [0] index expressions are quoted so the shell never treats
+        # them as glob patterns.
+        helm upgrade --install dynamo-platform . --namespace "${NAMESPACE}" \
+          --set dynamo-operator.namespaceRestriction.enabled=true \
+          --set "dynamo-operator.namespaceRestriction.allowedNamespaces[0]=${NAMESPACE}" \
+          --set dynamo-operator.controllerManager.manager.image.repository="${ACR_HOSTNAME}/ai-dynamo/dynamo" \
+          --set dynamo-operator.controllerManager.manager.image.tag="${OPERATOR_TAG}" \
+          --set "dynamo-operator.imagePullSecrets[0].name=docker-imagepullsecret" \
+          --set dynamo-operator.gpuDiscovery.enabled=false \
+          --set dynamo-operator.upgradeCRD=false \
+          --debug
+        # Give the deployments in the namespace up to 5 minutes to roll out.
+        timeout 300s kubectl rollout status deployment -n "${NAMESPACE}" --watch
+  deploy-test-vllm:
+    # Runs each vllm deployment profile, one at a time, in the namespace
+    # exported by deploy-operator.
+    name: deploy-test-vllm (${{ matrix.profile }})
+    needs: [deploy-operator, vllm-pipeline]
+    # always() lets this run whatever vllm-pipeline's result was; only a
+    # successful deploy-operator is required.
+    if: always() && needs.deploy-operator.result == 'success'
+    runs-on: prod-default-small-v2
+    strategy:
+      fail-fast: false
+      max-parallel: 1
+      matrix:
+        profile:
+          - agg
+          - agg_router
+          - disagg
+          - disagg_router
+    steps:
+      - uses: actions/checkout@v4
+      - name: Run Dynamo Deploy Test
+        uses: ./.github/actions/dynamo-deploy-test
+        with:
+          framework: vllm
+          profile: ${{ matrix.profile }}
+          platform_arch: amd64
+          image: ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-vllm-cuda12-amd64
+          namespace: ${{ needs.deploy-operator.outputs.NAMESPACE }}
+          kubeconfig_base64: ${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}
+  deploy-test-sglang:
+    # Runs each sglang deployment profile, one at a time, in the namespace
+    # exported by deploy-operator.
+    name: deploy-test-sglang (${{ matrix.profile }})
+    needs: [deploy-operator, sglang-pipeline]
+    # always() lets this run whatever sglang-pipeline's result was; only a
+    # successful deploy-operator is required.
+    if: always() && needs.deploy-operator.result == 'success'
+    runs-on: prod-default-small-v2
+    strategy:
+      fail-fast: false
+      max-parallel: 1
+      matrix:
+        profile:
+          - agg
+          - agg_router
+    steps:
+      - uses: actions/checkout@v4
+      - name: Run Dynamo Deploy Test
+        uses: ./.github/actions/dynamo-deploy-test
+        with:
+          framework: sglang
+          profile: ${{ matrix.profile }}
+          platform_arch: amd64
+          image: ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-sglang-cuda12-amd64
+          namespace: ${{ needs.deploy-operator.outputs.NAMESPACE }}
+          kubeconfig_base64: ${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}
+  deploy-test-trtllm:
+    # Runs each trtllm deployment profile, one at a time, in the namespace
+    # exported by deploy-operator.
+    name: deploy-test-trtllm (${{ matrix.profile }})
+    needs: [deploy-operator, trtllm-pipeline]
+    # always() lets this run whatever trtllm-pipeline's result was; only a
+    # successful deploy-operator is required.
+    if: always() && needs.deploy-operator.result == 'success'
+    runs-on: prod-default-small-v2
+    strategy:
+      fail-fast: false
+      max-parallel: 1
+      matrix:
+        profile:
+          - agg
+          - agg_router
+          - disagg
+          - disagg_router
+    steps:
+      - uses: actions/checkout@v4
+      - name: Run Dynamo Deploy Test
+        uses: ./.github/actions/dynamo-deploy-test
+        with:
+          framework: trtllm
+          profile: ${{ matrix.profile }}
+          platform_arch: amd64
+          image: ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-trtllm-cuda13-amd64
+          namespace: ${{ needs.deploy-operator.outputs.NAMESPACE }}
+          kubeconfig_base64: ${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}
+  deploy-cleanup:
+    # Best-effort teardown of the namespace exported by deploy-operator.
+    # Runs after all deploy tests whatever their outcome (if: always()).
+    name: Cleanup AKS resources
+    runs-on: prod-default-small-v2
+    if: always()
+    needs: [deploy-operator, deploy-test-sglang, deploy-test-trtllm, deploy-test-vllm]
+    steps:
+    - uses: actions/checkout@v4
+    - name: Cleanup
+      # Must exceed the 10m `helm uninstall --timeout` used below: with a
+      # smaller budget a slow uninstall gets the whole step killed before
+      # the namespace delete is ever issued.
+      timeout-minutes: 15
+      env:
+        NAMESPACE: ${{ needs.deploy-operator.outputs.NAMESPACE }}
+        # Passed via the environment rather than expanded into the script text.
+        KUBECONFIG_B64: ${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}
+      run: |
+        # deploy-operator exports no namespace when it never got that far.
+        if [ -z "$NAMESPACE" ]; then
+          echo "No namespace to clean up"
+          exit 0
+        fi
+        echo "${KUBECONFIG_B64}" | base64 -d > .kubeconfig
+        chmod 600 .kubeconfig
+        export KUBECONFIG="$(pwd)/.kubeconfig"
+        kubectl config set-context --current --namespace="${NAMESPACE}" --kubeconfig "${KUBECONFIG}"
+        # Diagnostic listings; every command from here on is allowed to fail
+        # so that later cleanup commands still run.
+        kubectl get dynamographdeployments || true
+        kubectl get all || true
+        kubectl delete dynamographdeployments --all -n "${NAMESPACE}" || true
+        helm uninstall dynamo-platform --namespace "${NAMESPACE}" --timeout 10m || true
+        kubectl delete namespace "${NAMESPACE}" || true
+  # ============================================================================
+  # NGC PUBLISH: RC tag, crane copy to NGC, Helm chart push
+  # Runs after framework builds + operator + frontend complete.
+  # Tests may fail but builds must have produced images for publishing.
+  # ============================================================================
  release-publish:
    name: Tag RC & Publish to NGC
-    needs: [prepare-release, ci-pipeline, frontend-build]
+    needs: [prepare-release, vllm-pipeline, sglang-pipeline, trtllm-pipeline, operator-build, frontend-build]
    if: |
      always() && !cancelled() &&
      needs.prepare-release.result == 'success' &&
-      (needs.ci-pipeline.result == 'success' || needs.ci-pipeline.result == 'failure') &&
+      (needs.vllm-pipeline.result == 'success' || needs.sglang-pipeline.result == 'success' || needs.trtllm-pipeline.result == 'success')
-      (needs.frontend-build.result == 'success' || needs.frontend-build.result == 'failure')
+    runs-on: cpu-amd-m5-4xlarge
-    runs-on: cpu-amd-m5-4xlarge  # Self-hosted runner with IAM instance role for ECR access
    environment: automated-release
    env:
      VERSION: ${{ needs.prepare-release.outputs.version }}
@@ -149,9 +432,7 @@ jobs:
        run: |
          set -euo pipefail
-          # Check if RC number was provided as input
          if [ -n "${INPUT_RC_NUMBER}" ]; then
-            # Validate input is a non-negative integer
            if ! [[ "${INPUT_RC_NUMBER}" =~ ^[0-9]+$ ]]; then
              echo "Error: rc_number must be a non-negative integer (got: ${INPUT_RC_NUMBER})"
              exit 1
@@ -159,21 +440,14 @@ jobs:
            NEXT_RC="${INPUT_RC_NUMBER}"
            echo "Using provided RC number: ${NEXT_RC}"
          else
-            # Auto-increment: Find existing RC tags for this version
            echo "No RC number provided. Auto-incrementing..."
-            echo "Looking for existing RC tags for version ${VERSION}..."
-            # Pattern: vX.Y.Z-rcN
            RC_PATTERN="v${VERSION}-rc"
-            # Get all matching tags sorted by RC number
            EXISTING_RCS=$(git tag -l "${RC_PATTERN}*" | grep -E "^v${VERSION}-rc[0-9]+$" | sort -V || true)
            if [ -z "$EXISTING_RCS" ]; then
              NEXT_RC=0
              echo "No existing RC tags found. Starting with rc0."
            else
-              # Get the highest RC number
              LAST_RC=$(echo "$EXISTING_RCS" | tail -1)
              LAST_RC_NUM=${LAST_RC#v${VERSION}-rc}
              NEXT_RC=$((LAST_RC_NUM + 1))
@@ -187,6 +461,7 @@ jobs:
          echo "rc_tag=${RC_TAG}" >> $GITHUB_OUTPUT
          echo "rc_number=${NEXT_RC}" >> $GITHUB_OUTPUT
          echo "ngc_version_tag=${VERSION}rc${NEXT_RC}" >> $GITHUB_OUTPUT
+          echo "helm_chart_version=${VERSION}-rc${NEXT_RC}" >> $GITHUB_OUTPUT
          echo "Will create tag: ${RC_TAG}"
      - name: Create RC tag
@@ -194,23 +469,16 @@ jobs:
          RC_TAG: ${{ steps.rc_tag.outputs.rc_tag }}
        run: |
          set -euo pipefail
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
-          # Create annotated tag
          git tag -a "${RC_TAG}" -m "Release candidate ${RC_TAG}"
-          # Push the tag
          git push origin "${RC_TAG}"
+          echo "Created and pushed tag: ${RC_TAG}"
-          echo "✅ Created and pushed tag: ${RC_TAG}"
      - name: Setup crane
        env:
          CRANE_VERSION: v0.20.2
        run: |
-          # Download crane from official Google releases
          curl -sL "https://github.com/google/go-containerregistry/releases/download/${CRANE_VERSION}/go-containerregistry_Linux_x86_64.tar.gz" \
            | tar -xzf - crane
          sudo mv crane /usr/local/bin/
@@ -220,9 +488,7 @@ jobs:
        run: |
          ACCOUNT_ID="$(aws sts get-caller-identity --query Account --output text)"
          ECR_HOSTNAME="${ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com"
-          echo "Logging into ECR..."
          aws ecr get-login-password --region ${AWS_DEFAULT_REGION} | docker login --username AWS --password-stdin "${ECR_HOSTNAME}"
-          echo "✅ ECR login successful"
      - name: Login to NGC
        env:
@@ -236,143 +502,127 @@ jobs:
        env:
          NGC_REGISTRY: nvcr.io
          NGC_ORG: ${{ secrets.NGC_PUBLISH_ORG }}
-          RC_NUMBER: ${{ steps.rc_tag.outputs.rc_number }}
          NGC_VERSION_TAG: ${{ steps.rc_tag.outputs.ngc_version_tag }}
-          CI_PIPELINE_RESULT: ${{ needs.ci-pipeline.result }}
-          FRONTEND_BUILD_RESULT: ${{ needs.frontend-build.result }}
        run: |
          set -euo pipefail
-          # Track success/failure for summary
          SUCCESSFUL_COPIES=()
          FAILED_COPIES=()
-          # Get ECR hostname from instance role
          ACCOUNT_ID="$(aws sts get-caller-identity --query Account --output text)"
          ECR_HOSTNAME="${ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com"
-          FRAMEWORKS=("vllm" "trtllm" "sglang")
          ARCHITECTURES=("amd64" "arm64")
          echo "========================================"
-          echo "Build Status:"
-          echo "  CI Pipeline: ${CI_PIPELINE_RESULT}"
-          echo "  Frontend Build: ${FRONTEND_BUILD_RESULT}"
-          echo "========================================"
-          echo ""
          echo "Copying images from ECR to NGC (registry-to-registry)"
          echo "NGC Version Tag: ${NGC_VERSION_TAG}"
+          echo "========================================"
+          # copy_image SRC DST LABEL
+          # Copies SRC to DST with `crane copy` and files LABEL under
+          # SUCCESSFUL_COPIES or FAILED_COPIES. Returns 0 on success and 1 on
+          # failure, so callers running under `set -e` must guard the call
+          # (`if copy_image ...` or `|| true`) when a failure should be skipped.
+          copy_image() {
+            local src_ref="$1" dst_ref="$2" label="$3"
+            echo "----------------------------------------"
+            echo "Copying: ${label}"
+            if ! crane copy "${src_ref}" "${dst_ref}"; then
+              echo "  Warning: Failed to copy ${label}, skipping..."
+              FAILED_COPIES+=("${label}")
+              return 1
+            fi
+            echo "  Copied: ${label}"
+            SUCCESSFUL_COPIES+=("${label}")
+            return 0
+          }
+          # create_manifest MANIFEST AMD64_IMG ARM64_IMG LABEL
+          # Creates MANIFEST from the two per-arch images and pushes it. The
+          # outcome of the push is recorded as "LABEL (multi-arch)" in
+          # SUCCESSFUL_COPIES or FAILED_COPIES; the function itself always
+          # returns 0.
+          create_manifest() {
+            local index_ref="$1" amd64_ref="$2" arm64_ref="$3" label="$4"
+            local entry="${label} (multi-arch)"
+            echo "Creating manifest: ${index_ref}"
+            # A failed create is tolerated here; the push decides the outcome.
+            docker manifest create "${index_ref}" "${amd64_ref}" "${arm64_ref}" || true
+            if ! docker manifest push "${index_ref}"; then
+              echo "  Failed to create multi-arch: ${label}"
+              FAILED_COPIES+=("${entry}")
+              return 0
+            fi
+            echo "  Created multi-arch: ${label}"
+            SUCCESSFUL_COPIES+=("${entry}")
+          }
-          # Copy runtime images (from ci-test-suite.yml)
+          # ---- CUDA 12 runtime images (vllm and sglang) ----
-          for FRAMEWORK in "${FRAMEWORKS[@]}"; do
+          echo ""
+          echo "=== CUDA 12 Runtime Images (vllm, sglang) ==="
+          CUDA12_FRAMEWORKS=("vllm" "sglang")
+          for FRAMEWORK in "${CUDA12_FRAMEWORKS[@]}"; do
+            NGC_NAME="${FRAMEWORK}-runtime"
            for ARCH in "${ARCHITECTURES[@]}"; do
-              SOURCE_TAG="${IMAGE_PREFIX}-${FRAMEWORK}-${ARCH}"
+              SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${IMAGE_PREFIX}-${FRAMEWORK}-cuda12-${ARCH}"
-              SOURCE_IMAGE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${SOURCE_TAG}"
+              TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/${NGC_NAME}:${NGC_VERSION_TAG}-${ARCH}"
-              NGC_TAG="${NGC_VERSION_TAG}-${ARCH}"
+              # copy_image returns 1 on failure; unguarded, this script's
+              # `set -e` would abort the whole step here instead of recording
+              # the failure and moving on to the remaining images.
+              copy_image "${SOURCE}" "${TARGET}" "${NGC_NAME}:${NGC_VERSION_TAG}-${ARCH}" || true
-              NGC_IMAGE="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/${FRAMEWORK}-runtime:${NGC_TAG}"
-              echo "----------------------------------------"
-              echo "Copying: ${FRAMEWORK}-runtime:${NGC_TAG}"
-              if crane copy "${SOURCE_IMAGE}" "${NGC_IMAGE}"; then
-                echo "✅ Copied: ${FRAMEWORK}-runtime:${NGC_TAG}"
-                SUCCESSFUL_COPIES+=("${FRAMEWORK}-runtime:${NGC_TAG}")
-              else
-                echo "⚠️  Warning: Failed to copy ${FRAMEWORK} (${ARCH}), skipping..."
-                FAILED_COPIES+=("${FRAMEWORK}-runtime:${NGC_TAG}")
-              fi
            done
+            create_manifest \
+              "${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/${NGC_NAME}:${NGC_VERSION_TAG}" \
+              "${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/${NGC_NAME}:${NGC_VERSION_TAG}-amd64" \
+              "${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/${NGC_NAME}:${NGC_VERSION_TAG}-arm64" \
+              "${NGC_NAME}:${NGC_VERSION_TAG}"
          done
-          # Copy CUDA 13 images - both architectures
+          # ---- CUDA 13 runtime images (vllm, sglang, trtllm) ----
          echo ""
-          echo "Copying CUDA 13 images from ECR..."
+          echo "=== CUDA 13 Runtime Images (vllm, sglang, trtllm) ==="
-          CUDA13_FRAMEWORKS=("vllm" "sglang")
+          CUDA13_FRAMEWORKS=("vllm" "sglang" "trtllm")
          for FRAMEWORK in "${CUDA13_FRAMEWORKS[@]}"; do
+            if [ "${FRAMEWORK}" = "trtllm" ]; then
+              NGC_NAME="tensorrtllm-runtime"
+            else
+              NGC_NAME="${FRAMEWORK}-runtime"
+            fi
            for ARCH in "${ARCHITECTURES[@]}"; do
-              SOURCE_TAG="${IMAGE_PREFIX}-${FRAMEWORK}-cuda13-${ARCH}"
+              SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${IMAGE_PREFIX}-${FRAMEWORK}-cuda13-${ARCH}"
-              SOURCE_IMAGE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${SOURCE_TAG}"
+              TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/${NGC_NAME}:${NGC_VERSION_TAG}-cuda13-${ARCH}"
-              NGC_TAG="${NGC_VERSION_TAG}-cuda13-${ARCH}"
+              # copy_image returns 1 on failure; unguarded, this script's
+              # `set -e` would abort the whole step here instead of recording
+              # the failure and moving on to the remaining images.
+              copy_image "${SOURCE}" "${TARGET}" "${NGC_NAME}:${NGC_VERSION_TAG}-cuda13-${ARCH}" || true
-              NGC_IMAGE="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/${FRAMEWORK}-runtime:${NGC_TAG}"
-              echo "----------------------------------------"
-              echo "Copying: ${FRAMEWORK}-runtime:${NGC_TAG}"
-              if crane copy "${SOURCE_IMAGE}" "${NGC_IMAGE}"; then
-                echo "✅ Copied: ${FRAMEWORK}-runtime:${NGC_TAG}"
-                SUCCESSFUL_COPIES+=("${FRAMEWORK}-runtime:${NGC_TAG}")
-              else
-                echo "⚠️  Warning: Failed to copy ${FRAMEWORK} CUDA13 (${ARCH}), skipping..."
-                FAILED_COPIES+=("${FRAMEWORK}-runtime:${NGC_TAG}")
-              fi
            done
-            # Create multi-arch manifest
+            create_manifest \
-            MULTIARCH="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/${FRAMEWORK}-runtime:${NGC_VERSION_TAG}-cuda13"
+              "${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/${NGC_NAME}:${NGC_VERSION_TAG}-cuda13" \
-            echo "Creating manifest: ${MULTIARCH}"
+              "${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/${NGC_NAME}:${NGC_VERSION_TAG}-cuda13-amd64" \
-            docker manifest create "${MULTIARCH}" \
+              "${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/${NGC_NAME}:${NGC_VERSION_TAG}-cuda13-arm64" \
-              "${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/${FRAMEWORK}-runtime:${NGC_VERSION_TAG}-cuda13-amd64" \
+              "${NGC_NAME}:${NGC_VERSION_TAG}-cuda13"
-              "${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/${FRAMEWORK}-runtime:${NGC_VERSION_TAG}-cuda13-arm64" || true
-            if docker manifest push "${MULTIARCH}"; then
-              echo "✅ Created multi-arch: ${FRAMEWORK}-runtime:${NGC_VERSION_TAG}-cuda13"
-              SUCCESSFUL_COPIES+=("${FRAMEWORK}-runtime:${NGC_VERSION_TAG}-cuda13 (multi-arch)")
-            else
-              echo "⚠️  Failed to create ${FRAMEWORK} CUDA13 multi-arch"
-              FAILED_COPIES+=("${FRAMEWORK}-runtime:${NGC_VERSION_TAG}-cuda13 (multi-arch)")
-            fi
          done
-          # Copy frontend images from ECR (built by build-frontend-image.yaml)
+          # ---- Frontend images ----
          echo ""
-          echo "Copying frontend images from ECR..."
+          echo "=== Frontend Images ==="
          FRONTEND_IMAGES=()
          for ARCH in "${ARCHITECTURES[@]}"; do
-            SOURCE_TAG="${{ github.sha }}-frontend-${ARCH}"
+            SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${IMAGE_PREFIX}-frontend-${ARCH}"
-            SOURCE_IMAGE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${SOURCE_TAG}"
+            TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/dynamo-frontend:${NGC_VERSION_TAG}-${ARCH}"
-            NGC_TAG="${NGC_VERSION_TAG}-${ARCH}"
+            if copy_image "${SOURCE}" "${TARGET}" "dynamo-frontend:${NGC_VERSION_TAG}-${ARCH}"; then
-            NGC_IMAGE="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/dynamo-frontend:${NGC_TAG}"
+              FRONTEND_IMAGES+=("${TARGET}")
-            echo "----------------------------------------"
-            echo "Copying: dynamo-frontend:${NGC_TAG}"
-            if crane copy "${SOURCE_IMAGE}" "${NGC_IMAGE}"; then
-              echo "✅ Copied: dynamo-frontend:${NGC_TAG}"
-              SUCCESSFUL_COPIES+=("dynamo-frontend:${NGC_TAG}")
-              FRONTEND_IMAGES+=("${NGC_IMAGE}")
-            else
-              echo "⚠️  Warning: Failed to copy dynamo-frontend (${ARCH}), skipping..."
-              FAILED_COPIES+=("dynamo-frontend:${NGC_TAG}")
            fi
          done
-          # Create multi-arch manifest for frontend
-          echo ""
-          echo "Creating multi-arch manifest for dynamo-frontend..."
-          FRONTEND_MULTIARCH="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/dynamo-frontend:${NGC_VERSION_TAG}"
          if [ ${#FRONTEND_IMAGES[@]} -eq 2 ]; then
-            echo "Creating manifest index: ${FRONTEND_MULTIARCH}"
+            create_manifest \
-            docker manifest create "${FRONTEND_MULTIARCH}" \
+              "${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/dynamo-frontend:${NGC_VERSION_TAG}" \
-              "${FRONTEND_IMAGES[0]}" \
+              "${FRONTEND_IMAGES[0]}" "${FRONTEND_IMAGES[1]}" \
-              "${FRONTEND_IMAGES[1]}" || true
+              "dynamo-frontend:${NGC_VERSION_TAG}"
-            if docker manifest push "${FRONTEND_MULTIARCH}"; then
-              echo "✅ Created multi-arch manifest: dynamo-frontend:${NGC_VERSION_TAG}"
-              SUCCESSFUL_COPIES+=("dynamo-frontend:${NGC_VERSION_TAG} (multi-arch)")
-            else
-              echo "⚠️  Warning: Failed to create multi-arch manifest"
-              FAILED_COPIES+=("dynamo-frontend:${NGC_VERSION_TAG} (multi-arch)")
-            fi
          else
-            echo "⚠️  Warning: Not all architectures available, skipping multi-arch manifest"
+            echo "Warning: Not all frontend architectures available, skipping multi-arch manifest"
            FAILED_COPIES+=("dynamo-frontend:${NGC_VERSION_TAG} (multi-arch - missing archs)")
          fi
-          # Output counts for summary
+          # ---- Operator image (multi-arch manifest already built by operator-build) ----
+          echo ""
+          echo "=== Operator Image ==="
+          OPERATOR_SOURCE="${ECR_HOSTNAME}/${REGISTRY_IMAGE}:${IMAGE_PREFIX}-operator"
+          OPERATOR_TARGET="${NGC_REGISTRY}/${NGC_ORG}/ai-dynamo/kubernetes-operator:${NGC_VERSION_TAG}"
+          # copy_image returns 1 on failure; unguarded, this script's `set -e`
+          # would abort here, before the summary counts below are written.
+          # The failure is still recorded in FAILED_COPIES.
+          copy_image "${OPERATOR_SOURCE}" "${OPERATOR_TARGET}" "kubernetes-operator:${NGC_VERSION_TAG}" || true
+          # ---- Summary ----
          echo "successful_count=${#SUCCESSFUL_COPIES[@]}" >> $GITHUB_OUTPUT
          echo "failed_count=${#FAILED_COPIES[@]}" >> $GITHUB_OUTPUT
-          # Save lists for summary (newline-separated)
          printf '%s\n' "${SUCCESSFUL_COPIES[@]}" > /tmp/successful_copies.txt
          printf '%s\n' "${FAILED_COPIES[@]}" > /tmp/failed_copies.txt 2>/dev/null || true
@@ -382,21 +632,60 @@ jobs:
          echo "  Failed: ${#FAILED_COPIES[@]}"
          echo "========================================"
-          # Fail the step if all copies failed
          if [ ${#SUCCESSFUL_COPIES[@]} -eq 0 ]; then
-            echo "❌ ERROR: No images were successfully copied to NGC!"
+            echo "ERROR: No images were successfully copied to NGC!"
            exit 1
          fi
+      - name: Package and push Helm charts to NGC
+        # Packages deploy/helm/charts/platform at the RC chart version and
+        # pushes the archive to the NGC Helm repository with `helm cm-push`.
+        env:
+          NGC_HELM_REPO: https://helm.ngc.nvidia.com/${{ secrets.NGC_PUBLISH_ORG }}/ai-dynamo
+          NGC_TOKEN: ${{ secrets.NGC_PUBLISH_TOKEN }}
+          HELM_CHART_VERSION: ${{ steps.rc_tag.outputs.helm_chart_version }}
+        run: |
+          set -euo pipefail
+          REPO_ALIAS="ngc-staging-dynamo"
+          # Installing the plugin fails when it is already present; ignore that.
+          helm plugin install https://github.com/chartmuseum/helm-push || true
+          # Drop the authenticated repo entry on every exit path, not only
+          # after a successful push, so the token never lingers in the
+          # runner's Helm configuration.
+          trap 'helm repo remove "${REPO_ALIAS}" > /dev/null 2>&1 || true' EXIT
+          # Output is discarded to keep credentials out of the log, so report
+          # a failure explicitly instead of aborting without any message.
+          if ! helm repo add "${REPO_ALIAS}" \
+            --username='$oauthtoken' \
+            --password="${NGC_TOKEN}" \
+            "${NGC_HELM_REPO}" > /dev/null 2>&1; then
+            echo "ERROR: failed to register the NGC Helm repository"
+            exit 1
+          fi
+          helm repo add nats https://nats-io.github.io/k8s/helm/charts/ || true
+          helm repo add bitnami https://charts.bitnami.com/bitnami || true
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "### Helm Charts" >> $GITHUB_STEP_SUMMARY
+          PLATFORM_CHART_DIR="deploy/helm/charts/platform"
+          # Chart name is the top-level `name:` field of Chart.yaml.
+          CHART_NAME=$(awk '/^name:/ {print $2}' "${PLATFORM_CHART_DIR}/Chart.yaml")
+          pushd "${PLATFORM_CHART_DIR}"
+          helm dep build .
+          popd
+          echo "Packaging ${CHART_NAME} with version ${HELM_CHART_VERSION}..."
+          helm package \
+            --version "${HELM_CHART_VERSION}" \
+            --app-version "${HELM_CHART_VERSION}" \
+            "${PLATFORM_CHART_DIR}"
+          # `helm package` writes <name>-<version>.tgz into the working directory.
+          CHART_FILE="${CHART_NAME}-${HELM_CHART_VERSION}.tgz"
+          echo "Pushing ${CHART_FILE} to NGC Helm registry..."
+          helm cm-push "${CHART_FILE}" "${REPO_ALIAS}"
+          echo "- \`${CHART_NAME}:${HELM_CHART_VERSION}\` pushed to NGC Helm registry" >> $GITHUB_STEP_SUMMARY
      - name: Create release summary
        env:
          RC_TAG: ${{ steps.rc_tag.outputs.rc_tag }}
-          RC_NUMBER: ${{ steps.rc_tag.outputs.rc_number }}
          NGC_VERSION_TAG: ${{ steps.rc_tag.outputs.ngc_version_tag }}
+          HELM_CHART_VERSION: ${{ steps.rc_tag.outputs.helm_chart_version }}
          SUCCESSFUL_COUNT: ${{ steps.copy_images.outputs.successful_count }}
          FAILED_COUNT: ${{ steps.copy_images.outputs.failed_count }}
-          CI_PIPELINE_RESULT: ${{ needs.ci-pipeline.result }}
-          FRONTEND_BUILD_RESULT: ${{ needs.frontend-build.result }}
        run: |
          echo "## Release Summary" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
@@ -408,35 +697,28 @@ jobs:
          echo "| Commit | ${{ github.sha }} |" >> $GITHUB_STEP_SUMMARY
          echo "| Branch | ${{ github.ref_name }} |" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
-          echo "### Build Status" >> $GITHUB_STEP_SUMMARY
-          echo "" >> $GITHUB_STEP_SUMMARY
-          echo "| Pipeline | Result |" >> $GITHUB_STEP_SUMMARY
-          echo "|----------|--------|" >> $GITHUB_STEP_SUMMARY
-          echo "| CI Pipeline | ${CI_PIPELINE_RESULT} |" >> $GITHUB_STEP_SUMMARY
-          echo "| Frontend Build | ${FRONTEND_BUILD_RESULT} |" >> $GITHUB_STEP_SUMMARY
-          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### NGC Publishing Results" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
-          echo "- ✅ **Successful copies**: ${SUCCESSFUL_COUNT}" >> $GITHUB_STEP_SUMMARY
+          echo "- **Successful copies**: ${SUCCESSFUL_COUNT}" >> $GITHUB_STEP_SUMMARY
-          echo "- ⚠️ **Failed copies**: ${FAILED_COUNT}" >> $GITHUB_STEP_SUMMARY
+          echo "- **Failed copies**: ${FAILED_COUNT}" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### Expected Images" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
-          echo "Runtime images (CUDA 12):" >> $GITHUB_STEP_SUMMARY
+          echo "Runtime images (CUDA 12 - default):" >> $GITHUB_STEP_SUMMARY
-          echo "- \`vllm-runtime:${NGC_VERSION_TAG}-{amd64,arm64}\`" >> $GITHUB_STEP_SUMMARY
+          echo "- \`vllm-runtime:${NGC_VERSION_TAG}\` (multi-arch)" >> $GITHUB_STEP_SUMMARY
-          echo "- \`trtllm-runtime:${NGC_VERSION_TAG}-{amd64,arm64}\`" >> $GITHUB_STEP_SUMMARY
+          echo "- \`sglang-runtime:${NGC_VERSION_TAG}\` (multi-arch)" >> $GITHUB_STEP_SUMMARY
-          echo "- \`sglang-runtime:${NGC_VERSION_TAG}-{amd64,arm64}\`" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "Runtime images (CUDA 13):" >> $GITHUB_STEP_SUMMARY
-          echo "- \`vllm-runtime:${NGC_VERSION_TAG}-cuda13\` (multi-arch: amd64, arm64)" >> $GITHUB_STEP_SUMMARY
+          echo "- \`vllm-runtime:${NGC_VERSION_TAG}-cuda13\` (multi-arch)" >> $GITHUB_STEP_SUMMARY
-          echo "- \`vllm-runtime:${NGC_VERSION_TAG}-cuda13-amd64\`" >> $GITHUB_STEP_SUMMARY
+          echo "- \`sglang-runtime:${NGC_VERSION_TAG}-cuda13\` (multi-arch)" >> $GITHUB_STEP_SUMMARY
-          echo "- \`vllm-runtime:${NGC_VERSION_TAG}-cuda13-arm64\`" >> $GITHUB_STEP_SUMMARY
+          echo "- \`tensorrtllm-runtime:${NGC_VERSION_TAG}-cuda13\` (multi-arch)" >> $GITHUB_STEP_SUMMARY
-          echo "- \`sglang-runtime:${NGC_VERSION_TAG}-cuda13\` (multi-arch: amd64, arm64)" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
-          echo "- \`sglang-runtime:${NGC_VERSION_TAG}-cuda13-amd64\`" >> $GITHUB_STEP_SUMMARY
+          echo "Operator image:" >> $GITHUB_STEP_SUMMARY
-          echo "- \`sglang-runtime:${NGC_VERSION_TAG}-cuda13-arm64\`" >> $GITHUB_STEP_SUMMARY
+          echo "- \`kubernetes-operator:${NGC_VERSION_TAG}\`" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "Frontend images:" >> $GITHUB_STEP_SUMMARY
-          echo "- \`dynamo-frontend:${NGC_VERSION_TAG}\` (multi-arch: amd64, arm64)" >> $GITHUB_STEP_SUMMARY
+          echo "- \`dynamo-frontend:${NGC_VERSION_TAG}\` (multi-arch)" >> $GITHUB_STEP_SUMMARY
-          echo "- \`dynamo-frontend:${NGC_VERSION_TAG}-amd64\`" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
-          echo "- \`dynamo-frontend:${NGC_VERSION_TAG}-arm64\`" >> $GITHUB_STEP_SUMMARY
+          echo "Helm chart:" >> $GITHUB_STEP_SUMMARY
+          echo "- \`dynamo-platform:${HELM_CHART_VERSION}\` (pushed to NGC Helm registry)" >> $GITHUB_STEP_SUMMARY