ci: Phase 1: Shared Workflow Improvements (#7345)

Signed-off-by: Dillon Cullinan <dcullinan@nvidia.com>

ci: Phase 1: Shared Workflow Improvements (#7345)
Signed-off-by: Dillon Cullinan <dcullinan@nvidia.com>
a0e41273 · Dillon Cullinan · GitHub · 7a6db48e · a0e41273 · a0e41273
Unverified Commit a0e41273 authored Apr 09, 2026 by Dillon Cullinan Committed by GitHub Apr 09, 2026
10 changed files
--- a/.github/actions/pytest/action.yml
+++ b/.github/actions/pytest/action.yml
@@ -8,8 +8,8 @@ inputs:
  image_tag:
    description: 'Image Tag to run tests on'
    required: true
-  framework:
+  test_suite_name:
-    description: 'Framework name for test metrics'
+    description: 'Test suite name for test metrics'
    required: false
    default: 'unknown'
  test_type:
@@ -291,7 +291,7 @@ runs:
          echo "📊 ${TOTAL_TESTS} tests completed (${FAILED_TESTS} failed, ${ERROR_TESTS} errors)"
          # Rename XML file to unique name
-          JUNIT_NAME="pytest_test_report_${{ inputs.framework }}_${STR_TEST_TYPE}_${{ inputs.platform_arch }}_${{ github.run_id }}_${{ job.check_run_id }}.xml"
+          JUNIT_NAME="pytest_test_report_${{ inputs.test_suite_name }}_${STR_TEST_TYPE}_${{ inputs.platform_arch }}_${{ github.run_id }}_${{ job.check_run_id }}.xml"
          mv "$JUNIT_FILE" "test-results/$JUNIT_NAME"
          echo "📝 Renamed XML file to: $JUNIT_NAME"
        else
@@ -314,8 +314,8 @@ runs:
      uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4.6.2
      if: always()  # Always upload test results, even if tests failed
      with:
-        name: test-results-${{ inputs.framework }}-${{ env.STR_TEST_TYPE }}-${{ env.PLATFORM_ARCH }}-${{ github.run_id }}-${{ job.check_run_id }}
+        name: test-results-${{ inputs.test_suite_name }}-${{ env.STR_TEST_TYPE }}-${{ env.PLATFORM_ARCH }}-${{ github.run_id }}-${{ job.check_run_id }}
-        path: test-results/pytest_test_report_${{ inputs.framework }}_${{ env.STR_TEST_TYPE }}_${{ inputs.platform_arch }}_${{ github.run_id }}_${{ job.check_run_id }}.xml
+        path: test-results/pytest_test_report_${{ inputs.test_suite_name }}_${{ env.STR_TEST_TYPE }}_${{ inputs.platform_arch }}_${{ github.run_id }}_${{ job.check_run_id }}.xml
        retention-days: 7
    - name: Upload Allure Results

--- a/.github/workflows/build-test-distribute-flavor.yml
+++ b/.github/workflows/build-test-distribute-flavor.yml
@@ -288,7 +288,7 @@ jobs:
        with:
          image_tag: ${{ steps.calculate-target-tag.outputs.test_image }}
          pytest_marks: ${{ inputs.cpu_only_test_markers }}
-          framework: ${{ inputs.framework }}
+          test_suite_name: ${{ inputs.framework }}
          test_type: "pre_merge_cpu"
          platform_arch: ${{ matrix.arch }}
          hf_token: ${{ secrets.HF_TOKEN }}
@@ -304,7 +304,7 @@ jobs:
        with:
          image_tag: ${{ steps.calculate-target-tag.outputs.test_image }}
          pytest_marks: ${{ inputs.single_gpu_test_markers }}
-          framework: ${{ inputs.framework }}
+          test_suite_name: ${{ inputs.framework }}
          test_type: "pre_merge_gpu"
          platform_arch: ${{ matrix.arch }}
          hf_token: ${{ secrets.HF_TOKEN }}
@@ -362,7 +362,7 @@ jobs:
        with:
          image_tag: ${{ steps.calculate-target-tag.outputs.test_image }}
          pytest_marks: ${{ inputs.multi_gpu_test_markers }}
-          framework: ${{ inputs.framework }}
+          test_suite_name: ${{ inputs.framework }}
          test_type: "pre_merge_gpu"
          platform_arch: amd64
          hf_token: ${{ secrets.HF_TOKEN }}

--- a/.github/workflows/container-validation-dynamo.yml
+++ b/.github/workflows/container-validation-dynamo.yml
@@ -269,7 +269,7 @@ jobs:
        with:
          image_tag: ${{ env.IMAGE_TAG }}
          pytest_marks: "pre_merge and parallel and not (vllm or sglang or trtllm) and (gpu_0)"
-          framework: dynamo
+          test_suite_name: dynamo
          test_type: "pre_merge_parallel"
          platform_arch: amd64
          hf_token: ${{ secrets.HF_TOKEN }}
@@ -301,7 +301,7 @@ jobs:
        with:
          image_tag: ${{ env.IMAGE_TAG }}
          pytest_marks: "pre_merge and not parallel and not (vllm or sglang or trtllm) and (gpu_0)"
-          framework: dynamo
+          test_suite_name: dynamo
          test_type: "pre_merge_sequential"
          platform_arch: amd64
          hf_token: ${{ secrets.HF_TOKEN }}

--- a/.github/workflows/post-merge-ci.yml
+++ b/.github/workflows/post-merge-ci.yml
@@ -329,7 +329,7 @@ jobs:
  deploy-test-vllm:
    needs: [deploy-operator, vllm-pipeline]
-    uses: ./.github/workflows/shared-deploy-test-framework.yml
+    uses: ./.github/workflows/shared-deploy-test.yml
    with:
      framework: vllm
      profiles: '["agg", "agg_router", "disagg", "disagg_router"]'
@@ -341,7 +341,7 @@ jobs:
  deploy-test-sglang:
    needs: [deploy-operator, sglang-pipeline]
-    uses: ./.github/workflows/shared-deploy-test-framework.yml
+    uses: ./.github/workflows/shared-deploy-test.yml
    with:
      framework: sglang
      profiles: '["agg", "agg_router"]'
@@ -353,7 +353,7 @@ jobs:
  deploy-test-trtllm:
    needs: [deploy-operator, trtllm-pipeline]
-    uses: ./.github/workflows/shared-deploy-test-framework.yml
+    uses: ./.github/workflows/shared-deploy-test.yml
    with:
      framework: trtllm
      profiles: '["agg", "agg_router"]'

--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -7,14 +7,6 @@ on:
  push:
    branches:
      - "pull-request/[0-9]+"
-      # Note: release/* branches are handled by release.yml workflow
-  workflow_dispatch:
-    inputs:
-      run_deploy_operator:
-        description: 'Run deploy operator and deployment tests'
-        required: false
-        type: boolean
-        default: false
 concurrency:
  # The group name is the ref_name, so that workflows on the same PR/branch have the same group name for cancelling.
@@ -25,15 +17,11 @@ env:
  BUILDER_NAME: b-${{ github.run_id }}-${{ github.run_attempt }}
 jobs:
  # ============================================================================
  # SETUP & DETECTION JOBS
  # ============================================================================
  changed-files:
    runs-on: ubuntu-latest
-    environment: ${{ github.event_name == 'workflow_dispatch' && 'protected-deploy' || '' }}
    outputs:
      core: ${{ steps.changes.outputs.core }}
      planner: ${{ steps.changes.outputs.planner }}
@@ -60,10 +48,29 @@ jobs:
  backend-status-check:
    runs-on: ubuntu-latest
-    needs: [changed-files, planner-pipeline, vllm-pipeline, sglang-pipeline, trtllm-pipeline, operator]  # THIS list determines blocking jobs
+    needs:
+      - changed-files
+      - operator
+      - vllm-build
+      - vllm-dev-build
+      - vllm-test
+      - vllm-multi-gpu-test
+      - vllm-copy-to-acr
+      - sglang-build
+      - sglang-dev-build
+      - sglang-test
+      - sglang-multi-gpu-test
+      - sglang-copy-to-acr
+      - trtllm-build
+      - trtllm-dev-build
+      - trtllm-test
+      - trtllm-multi-gpu-test
+      - trtllm-copy-to-acr
+      - planner-build
+      - planner-test
    if: always()
    steps:
-      - name: "Check all dependent jobs"
+      - name: Check all dependent jobs
        run: |
          echo '${{ toJson(needs) }}' | jq -e 'to_entries | map(.value.result) | all(. as $result | ["success", "skipped"] | any($result == .))'
@@ -72,15 +79,10 @@ jobs:
    needs: [deploy-operator, deploy-test-vllm, deploy-test-sglang, deploy-test-trtllm]
    if: always()
    steps:
-      - name: "Check all deploy test jobs"
+      - name: Check all deploy test jobs
        run: |
          echo '${{ toJson(needs) }}' | jq -e 'to_entries | map(.value.result) | all(. as $result | ["success", "skipped", "cancelled"] | any($result == .))'
-  # ============================================================================
-  # Operator
-  # ============================================================================
  operator:
    needs: changed-files
    if: needs.changed-files.outputs.operator == 'true'
@@ -174,162 +176,348 @@ jobs:
            echo "| \`${image_uri}\` |" >> $GITHUB_STEP_SUMMARY
          done
-# ============================================================================
-# FRAMEWORK PIPELINES (Build → Test → Copy)
-# ============================================================================
  # ============================================================================
-  # PLANNER PIPELINE
+  # BUILD PIPELINES
  # ============================================================================
-  planner-pipeline:
-    name: planner
-    needs: [changed-files]
-    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.planner == 'true'
-    uses: ./.github/workflows/build-test-distribute-flavor.yml
-    with:
-      framework: dynamo
-      builder_flavor: general
-      target: planner
-      platform: 'linux/amd64'
-      cpu_only: true
-      builder_name: ${{ needs.changed-files.outputs.builder_name }}
-      build_timeout_minutes: 45
-      run_cpu_only_tests: true
-      cpu_only_test_markers: 'pre_merge and planner and gpu_0'
-      cpu_only_test_timeout_minutes: 30
-      run_single_gpu_tests: false
-      run_multi_gpu_tests: false
-      copy_to_acr: false
-    secrets: inherit
-  # ============================================================================
+  vllm-build:
-  # VLLM PIPELINE
+    name: vllm-runtime # This name overlaps with other vllm jobs to group them in the UI
-  # ============================================================================
-  vllm-pipeline:
-    name: vllm
    needs: [changed-files]
    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' || needs.changed-files.outputs.deploy == 'true'
-    uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
+    uses: ./.github/workflows/shared-build-image.yml
    with:
      framework: vllm
      target: runtime
+      cuda_version: '["12.9", "13.0"]'
      platform: 'linux/amd64,linux/arm64'
-      cuda_versions: '["12.9", "13.0"]'
      builder_name: ${{ needs.changed-files.outputs.builder_name }}
      build_timeout_minutes: 60
-      copy_timeout_minutes: 10
-      run_cpu_only_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' }}
-      cpu_only_test_markers: 'pre_merge and vllm and gpu_0'
-      run_single_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' }}
-      single_gpu_test_markers: 'pre_merge and vllm and gpu_1'
-      single_gpu_test_timeout_minutes: 35
-      run_multi_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' }}
-      multi_gpu_test_markers: 'pre_merge and vllm and (gpu_2 or gpu_4)'
-      multi_gpu_test_timeout_minutes: 60
    secrets: inherit
-  # ============================================================================
+  vllm-dev-build:
-  # SGLANG PIPELINE
+    name: vllm-dev
-  # ============================================================================
-  sglang-pipeline:
-    name: sglang
    needs: [changed-files]
-    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' || needs.changed-files.outputs.deploy == 'true'
+    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true'
-    uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
+    uses: ./.github/workflows/shared-build-image.yml
    with:
-      framework: sglang
+      framework: vllm
-      target: runtime
+      target: dev
+      cuda_version: '["12.9", "13.0"]'
      platform: 'linux/amd64,linux/arm64'
-      cuda_versions: '["12.9", "13.0"]'
      builder_name: ${{ needs.changed-files.outputs.builder_name }}
+      push_image: false
      build_timeout_minutes: 60
-      copy_timeout_minutes: 10
-      run_cpu_only_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' }}
-      cpu_only_test_markers: 'pre_merge and sglang and gpu_0'
-      run_single_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' }}
-      single_gpu_test_markers: 'pre_merge and sglang and gpu_1'
-      run_multi_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' }}
-      multi_gpu_test_markers: 'pre_merge and sglang and (gpu_2 or gpu_4)'
-      multi_gpu_test_timeout_minutes: 60
    secrets: inherit
-  # ============================================================================
+  sglang-build:
-  # TRTLLM PIPELINE
+    name: sglang-runtime # This name overlaps with other sglang jobs to group them in the UI
-  # ============================================================================
-  trtllm-pipeline:
-    name: trtllm
    needs: [changed-files]
-    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true' || needs.changed-files.outputs.deploy == 'true'
+    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' || needs.changed-files.outputs.deploy == 'true'
-    uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
+    uses: ./.github/workflows/shared-build-image.yml
    with:
-      framework: trtllm
+      framework: sglang
      target: runtime
+      cuda_version: '["12.9", "13.0"]'
      platform: 'linux/amd64,linux/arm64'
-      cuda_versions: '["13.1"]'
      builder_name: ${{ needs.changed-files.outputs.builder_name }}
      build_timeout_minutes: 60
-      copy_timeout_minutes: 10
-      run_cpu_only_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true' }}
-      cpu_only_test_markers: 'pre_merge and trtllm and gpu_0'
-      run_single_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true' }}
-      single_gpu_test_markers: 'pre_merge and trtllm and gpu_1'
-      run_multi_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true' }}
-      multi_gpu_test_markers: 'pre_merge and trtllm and (gpu_2 or gpu_4)'
-      multi_gpu_test_timeout_minutes: 60
    secrets: inherit
-  # ============================================================================
+  sglang-dev-build:
-  # DEV PIPELINES
+    name: sglang-dev
-  # ============================================================================
-  vllm-dev-pipeline:
-    name: vllm-dev
    needs: [changed-files]
-    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true'
+    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true'
-    uses: ./.github/workflows/build-flavor.yml
+    uses: ./.github/workflows/shared-build-image.yml
    with:
-      framework: vllm
+      framework: sglang
      target: dev
+      cuda_version: '["12.9", "13.0"]'
      platform: 'linux/amd64,linux/arm64'
-      cuda_versions: '["12.9", "13.0"]'
      builder_name: ${{ needs.changed-files.outputs.builder_name }}
-      build_timeout_minutes: 60
      push_image: false
+      build_timeout_minutes: 60
    secrets: inherit
-  sglang-dev-pipeline:
+  trtllm-build:
-    name: sglang-dev
+    name: trtllm-runtime # This name overlaps with other trtllm jobs to group them in the UI
    needs: [changed-files]
-    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true'
+    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true' || needs.changed-files.outputs.deploy == 'true'
-    uses: ./.github/workflows/build-flavor.yml
+    uses: ./.github/workflows/shared-build-image.yml
    with:
-      framework: sglang
+      framework: trtllm
-      target: dev
+      target: runtime
+      cuda_version: '["13.1"]'
      platform: 'linux/amd64,linux/arm64'
-      cuda_versions: '["12.9", "13.0"]'
      builder_name: ${{ needs.changed-files.outputs.builder_name }}
      build_timeout_minutes: 60
-      push_image: false
    secrets: inherit
-  trtllm-dev-pipeline:
+  trtllm-dev-build:
    name: trtllm-dev
    needs: [changed-files]
    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true'
-    uses: ./.github/workflows/build-flavor.yml
+    uses: ./.github/workflows/shared-build-image.yml
    with:
      framework: trtllm
      target: dev
+      cuda_version: '["13.1"]'
      platform: 'linux/amd64,linux/arm64'
-      cuda_versions: '["13.1"]'
      builder_name: ${{ needs.changed-files.outputs.builder_name }}
-      build_timeout_minutes: 60
      push_image: false
+      build_timeout_minutes: 60
    secrets: inherit
-   # ============================================================================
+  planner-build:
-   # DEPLOYMENT JOBS
+    name: planner # This name overlaps with other planner jobs to group them in the UI
-   # Deploy operator and run end-to-end tests on Kubernetes cluster
+    needs: [changed-files]
-   # ============================================================================
+    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.planner == 'true'
+    uses: ./.github/workflows/shared-build-image.yml
+    with:
+      framework: dynamo
+      target: planner
+      cuda_version: '[""]'
+      platform: 'linux/amd64'
+      builder_name: ${{ needs.changed-files.outputs.builder_name }}
+      build_timeout_minutes: 45
+    secrets: inherit
+  # ============================================================================
+  # TEST PIPELINES
+  # ============================================================================
+  vllm-test:
+    name: vllm-runtime # This name overlaps with other vllm jobs to group them in the UI
+    needs: [changed-files, vllm-build]
+    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' || needs.changed-files.outputs.deploy == 'true'
+    uses: ./.github/workflows/shared-test.yml
+    with:
+      test_suite_name: vllm
+      test_type: Test
+      amd_runner: prod-tester-amd-gpu-v1 # This runner is overridden for ARM platform
+      target_tag_plain: ${{ needs.vllm-build.outputs.target_tag_plain }}
+      cuda_version: '["12.9", "13.0"]'
+      platform: '["amd64", "arm64"]' # arm64 is included for the CPU-only tests; single-GPU tests are skipped on arm64
+      run_cpu_only_tests: true
+      cpu_only_test_markers: pre_merge and vllm and gpu_0
+      gpu_test_markers: pre_merge and vllm and gpu_1
+      gpu_test_timeout_minutes: 35
+    secrets: inherit
+  vllm-multi-gpu-test:
+    name: vllm-runtime # This name overlaps with other vllm jobs to group them in the UI
+    needs: [changed-files, vllm-build]
+    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' || needs.changed-files.outputs.deploy == 'true'
+    uses: ./.github/workflows/shared-test.yml
+    with:
+      test_suite_name: vllm
+      test_type: Multi-GPU Test
+      amd_runner: prod-tester-amd-gpu-4-v1
+      target_tag_plain: ${{ needs.vllm-build.outputs.target_tag_plain }}
+      cuda_version: '["12.9", "13.0"]'
+      platform: '["amd64"]' # No ARM GPUs available
+      run_sanity_check: false
+      gpu_test_markers: pre_merge and vllm and (gpu_2 or gpu_4)
+      gpu_test_timeout_minutes: 60
+    secrets: inherit
+  sglang-test:
+    name: sglang-runtime # This name overlaps with other sglang jobs to group them in the UI
+    needs: [changed-files, sglang-build]
+    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' || needs.changed-files.outputs.deploy == 'true'
+    uses: ./.github/workflows/shared-test.yml
+    with:
+      test_suite_name: sglang
+      test_type: Test
+      amd_runner: prod-tester-amd-gpu-v1 # This runner is overridden for ARM platform
+      target_tag_plain: ${{ needs.sglang-build.outputs.target_tag_plain }}
+      cuda_version: '["12.9", "13.0"]'
+      platform: '["amd64", "arm64"]' # arm64 is included for the CPU-only tests; single-GPU tests are skipped on arm64
+      run_cpu_only_tests: true
+      cpu_only_test_markers: pre_merge and sglang and gpu_0
+      gpu_test_markers: pre_merge and sglang and gpu_1
+    secrets: inherit
+  sglang-multi-gpu-test:
+    name: sglang-runtime # This name overlaps with other sglang jobs to group them in the UI
+    needs: [changed-files, sglang-build]
+    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' || needs.changed-files.outputs.deploy == 'true'
+    uses: ./.github/workflows/shared-test.yml
+    with:
+      test_suite_name: sglang
+      test_type: Multi-GPU Test
+      amd_runner: prod-tester-amd-gpu-4-v1
+      target_tag_plain: ${{ needs.sglang-build.outputs.target_tag_plain }}
+      cuda_version: '["12.9", "13.0"]'
+      platform: '["amd64"]' # No ARM GPUs available
+      run_sanity_check: false
+      gpu_test_markers: pre_merge and sglang and (gpu_2 or gpu_4)
+      gpu_test_timeout_minutes: 60
+    secrets: inherit
+  trtllm-test:
+    name: trtllm-runtime # This name overlaps with other trtllm jobs to group them in the UI
+    needs: [changed-files, trtllm-build]
+    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true' || needs.changed-files.outputs.deploy == 'true'
+    uses: ./.github/workflows/shared-test.yml
+    with:
+      test_suite_name: trtllm
+      test_type: Test
+      amd_runner: prod-tester-amd-gpu-v1 # This runner is overridden for ARM platform
+      target_tag_plain: ${{ needs.trtllm-build.outputs.target_tag_plain }}
+      cuda_version: '["13.1"]'
+      platform: '["amd64", "arm64"]' # arm64 is included for the CPU-only tests; single-GPU tests are skipped on arm64
+      run_cpu_only_tests: true
+      cpu_only_test_markers: pre_merge and trtllm and gpu_0
+      gpu_test_markers: pre_merge and trtllm and gpu_1
+    secrets: inherit
+  trtllm-multi-gpu-test:
+    name: trtllm-runtime # This name overlaps with other trtllm jobs to group them in the UI
+    needs: [changed-files, trtllm-build]
+    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true' || needs.changed-files.outputs.deploy == 'true'
+    uses: ./.github/workflows/shared-test.yml
+    with:
+      test_suite_name: trtllm
+      test_type: Multi-GPU Test
+      amd_runner: prod-tester-amd-gpu-4-v1
+      target_tag_plain: ${{ needs.trtllm-build.outputs.target_tag_plain }}
+      cuda_version: '["13.1"]'
+      platform: '["amd64"]' # No ARM GPUs available
+      run_sanity_check: false
+      gpu_test_markers: pre_merge and trtllm and (gpu_2 or gpu_4)
+      gpu_test_timeout_minutes: 60
+    secrets: inherit
+  planner-test:
+    name: planner # This name overlaps with other planner jobs to group them in the UI
+    needs: [changed-files, planner-build]
+    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.planner == 'true'
+    uses: ./.github/workflows/shared-test.yml
+    with:
+      test_suite_name: planner
+      test_type: CPU Test
+      amd_runner: prod-tester-amd-gpu-v1 # TODO: use a CPU-only DinD runner for the dynamo repo
+      target_tag_plain: ${{ needs.planner-build.outputs.target_tag_plain }}
+      cuda_version: '[""]'
+      platform: '["amd64"]'
+      run_sanity_check: false
+      run_cpu_only_tests: true
+      cpu_only_test_markers: 'pre_merge and planner and gpu_0'
+      cpu_only_test_timeout_minutes: 30
+      run_gpu_tests: false
+    secrets: inherit
+  # ============================================================================
+  # IMAGE COMPLIANCE PIPELINES
+  # ============================================================================
+  vllm-compliance:
+    name: vllm-runtime # This name overlaps with other vllm jobs to group them in the UI
+    needs: [changed-files, vllm-build]
+    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' || needs.changed-files.outputs.deploy == 'true'
+    uses: ./.github/workflows/shared-compliance.yml
+    with:
+      framework: vllm
+      target: runtime
+      target_tag_plain: ${{ needs.vllm-build.outputs.target_tag_plain }}
+      cuda_version: '["12.9", "13.0"]'
+      platform: '["amd64"]'
+    secrets: inherit
+  sglang-compliance:
+    name: sglang-runtime # This name overlaps with other sglang jobs to group them in the UI
+    needs: [changed-files, sglang-build]
+    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' || needs.changed-files.outputs.deploy == 'true'
+    uses: ./.github/workflows/shared-compliance.yml
+    with:
+      framework: sglang
+      target: runtime
+      target_tag_plain: ${{ needs.sglang-build.outputs.target_tag_plain }}
+      cuda_version: '["12.9", "13.0"]'
+      platform: '["amd64"]'
+    secrets: inherit
+  trtllm-compliance:
+    name: trtllm-runtime # This name overlaps with other trtllm jobs to group them in the UI
+    needs: [changed-files, trtllm-build]
+    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true' || needs.changed-files.outputs.deploy == 'true'
+    uses: ./.github/workflows/shared-compliance.yml
+    with:
+      framework: trtllm
+      target: runtime
+      target_tag_plain: ${{ needs.trtllm-build.outputs.target_tag_plain }}
+      cuda_version: '["13.1"]'
+      platform: '["amd64"]'
+    secrets: inherit
+  planner-compliance:
+    name: planner # This name overlaps with other planner jobs to group them in the UI
+    needs: [changed-files, planner-build]
+    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.planner == 'true'
+    uses: ./.github/workflows/shared-compliance.yml
+    with:
+      framework: dynamo
+      target: planner
+      target_tag_plain: ${{ needs.planner-build.outputs.target_tag_plain }}
+      cuda_version: '[""]'
+      platform: '["amd64"]'
+    secrets: inherit
+  # ============================================================================
+  # IMAGE COPY PIPELINES
+  # ============================================================================
+  vllm-copy-to-acr:
+    name: vllm-runtime # This name overlaps with other vllm jobs to group them in the UI
+    needs: [changed-files, vllm-build, vllm-test]
+    if: |
+      always() &&
+      (needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' || needs.changed-files.outputs.deploy == 'true') &&
+      needs.vllm-build.result == 'success' &&
+      (needs.vllm-test.result == 'success' || needs.vllm-test.result == 'skipped')
+    uses: ./.github/workflows/shared-copy.yml
+    with:
+      target_tag_plain: ${{ needs.vllm-build.outputs.target_tag_plain }}
+      cuda_version: '["12.9", "13.0"]'
+      override_arch: amd64 # We are using AMD64 images only on the rest of the clusters.
+      copy_timeout_minutes: 10
+    secrets: inherit
+  sglang-copy-to-acr:
+    name: sglang-runtime # This name overlaps with other sglang jobs to group them in the UI
+    needs: [changed-files, sglang-build, sglang-test]
+    if: |
+      always() &&
+      (needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' || needs.changed-files.outputs.deploy == 'true') &&
+      needs.sglang-build.result == 'success' &&
+      (needs.sglang-test.result == 'success' || needs.sglang-test.result == 'skipped')
+    uses: ./.github/workflows/shared-copy.yml
+    with:
+      target_tag_plain: ${{ needs.sglang-build.outputs.target_tag_plain }}
+      cuda_version: '["12.9", "13.0"]'
+      override_arch: amd64 # We are using AMD64 images only on the rest of the clusters.
+      copy_timeout_minutes: 10
+    secrets: inherit
+  trtllm-copy-to-acr:
+    name: trtllm-runtime # This name overlaps with other trtllm jobs to group them in the UI
+    needs: [changed-files, trtllm-build, trtllm-test]
+    if: |
+      always() &&
+      (needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true' || needs.changed-files.outputs.deploy == 'true') &&
+      needs.trtllm-build.result == 'success' &&
+      (needs.trtllm-test.result == 'success' || needs.trtllm-test.result == 'skipped')
+    uses: ./.github/workflows/shared-copy.yml
+    with:
+      target_tag_plain: ${{ needs.trtllm-build.outputs.target_tag_plain }}
+      cuda_version: '["13.1"]'
+      override_arch: amd64 # We are using AMD64 images only on the rest of the clusters.
+      copy_timeout_minutes: 10
+    secrets: inherit
+  # ============================================================================
+  # DEPLOY TEST PIPELINES
+  # ============================================================================
  deploy-operator:
    if: |
@@ -346,26 +534,25 @@ jobs:
      vcluster_name: ${{ steps.setup.outputs.vcluster_name }}
      operator_tag: ${{ steps.setup.outputs.operator_tag }}
    steps:
-    - uses: actions/checkout@v4
+      - uses: actions/checkout@v4
-    - name: Setup vCluster and operator
+      - name: Setup vCluster and operator
-      id: setup
+        id: setup
-      uses: ./.github/actions/setup-dynamo-operator
+        uses: ./.github/actions/setup-dynamo-operator
-      with:
+        with:
-        kubeconfig_base64: ${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}
+          kubeconfig_base64: ${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}
-        registry: ${{ secrets.AZURE_ACR_HOSTNAME }}
+          registry: ${{ secrets.AZURE_ACR_HOSTNAME }}
-        operator_tag: ${{ needs.operator.result == 'success' && needs.operator.outputs.operator_default_tag || 'main-operator' }}
+          operator_tag: ${{ needs.operator.result == 'success' && needs.operator.outputs.operator_default_tag || 'main-operator' }}
-        hf_token: ${{ secrets.HF_TOKEN }}
+          hf_token: ${{ secrets.HF_TOKEN }}
-        dockerhub_username: ${{ secrets.DOCKERHUB_LOGIN_USER }}
+          dockerhub_username: ${{ secrets.DOCKERHUB_LOGIN_USER }}
-        dockerhub_password: ${{ secrets.DOCKERHUB_ACCESS_TOKEN }}
+          dockerhub_password: ${{ secrets.DOCKERHUB_ACCESS_TOKEN }}
  deploy-test-vllm:
+    name: vllm Deploy Test
+    needs: [changed-files, deploy-operator, vllm-copy-to-acr, vllm-multi-gpu-test]
    if: |
      !cancelled() && !failure() &&
-      (needs.changed-files.outputs.vllm == 'true' || needs.changed-files.outputs.deploy == 'true') &&
+      (needs.changed-files.outputs.vllm == 'true' || needs.changed-files.outputs.deploy == 'true')
-      needs.deploy-operator.result == 'success' &&
+    uses: ./.github/workflows/shared-deploy-test.yml
-      needs.vllm-pipeline.result == 'success'
-    needs: [changed-files, deploy-operator, vllm-pipeline]
-    uses: ./.github/workflows/shared-deploy-test-framework.yml
    with:
      framework: vllm
      profiles: '["agg", "agg_router", "disagg", "disagg_router"]'
@@ -376,13 +563,12 @@ jobs:
    secrets: inherit
  deploy-test-sglang:
+    name: sglang Deploy Test
+    needs: [changed-files, deploy-operator, sglang-copy-to-acr, sglang-multi-gpu-test]
    if: |
      !cancelled() && !failure() &&
-      (needs.changed-files.outputs.sglang == 'true' || needs.changed-files.outputs.deploy == 'true') &&
+      (needs.changed-files.outputs.sglang == 'true' || needs.changed-files.outputs.deploy == 'true')
-      needs.deploy-operator.result == 'success' &&
+    uses: ./.github/workflows/shared-deploy-test.yml
-      needs.sglang-pipeline.result == 'success'
-    needs: [changed-files, deploy-operator, sglang-pipeline]
-    uses: ./.github/workflows/shared-deploy-test-framework.yml
    with:
      framework: sglang
      profiles: '["agg", "agg_router"]'
@@ -393,13 +579,12 @@ jobs:
    secrets: inherit
  deploy-test-trtllm:
+    name: trtllm Deploy Test
+    needs: [changed-files, deploy-operator, trtllm-copy-to-acr, trtllm-multi-gpu-test]
    if: |
      !cancelled() && !failure() &&
-      (needs.changed-files.outputs.trtllm == 'true' || needs.changed-files.outputs.deploy == 'true') &&
+      (needs.changed-files.outputs.trtllm == 'true' || needs.changed-files.outputs.deploy == 'true')
-      needs.deploy-operator.result == 'success' &&
+    uses: ./.github/workflows/shared-deploy-test.yml
-      needs.trtllm-pipeline.result == 'success'
-    needs: [changed-files, deploy-operator, trtllm-pipeline]
-    uses: ./.github/workflows/shared-deploy-test-framework.yml
    with:
      framework: trtllm
      profiles: '["agg", "agg_router"]'
@@ -414,14 +599,43 @@ jobs:
    needs: [deploy-operator, deploy-test-vllm, deploy-test-sglang, deploy-test-trtllm]
    runs-on: prod-default-small-v2
    steps:
-    - uses: actions/checkout@v4
+      # Pinned to the same commit SHA as the other checkout steps in these
+      # workflows instead of the floating v4 tag.
+      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
-    - name: Teardown vCluster
+      - name: Teardown vCluster
-      if: needs.deploy-operator.outputs.namespace != '' && needs.deploy-operator.outputs.vcluster_name != ''
+        if: needs.deploy-operator.outputs.namespace != '' && needs.deploy-operator.outputs.vcluster_name != ''
-      uses: ./.github/actions/teardown-dynamo-operator
+        uses: ./.github/actions/teardown-dynamo-operator
-      with:
+        with:
-        kubeconfig_base64: ${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}
+          kubeconfig_base64: ${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}
-        vcluster_name: ${{ needs.deploy-operator.outputs.vcluster_name }}
+          vcluster_name: ${{ needs.deploy-operator.outputs.vcluster_name }}
-        vcluster_namespace: ${{ needs.deploy-operator.outputs.namespace }}
+          vcluster_namespace: ${{ needs.deploy-operator.outputs.namespace }}
+  # Best-effort removal of the BuildKit builder used by this run. `always()`
+  # makes the job run even when the jobs it waits for failed or were skipped.
+  clean-k8s-builder:
+    name: Clean K8s builder if exists
+    runs-on: prod-default-small-v2
+    if: always()
+    needs:
+      - changed-files
+      - operator
+      - planner-test
+      - vllm-copy-to-acr
+      - vllm-multi-gpu-test
+      - sglang-copy-to-acr
+      - sglang-multi-gpu-test
+      - trtllm-copy-to-acr
+      - trtllm-multi-gpu-test
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
+      - name: Create K8s builders (skip bootstrap)
+        uses: ./.github/actions/bootstrap-buildkit
+        # A failure here must not prevent the removal step below.
+        continue-on-error: true
+        with:
+          builder_name: ${{ needs.changed-files.outputs.builder_name }}
+          buildkit_worker_addresses: ''  # k8s builder
+          skip_bootstrap: true
+      - name: Builder Cleanup in case of k8s builder
+        shell: bash
+        env:
+          # Passed through the environment so the value is quoted by the shell
+          # rather than spliced into the script text.
+          BUILDER_NAME: ${{ needs.changed-files.outputs.builder_name }}
+        run: |
+          # Because of `always()`, changed-files may not have produced a builder
+          # name. Without an argument `docker buildx rm` acts on the currently
+          # selected builder, so skip the removal in that case.
+          if [ -z "${BUILDER_NAME}" ]; then
+            echo "No builder name available; nothing to clean up"
+            exit 0
+          fi
+          docker buildx rm "${BUILDER_NAME}" || true
  # ============================================================================
  # ALLURE REPORT
@@ -429,9 +643,26 @@ jobs:
  # ============================================================================
  allure-report:
-    needs: [vllm-pipeline, sglang-pipeline, trtllm-pipeline, deploy-test-vllm, deploy-test-sglang, deploy-test-trtllm]
+    needs:
-    # Disabled: gh-pages branch bloated to ~1GB after 72 commits of Allure reports
+      - changed-files
+      - operator
+      - vllm-build
+      - vllm-test
+      - vllm-multi-gpu-test
+      - vllm-copy-to-acr
+      - sglang-build
+      - sglang-test
+      - sglang-multi-gpu-test
+      - sglang-copy-to-acr
+      - trtllm-build
+      - trtllm-test
+      - trtllm-multi-gpu-test
+      - trtllm-copy-to-acr
+      - deploy-test-vllm
+      - deploy-test-sglang
+      - deploy-test-trtllm
    if: false
+    # Disabled: gh-pages branch bloated to ~1GB after 72 commits of Allure reports
    # if: ${{ !cancelled() }}
    uses: ./.github/workflows/generate-allure-report.yml
    with:
@@ -441,28 +672,3 @@ jobs:
    permissions:
      contents: write
      actions: read
-  # ============================================================================
-  # CLEANUP JOBS
-  # Clean up ephemeral Kubernetes namespace and resources
-  # ============================================================================
-  clean-k8s-builder:
-    name: Clean K8s builder if exists
-    runs-on: prod-default-small-v2
-    if: always()
-    needs: [planner-pipeline, vllm-pipeline, sglang-pipeline, trtllm-pipeline, operator, changed-files]
-    steps:
-    - name: Checkout repository
-      uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
-    - name: Create K8s builders (skip bootstrap)
-      uses: ./.github/actions/bootstrap-buildkit
-      continue-on-error: true
-      with:
-        builder_name: ${{ needs.changed-files.outputs.builder_name }}
-        buildkit_worker_addresses: '' # k8s builder
-        skip_bootstrap: true
-    - name: Builder Cleanup in case of k8s builder
-      shell: bash
-      run: |
-        docker buildx rm ${{ needs.changed-files.outputs.builder_name }} || true
--- a/.github/workflows/shared-build-image.yml
+++ b/.github/workflows/shared-build-image.yml
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+name: Shared Build Image
+# Reusable workflow: it is started by other workflows through `workflow_call`
+# and receives everything it needs via the inputs and secrets declared here.
+on:
+  workflow_call:
+    inputs:
+      framework:
+        description: 'Framework name (vllm, sglang, trtllm)'
+        required: true
+        type: string
+      target:
+        description: 'Target stage for Docker rendering'
+        required: false
+        type: string
+        default: 'runtime'
+      # Parsed with fromJson() to form the build matrix; an empty-string entry
+      # is treated as the CPU build (no CUDA flag, no -cuda tag suffix).
+      cuda_version:
+        description: 'CUDA versions to build as a JSON array'
+        required: true
+        type: string
+      # NOTE(review): described as a JSON array, but this workflow never parses
+      # it; the raw value is handed to render.py and to `docker buildx build
+      # --platform` as-is. Confirm the expected format with callers.
+      platform:
+        description: 'Target platforms to build as a JSON array'
+        required: true
+        type: string
+      builder_name:
+        description: 'Buildkit builder name'
+        required: true
+        type: string
+      build_timeout_minutes:
+        description: 'Timeout in minutes for the build step'
+        required: false
+        type: number
+        default: 60
+      # Each non-empty line is prefixed with the ECR repository before use.
+      extra_tags:
+        description: 'Additional tags (newline-separated, -$platform suffix auto-appended)'
+        required: false
+        type: string
+        default: ''
+      no_cache:
+        description: 'Disable Docker build cache'
+        required: false
+        type: boolean
+        default: false
+      fresh_builder:
+        description: 'Always create a fresh K8s BuildKit builder (skip remote worker routing)'
+        required: false
+        type: boolean
+        default: false
+      push_image:
+        description: 'Push image to registry'
+        required: false
+        type: boolean
+        default: true
+      no_load:
+        description: 'Do not load the image into docker'
+        required: false
+        type: boolean
+        default: true
+      show_summary:
+        description: 'Show summary'
+        required: false
+        type: boolean
+        default: false
+      # Adds --make-efa when rendering and an -efa segment to the image tag.
+      make_efa:
+        description: 'Enable AWS EFA support in the build'
+        required: false
+        type: boolean
+        default: false
+      # NOTE(review): not referenced by any step in this workflow - confirm
+      # whether it is still needed.
+      sanitized_ref_name:
+        description: 'Sanitized git ref name for branch-tagged images'
+        required: false
+        type: string
+        default: ''
+      # NOTE(review): not referenced by any step in this workflow - confirm
+      # whether it is still needed.
+      build_only:
+        description: 'Build and push only — skip tests and prepare branch tags'
+        required: false
+        type: boolean
+        default: false
+      # Appended after the DYNAMO_COMMIT_SHA build arg that is always passed.
+      extra_build_args:
+        description: 'Extra build args to pass to docker build (newline-separated)'
+        required: false
+        type: string
+        default: ''
+    secrets:
+      AWS_DEFAULT_REGION:
+        required: true
+      AWS_ACCOUNT_ID:
+        required: true
+      AZURE_ACR_HOSTNAME:
+        required: true
+      AZURE_ACR_USER:
+        required: true
+      AZURE_ACR_PASSWORD:
+        required: true
+      SCCACHE_S3_BUCKET:
+        required: false
+      AWS_ACCESS_KEY_ID:
+        required: false
+      AWS_SECRET_ACCESS_KEY:
+        required: false
+      # NOTE(review): declared but not referenced by any step in this workflow.
+      HF_TOKEN:
+        required: false
+    outputs:
+      # `<framework>-<target>`, plus `-efa` when make_efa is set; it never
+      # carries the -cuda<major> suffix.
+      target_tag_plain:
+        description: 'Plain runtime image tag prefix'
+        value: ${{ jobs.build.outputs.target_tag_plain }}
+jobs:
+  # One matrix leg per entry of the cuda_version JSON array; legs are
+  # independent (fail-fast disabled).
+  build:
+    strategy:
+      fail-fast: false
+      matrix:
+        cuda_version: ${{ fromJson(inputs.cuda_version) }}
+    runs-on: prod-builder-v3
+    # cuda_version not empty -- name: Build multi-arch cuda<cuda_version>
+    # cuda_version empty -- name: Build multi-arch cpu
+    name: Build multi-arch ${{ matrix.cuda_version == '' && 'cpu' || format('cuda{0}', matrix.cuda_version) }}
+    outputs:
+      target_tag_plain: ${{ steps.calculate-target-tag.outputs.target_tag_plain }}
+      # NOTE(review): the calculate-target-tag step only writes
+      # target_tag_plain, image_uri and test_image_uri, so this output looks
+      # like it is always empty - confirm whether it is needed.
+      test_tag_plain: ${{ steps.calculate-target-tag.outputs.test_tag_plain }}
+    steps:
+      # Check out the repository, including Git LFS objects.
+      - name: Checkout repository
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
+        with:
+          lfs: true
+      # Runs the repository's docker-login action with the AWS and Azure ACR
+      # credentials supplied by the caller.
+      - name: Docker Login
+        uses: ./.github/actions/docker-login
+        with:
+          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
+          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
+          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
+          azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
+          azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
+      # Derives the image tags for this matrix leg and publishes them as step
+      # outputs (target_tag_plain, image_uri, test_image_uri).
+      - name: Calculate target tag
+        id: calculate-target-tag
+        shell: bash
+        run: |
+          # Base of every tag: <framework>-<target>, plus -efa for EFA builds.
+          TAG_BUILDER=${{ inputs.framework }}-${{ inputs.target }}
+          if [ "${{ inputs.make_efa }}" == "true" ]; then
+            TAG_BUILDER+="-efa"
+          fi
+          # The "plain" tag is captured before the CUDA suffix is appended.
+          TARGET_TAG_PLAIN=${TAG_BUILDER}
+          if [ "${{ matrix.cuda_version }}" != "" ]; then
+            CUDA_VERSION="${{ matrix.cuda_version }}"
+            # Keep only the part before the first dot, e.g. 12.9 -> 12.
+            CUDA_MAJOR=${CUDA_VERSION%%.*}
+            TAG_BUILDER+="-cuda${CUDA_MAJOR}"
+          fi
+          # Final tags are prefixed with the commit SHA; the test image uses
+          # the same tag with a -test suffix.
+          IMAGE_TAG=${{ github.sha }}-${TAG_BUILDER}
+          TEST_IMAGE_TAG=${{ github.sha }}-${TAG_BUILDER}-test
+          IMAGE_URI="${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${IMAGE_TAG}"
+          TEST_IMAGE_URI="${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${TEST_IMAGE_TAG}"
+          echo "target_tag_plain=${TARGET_TAG_PLAIN}" >> $GITHUB_OUTPUT
+          echo "image_uri=${IMAGE_URI}" >> $GITHUB_OUTPUT
+          echo "test_image_uri=${TEST_IMAGE_URI}" >> $GITHUB_OUTPUT
+      # Chooses the builder flavor passed to the builder init/refresh actions.
+      - name: Calculate Builder Flavor
+        id: calculate-builder-flavor
+        shell: bash
+        env:
+          # Read through the environment so an empty or unusual value cannot
+          # break the shell syntax of the script below.
+          FRAMEWORK: ${{ inputs.framework }}
+        run: |
+          # vllm, sglang and trtllm have a dedicated flavor named after the
+          # framework; every other value falls back to "general".
+          case "${FRAMEWORK}" in
+            vllm|sglang|trtllm) BUILDER_FLAVOR="${FRAMEWORK}" ;;
+            *) BUILDER_FLAVOR="general" ;;
+          esac
+          echo "builder_flavor=${BUILDER_FLAVOR}" >> "$GITHUB_OUTPUT"
+      # Runs the init-dynamo-builder action for the requested builder, using
+      # the flavor computed in the previous step.
+      - name: Initialize Dynamo Builder
+        uses: ./.github/actions/init-dynamo-builder
+        with:
+          builder_name: ${{ inputs.builder_name }}
+          flavor: ${{ steps.calculate-builder-flavor.outputs.builder_flavor }}
+          arch: ${{ inputs.platform }}
+          cuda_version: ${{ matrix.cuda_version }}
+          fresh_builder: ${{ inputs.fresh_builder }}
+      # Expands each non-empty line of `extra_tags` into a full ECR image
+      # reference and exposes the list as the multi-line `tags` output.
+      - name: Calculate extra tags
+        id: extra-tags
+        shell: bash
+        env:
+          EXTRA_TAGS: ${{ inputs.extra_tags }}
+        run: |
+          REPO_PREFIX="${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo"
+          FULL_TAGS=""
+          # Blank lines (including a completely empty input) are skipped.
+          while IFS= read -r line; do
+            if [ -z "$line" ]; then
+              continue
+            fi
+            FULL_TAGS="${FULL_TAGS}${REPO_PREFIX}:${line}"$'\n'
+          done <<< "$EXTRA_TAGS"
+          if [ -z "$FULL_TAGS" ]; then
+            echo "tags=" >> $GITHUB_OUTPUT
+          else
+            # Multi-line values need the heredoc-style delimiter syntax.
+            {
+              echo "tags<<EOF"
+              echo "$FULL_TAGS"
+              echo "EOF"
+            } >> $GITHUB_OUTPUT
+          fi
+      # Logs the effective build parameters; purely informational.
+      - name: Print Build Container inputs
+        shell: bash
+        run: |
+          echo "=== Build Container Inputs ==="
+          echo "image_uri: ${{ steps.calculate-target-tag.outputs.image_uri }}"
+          echo "framework: ${{ inputs.framework }}"
+          echo "target: ${{ inputs.target }}"
+          echo "platform: ${{ inputs.platform }}"
+          echo "no_cache: ${{ inputs.no_cache }}"
+          echo "extra_tags: ${{ steps.extra-tags.outputs.tags }}"
+          echo "push_image: ${{ inputs.push_image }}"
+          echo "no_load: ${{ inputs.no_load }}"
+          echo "build_timeout_minutes: ${{ inputs.build_timeout_minutes }}"
+      # Renders the Dockerfile with container/render.py for the requested
+      # target, framework and platform.
+      - name: Generate Dockerfile
+        shell: bash
+        run: |
+          echo "Generating Dockerfile for target: ${{ inputs.target }} and framework: ${{ inputs.framework }}"
+          # Optional flags stay empty (and therefore vanish from the command
+          # line) unless the corresponding input is set.
+          MAKE_EFA_FLAG=""
+          if [ "${{ inputs.make_efa }}" == "true" ]; then
+            MAKE_EFA_FLAG="--make-efa"
+          fi
+          # If CUDA version is empty, use empty arg to fallback to default (eg. for planner)
+          if [ "${{ matrix.cuda_version }}" == "" ]; then
+            CUDA_FLAG=""
+          else
+            CUDA_FLAG="--cuda-version=${{ matrix.cuda_version }}"
+          fi
+          python ./container/render.py \
+              --target=${{ inputs.target }} \
+              --framework=${{ inputs.framework }} \
+              --platform=${{ inputs.platform }} \
+              ${CUDA_FLAG} \
+              ${MAKE_EFA_FLAG} \
+              --show-result \
+              --output-short-filename
+      # Builds (and, depending on push_image / no_load, pushes or loads) the
+      # image through the docker-remote-build action.
+      - name: Build and Push Image
+        id: build-image
+        # Enforce the caller-supplied build timeout on this step.
+        timeout-minutes: ${{ inputs.build_timeout_minutes }}
+        uses: ./.github/actions/docker-remote-build
+        with:
+          image_tag: ${{ steps.calculate-target-tag.outputs.image_uri }}
+          framework: ${{ inputs.framework }}
+          target: ${{ inputs.target }}
+          platform: ${{ inputs.platform }}
+          cuda_version: ${{ matrix.cuda_version }}
+          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
+          sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
+          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
+          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          no_cache: ${{ inputs.no_cache }}
+          extra_tags: ${{ steps.extra-tags.outputs.tags }}
+          push_image: ${{ inputs.push_image }}
+          no_load: ${{ inputs.no_load }}
+          # The commit SHA is always passed; caller-provided args follow it.
+          extra_build_args: |
+            DYNAMO_COMMIT_SHA=${{ github.sha }}
+            ${{ inputs.extra_build_args }}
+      # Runs the builder-refresher action before the test-image build; skipped
+      # for the dev target, like the test-image build itself.
+      - name: Refresh BuildKit builder
+        if: ${{ inputs.target != 'dev' }}
+        uses: ./.github/actions/builder-refresher
+        with:
+          builder_name: ${{ inputs.builder_name }}
+          flavor: ${{ steps.calculate-builder-flavor.outputs.builder_flavor }}
+          arch: ${{ inputs.platform }}
+          cuda_version: ${{ matrix.cuda_version }}
+      # Builds container/Dockerfile.test on top of the image produced above
+      # (passed as BASE_IMAGE) and tags it with the -test image URI.
+      - name: Build and Push Test Image
+        if: ${{ inputs.target != 'dev' }}
+        shell: bash
+        env:
+          ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
+        run: |
+          PLAIN_TAG="${{ steps.calculate-target-tag.outputs.target_tag_plain }}"
+          CACHE_TAG="test-${PLAIN_TAG}-cache"
+          # The registry cache is always read from both the main- and release-
+          # refs; it is only written back when running on main or a ref whose
+          # name starts with "release".
+          CACHE_ARGS="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${CACHE_TAG}"
+          CACHE_ARGS+=" --cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${CACHE_TAG}"
+          if [[ "$GITHUB_REF_NAME" == "main" ]]; then
+            CACHE_ARGS+=" --cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${CACHE_TAG},mode=max"
+          elif [[ "$GITHUB_REF_NAME" =~ ^release ]]; then
+            CACHE_ARGS+=" --cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${CACHE_TAG},mode=max"
+          fi
+          # --push wins when push_image is true; otherwise --load is used only
+          # if no_load is false. With neither, no output flag is passed.
+          PUSH_ARGS=""
+          if [ "${{ inputs.push_image }}" == "true" ]; then
+            PUSH_ARGS="--push"
+          elif [ "${{ inputs.no_load }}" == "false" ]; then
+            PUSH_ARGS="--load"
+          fi
+          NO_CACHE_ARG=""
+          if [ "${{ inputs.no_cache }}" == "true" ]; then
+            NO_CACHE_ARG="--no-cache"
+          fi
+          # The optional argument variables are intentionally unquoted so that
+          # empty ones disappear and CACHE_ARGS splits into separate words.
+          docker buildx build \
+            --progress=plain \
+            ${PUSH_ARGS} \
+            ${NO_CACHE_ARG} \
+            --platform ${{ inputs.platform }} \
+            -f container/Dockerfile.test \
+            --build-arg BASE_IMAGE=${{ steps.calculate-target-tag.outputs.image_uri }} \
+            ${CACHE_ARGS} \
+            -t ${{ steps.calculate-target-tag.outputs.test_image_uri }} .
+      # Writes the pushed image URI (and any extra tags) to the job summary.
+      - name: Show summary
+        shell: bash
+        # push_image and show_summary are boolean inputs. Comparing a boolean
+        # with the string 'true' never matches in an Actions expression, so the
+        # booleans are tested directly.
+        if: ${{ inputs.push_image && inputs.show_summary }}
+        env:
+          # Multi-line value; read from the environment instead of being
+          # spliced into the script text.
+          EXTRA_TAGS: ${{ steps.extra-tags.outputs.tags }}
+        run: |
+          echo "### 🐳 ${{ steps.calculate-target-tag.outputs.target_tag_plain }} Default Image" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "| Image URI |" >> "$GITHUB_STEP_SUMMARY"
+          echo "|-----|" >> "$GITHUB_STEP_SUMMARY"
+          echo "| \`${{ steps.calculate-target-tag.outputs.image_uri }}\` |" >> "$GITHUB_STEP_SUMMARY"
+          # The extra-tags table is only emitted when at least one tag exists.
+          if [ -n "$EXTRA_TAGS" ]; then
+            echo "" >> "$GITHUB_STEP_SUMMARY"
+            echo "### 🏷️ Extra Tags" >> "$GITHUB_STEP_SUMMARY"
+            echo "" >> "$GITHUB_STEP_SUMMARY"
+            echo "| Image URI |" >> "$GITHUB_STEP_SUMMARY"
+            echo "|-----|" >> "$GITHUB_STEP_SUMMARY"
+            while IFS= read -r tag; do
+              if [ -n "$tag" ]; then
+                echo "| \`${tag}\` |" >> "$GITHUB_STEP_SUMMARY"
+              fi
+            done <<< "$EXTRA_TAGS"
+          fi
--- a/.github/workflows/shared-compliance.yml
+++ b/.github/workflows/shared-compliance.yml
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+name: Shared Compliance Scan
+# Reusable workflow: started by other workflows through `workflow_call`.
+on:
+  workflow_call:
+    inputs:
+      framework:
+        description: 'Framework name (vllm, sglang, trtllm)'
+        required: true
+        type: string
+      target:
+        description: 'Target docker stage'
+        required: true
+        type: string
+      # Combined with the commit SHA (and a CUDA suffix) to locate the image.
+      target_tag_plain:
+        description: 'Plain runtime image tag prefix from the build workflow'
+        required: true
+        type: string
+      # cuda_version and platform are both parsed with fromJson() and form
+      # the two axes of the job matrix.
+      cuda_version:
+        description: 'CUDA versions to test as a JSON array'
+        required: true
+        type: string
+      platform:
+        description: 'Target platforms to test as a JSON array'
+        required: true
+        type: string
+    secrets:
+      AWS_DEFAULT_REGION:
+        required: true
+      AWS_ACCOUNT_ID:
+        required: true
+      AZURE_ACR_HOSTNAME:
+        required: true
+      AZURE_ACR_USER:
+        required: true
+      AZURE_ACR_PASSWORD:
+        required: true
+jobs:
+  # Runs the compliance-scan action once per (platform, cuda_version) pair
+  # against the runtime image pushed by the build workflow.
+  compliance:
+    strategy:
+      fail-fast: false
+      matrix:
+        platform: ${{ fromJson(inputs.platform) }}
+        cuda_version: ${{ fromJson(inputs.cuda_version) }}
+    runs-on: prod-builder-v3
+    # cuda_version not empty -- name: Compliance cuda<cuda_version>, <platform>
+    # cuda_version empty -- name: Compliance cpu, <platform>
+    name: Compliance ${{ matrix.cuda_version == '' && 'cpu' || format('cuda{0}', matrix.cuda_version) }}, ${{ matrix.platform }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
+      - name: Docker Login
+        uses: ./.github/actions/docker-login
+        with:
+          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
+          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
+          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
+          azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
+          azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
+      - name: Calculate target tag
+        id: calculate-target-tag
+        shell: bash
+        env:
+          ECR_REPOSITORY: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo
+        run: |
+          CUDA_VERSION="${{ matrix.cuda_version }}"
+          # Keep only the part before the first dot, e.g. 12.9 -> 12.
+          CUDA_MAJOR=${CUDA_VERSION%%.*}
+          IMAGE_TAG="${{ github.sha }}-${{ inputs.target_tag_plain }}"
+          # The build workflow appends -cuda<major> only when a CUDA version
+          # is set, so an empty version must not produce a dangling "-cuda"
+          # suffix. Planner tags never carry the suffix.
+          if [[ -n "${CUDA_VERSION}" && "${{ inputs.target_tag_plain }}" != *"planner"* ]]; then
+            IMAGE_TAG+="-cuda${CUDA_MAJOR}"
+          fi
+          RUNTIME_IMAGE=${ECR_REPOSITORY}:${IMAGE_TAG}
+          echo "runtime_image=${RUNTIME_IMAGE}" >> "$GITHUB_OUTPUT"
+      - name: Compliance scan
+        uses: ./.github/actions/compliance-scan
+        with:
+          image: ${{ steps.calculate-target-tag.outputs.runtime_image }}
+          artifact_name: compliance-${{ inputs.target_tag_plain }}-${{ matrix.cuda_version }}-${{ matrix.platform }}
+          arch: ${{ matrix.platform }}
+          framework: ${{ inputs.framework }}
+          target: ${{ inputs.target }}
+          cuda_version: ${{ matrix.cuda_version }}
\ No newline at end of file
--- a/.github/workflows/shared-copy.yml
+++ b/.github/workflows/shared-copy.yml
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+name: Shared Copy Image
+# Reusable workflow: started by other workflows through `workflow_call`.
+on:
+  workflow_call:
+    inputs:
+      # Combined with the commit SHA (and a CUDA suffix) to form the tag that
+      # is copied; source and target use the same tag.
+      target_tag_plain:
+        description: 'Plain runtime image tag prefix from the build workflow'
+        required: true
+        type: string
+      # Parsed with fromJson() to form the job matrix.
+      cuda_version:
+        description: 'CUDA versions to copy as a JSON array'
+        required: true
+        type: string
+      # Applied as `timeout-minutes` on the copy step.
+      copy_timeout_minutes:
+        description: 'Timeout in minutes for the copy step'
+        required: false
+        type: number
+        default: 10
+      # Forwarded to the skopeo-copy action and, when non-empty, shown in the
+      # job name.
+      override_arch:
+        description: 'Override architecture for the copied image (e.g., "amd64")'
+        required: false
+        type: string
+        default: ''
+    secrets:
+      AWS_DEFAULT_REGION:
+        required: true
+      AWS_ACCOUNT_ID:
+        required: true
+      AZURE_ACR_HOSTNAME:
+        required: true
+      AZURE_ACR_USER:
+        required: true
+      AZURE_ACR_PASSWORD:
+        required: true
+jobs:
+  # Copies the image built for each CUDA version from ECR to the Azure
+  # container registry, keeping repository name and tag unchanged.
+  copy-to-acr:
+    strategy:
+      fail-fast: false
+      matrix:
+        cuda_version: ${{ fromJson(inputs.cuda_version) }}
+    # cuda_version not empty -- name: Copy to ACR cuda<cuda_version>[, <override_arch>]
+    # cuda_version empty -- name: Copy to ACR cpu[, <override_arch>]
+    name: Copy to ACR ${{ matrix.cuda_version == '' && 'cpu' || format('cuda{0}', matrix.cuda_version) }}${{ inputs.override_arch != '' && format(', {0}', inputs.override_arch) || '' }}
+    runs-on: prod-default-small-v2
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
+      - name: Calculate target tag
+        id: calculate-target-tag
+        shell: bash
+        env:
+          ECR_REPOSITORY: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo
+        run: |
+          CUDA_VERSION="${{ matrix.cuda_version }}"
+          # Keep only the part before the first dot, e.g. 12.9 -> 12.
+          CUDA_MAJOR=${CUDA_VERSION%%.*}
+          IMAGE_TAG="${{ github.sha }}-${{ inputs.target_tag_plain }}"
+          # The build workflow appends -cuda<major> only when a CUDA version
+          # is set, so an empty version must not produce a dangling "-cuda"
+          # suffix. Planner tags never carry the suffix.
+          if [[ -n "${CUDA_VERSION}" && "${{ inputs.target_tag_plain }}" != *"planner"* ]]; then
+            IMAGE_TAG+="-cuda${CUDA_MAJOR}"
+          fi
+          echo "image_tag=${IMAGE_TAG}" >> "$GITHUB_OUTPUT"
+      - name: Copy image to target registry
+        timeout-minutes: ${{ inputs.copy_timeout_minutes }}
+        uses: ./.github/actions/skopeo-copy
+        with:
+          source_registry: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
+          source_image: ai-dynamo/dynamo
+          source_tag: ${{ steps.calculate-target-tag.outputs.image_tag }}
+          target_registry: ${{ secrets.AZURE_ACR_HOSTNAME }}
+          target_image: ai-dynamo/dynamo
+          target_tag: ${{ steps.calculate-target-tag.outputs.image_tag }}
+          source_aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
+          source_aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
+          target_azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
+          target_azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
+          target_azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
+          override_arch: ${{ inputs.override_arch }} # We are using AMD64 images only on the rest of the clusters.
--- a/.github/workflows/shared-deploy-test-framework.yml
+++ b/.github/workflows/shared-deploy-test-framework.yml
 # SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
-name: Deploy Test Framework
+name: Shared Deploy Test
 on:
  workflow_call:
@@ -42,7 +42,7 @@ jobs:
      max-parallel: 2
      matrix:
        profile: ${{ fromJSON(inputs.profiles) }}
-    name: deploy-test-${{ inputs.framework }} (${{ matrix.profile }})
+    name: ${{ matrix.profile }}
    steps:
      - name: Checkout code
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0

--- a/.github/workflows/shared-test.yml
+++ b/.github/workflows/shared-test.yml
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+name: Shared Local GPU and CPU Test
+# Reusable workflow: started by other workflows through `workflow_call`.
+on:
+  workflow_call:
+    inputs:
+      # Forwarded to the pytest action as `test_suite_name`.
+      test_suite_name:
+        description: 'Test suite name (vllm, sglang, trtllm)'
+        required: true
+        type: string
+      # Only used as the leading part of the job name in this workflow.
+      test_type:
+        description: 'Test type (e.g. single-gpu, multi-gpu)'
+        required: true
+        type: string
+      # Runner label for matrix legs whose platform is 'amd64'; every other
+      # platform value runs on prod-tester-arm-v1.
+      amd_runner:
+        description: 'Runner to execute tests on (amd64 only)'
+        required: true
+        type: string
+      target_tag_plain:
+        description: 'Plain runtime image tag prefix from the build workflow'
+        required: true
+        type: string
+      # cuda_version and platform are both parsed with fromJson() and form
+      # the two axes of the job matrix.
+      cuda_version:
+        description: 'CUDA versions to test as a JSON array'
+        required: true
+        type: string
+      platform:
+        description: 'Target platforms to test as a JSON array'
+        required: true
+        type: string
+      run_sanity_check:
+        description: 'Whether to run sanity check on the runtime image before executing tests'
+        required: false
+        type: boolean
+        default: true
+      run_cpu_only_tests:
+        description: 'Whether to run CPU-only tests'
+        required: false
+        type: boolean
+        default: false
+      # No default is declared; callers enabling run_cpu_only_tests are
+      # presumably expected to set this - confirm.
+      cpu_only_test_markers:
+        description: 'CPU-only pytest markers'
+        required: false
+        type: string
+      cpu_only_test_timeout_minutes:
+        description: 'Timeout in minutes for CPU tests'
+        required: false
+        type: number
+        default: 10
+      run_gpu_tests:
+        description: 'Whether to run GPU tests'
+        required: false
+        type: boolean
+        default: true
+      # No default is declared, although run_gpu_tests defaults to true;
+      # callers are presumably expected to set this - confirm.
+      gpu_test_markers:
+        description: 'GPU pytest markers'
+        required: false
+        type: string
+      gpu_test_timeout_minutes:
+        description: 'Timeout in minutes for GPU tests'
+        required: false
+        type: number
+        default: 30
+    secrets:
+      AWS_DEFAULT_REGION:
+        required: true
+      AWS_ACCOUNT_ID:
+        required: true
+      AZURE_ACR_HOSTNAME:
+        required: true
+      AZURE_ACR_USER:
+        required: true
+      AZURE_ACR_PASSWORD:
+        required: true
+      HF_TOKEN:
+        required: false
+jobs:
+  # One matrix leg per (platform, cuda_version) pair; legs are independent
+  # (fail-fast disabled).
+  test:
+    strategy:
+      fail-fast: false
+      matrix:
+        platform: ${{ fromJson(inputs.platform) }}
+        cuda_version: ${{ fromJson(inputs.cuda_version) }}
+    # An empty cuda_version is shown as "cpu" in the job name.
+    name: ${{ inputs.test_type }} ${{ matrix.cuda_version == '' && 'cpu' || format('cuda{0}', matrix.cuda_version) }}, ${{ matrix.platform }}
+    # amd64 legs use the caller-supplied runner; any other platform value
+    # falls back to prod-tester-arm-v1.
+    runs-on: ${{ matrix.platform == 'amd64' && inputs.amd_runner || 'prod-tester-arm-v1' }}
+    steps:
+      # Needed for the local actions and scripts referenced by later steps.
+      - name: Checkout repository
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
+      # Rebuilds the runtime and test image references for this matrix leg
+      # from the commit SHA, the plain tag and the CUDA major version.
+      - name: Calculate target tag
+        id: calculate-target-tag
+        shell: bash
+        env:
+          ECR_REPOSITORY: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo
+        run: |
+          CUDA_VERSION="${{ matrix.cuda_version }}"
+          # Keep only the part before the first dot, e.g. 12.9 -> 12.
+          CUDA_MAJOR=${CUDA_VERSION%%.*}
+          IMAGE_TAG="${{ github.sha }}-${{ inputs.target_tag_plain }}"
+          # The build workflow appends -cuda<major> only when a CUDA version
+          # is set, so an empty version must not produce a dangling "-cuda"
+          # suffix. Planner tags never carry the suffix.
+          if [[ -n "${CUDA_VERSION}" && "${{ inputs.target_tag_plain }}" != *"planner"* ]]; then
+            IMAGE_TAG+="-cuda${CUDA_MAJOR}"
+          fi
+          RUNTIME_IMAGE=${ECR_REPOSITORY}:${IMAGE_TAG}
+          # The test image shares the runtime tag with a -test suffix.
+          TEST_IMAGE=${ECR_REPOSITORY}:${IMAGE_TAG}-test
+          echo "runtime_image=${RUNTIME_IMAGE}" >> "$GITHUB_OUTPUT"
+          echo "test_image=${TEST_IMAGE}" >> "$GITHUB_OUTPUT"
+      # Runs the repository's docker-login action with the AWS and Azure ACR
+      # credentials supplied by the caller.
+      - name: Docker Login
+        uses: ./.github/actions/docker-login
+        with:
+          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
+          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
+          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
+          azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
+          azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
+      # Pulls the runtime image, the test image and the MinIO image up front
+      # and reports how long the pulls took.
+      - name: Pull relevant images
+        shell: bash
+        run: |
+          # retry_pull is defined by the sourced helper script.
+          source ./.github/scripts/retry_docker.sh
+          start_time=$(date +%s)
+          retry_pull ${{ steps.calculate-target-tag.outputs.runtime_image }}
+          retry_pull ${{ steps.calculate-target-tag.outputs.test_image }}
+          retry_pull quay.io/minio/minio
+          end_time=$(date +%s)
+          duration=$((end_time - start_time))
+          echo "⏱️ Image pull duration: ${duration}s"
+      # Runs deploy/sanity_check.py inside the runtime image and fails the
+      # step with the script's exit code when the check does not pass.
+      - name: Run Sanity Check on Runtime Image
+        if: ${{ inputs.run_sanity_check }}
+        shell: bash
+        run: |
+          RUNTIME_IMAGE="${{ steps.calculate-target-tag.outputs.runtime_image }}"
+          echo "Running sanity check on image: ${RUNTIME_IMAGE}"
+          export WORKSPACE=/workspace
+          # Capture the exit code instead of letting errexit abort the script,
+          # so a failure can be reported before exiting.
+          rc=0
+          docker run --rm "${RUNTIME_IMAGE}" python ${WORKSPACE}/deploy/sanity_check.py --runtime-check --no-gpu-check || rc=$?
+          if [ "${rc}" -eq 0 ]; then
+            echo "✅ Sanity check passed"
+            exit 0
+          fi
+          echo "ERROR: Sanity check failed - ai-dynamo packages not properly installed"
+          exit "${rc}"
+      # Optional CPU-only pytest run inside the test image; unlike the GPU
+      # step it is not restricted to a platform.
+      - name: Run CPU-only tests (parallelized)
+        if: ${{ inputs.run_cpu_only_tests }}
+        timeout-minutes: ${{ inputs.cpu_only_test_timeout_minutes }}
+        uses: ./.github/actions/pytest
+        with:
+          image_tag: ${{ steps.calculate-target-tag.outputs.test_image }}
+          pytest_marks: ${{ inputs.cpu_only_test_markers }}
+          test_suite_name: ${{ inputs.test_suite_name }}
+          test_type: "pre_merge_cpu"
+          platform_arch: ${{ matrix.platform }}
+          hf_token: ${{ secrets.HF_TOKEN }}
+          parallel_mode: 'auto'
+          dind_as_sidecar: 'true'
+      # GPU pytest run inside the test image; only executed on the amd64
+      # matrix leg and only when run_gpu_tests is enabled.
+      - name: Run GPU tests (sequential)
+        timeout-minutes: ${{ inputs.gpu_test_timeout_minutes }}
+        if: ${{ matrix.platform == 'amd64' && inputs.run_gpu_tests }}
+        uses: ./.github/actions/pytest
+        with:
+          image_tag: ${{ steps.calculate-target-tag.outputs.test_image }}
+          pytest_marks: ${{ inputs.gpu_test_markers }}
+          test_suite_name: ${{ inputs.test_suite_name }}
+          test_type: "pre_merge_gpu"
+          platform_arch: ${{ matrix.platform }}
+          hf_token: ${{ secrets.HF_TOKEN }}
+          parallel_mode: 'none'
+          dind_as_sidecar: 'true'