ci: Using Dynamo Builder (#5914)

6ac17b99 · Ran Rubin · GitHub · 50e17783 · 6ac17b99 · 6ac17b99
Unverified Commit 6ac17b99 authored Feb 05, 2026 by Ran Rubin Committed by GitHub Feb 05, 2026
15 changed files
--- a/.github/actions/bootstrap-buildkit/action.yml
+++ b/.github/actions/bootstrap-buildkit/action.yml
+name: 'Bootstrap Buildkit'
+description: 'Bootstrap buildkit builders using remote workers or Kubernetes driver'
+# This action supports two buildkit driver modes:
+#
+# 1. Remote Driver (when buildkit_worker_addresses is provided)
+#    Uses pre-provisioned remote buildkit workers specified via buildkit_worker_addresses.
+#    This is the preferred mode for faster, more reliable builds.
+#
+# 2. Kubernetes Driver (fallback, when buildkit_worker_addresses is empty)
+#    Dynamically creates buildkit pods in Kubernetes. Use this as a fallback when
+#    remote buildkit workers are unavailable or unreachable. The Kubernetes driver
+#    is slower due to pod startup time but provides on-demand build capacity.
+#
+# Options:
+#   - skip_bootstrap: Set to 'true' to only create the builder without bootstrapping.
+#     Useful for cleanup jobs that need to remove the builder but not bootstrap it.
+inputs:
+  builder_name:
+    description: 'Name for the buildx builder'
+    required: true
+  buildkit_worker_addresses:
+    description: 'Comma-separated list of remote buildkit worker addresses. If empty, falls back to Kubernetes driver.'
+    required: false
+    default: ''
+  # Kubernetes driver inputs (used when buildkit_worker_addresses is empty)
+  ephemeral_storage:
+    description: 'Ephemeral storage request for Kubernetes driver'
+    required: false
+    default: '400Gi'
+  namespace:
+    description: 'Kubernetes namespace for buildkit pods'
+    required: false
+    default: 'buildkit'
+  replicas:
+    description: 'Number of buildkit replicas'
+    required: false
+    default: '1'
+  requests_cpu:
+    description: 'CPU requests for buildkit pods'
+    required: false
+    default: '12'
+  requests_memory:
+    description: 'Memory requests for buildkit pods'
+    required: false
+    default: '26Gi'
+  limits_memory:
+    description: 'Memory limits for buildkit pods'
+    required: false
+    default: '29Gi'
+  tolerations:
+    description: 'Tolerations for buildkit pods'
+    required: false
+    default: "key=buildkit-fallback-worker,value=true,operator=Equal,effect=NoSchedule"
+  skip_bootstrap:
+    description: 'Skip the bootstrap step (only create the builder)'
+    required: false
+    default: 'false'
+runs:
+  using: "composite"
+  steps:
+    # Remote-driver path: register every address from buildkit_worker_addresses
+    # under one buildx builder. The first usable address creates and selects the
+    # builder; each further address is appended to it as an additional node.
+    - name: Define remote buildkit builders
+      if: inputs.buildkit_worker_addresses != ''
+      shell: bash
+      env:
+        # Inputs are handed over as environment variables instead of being
+        # expanded into the script text, so their contents are never parsed as shell.
+        BUILDER_NAME: ${{ inputs.builder_name }}
+        WORKER_ADDRESSES: ${{ inputs.buildkit_worker_addresses }}
+      run: |
+        IFS=',' read -ra ADDR_LIST <<< "$WORKER_ADDRESSES"
+        MODE="--use"
+        for addr in "${ADDR_LIST[@]}"; do
+          # Tolerate "a, b" style lists and stray commas: drop blanks, skip empties.
+          addr="${addr//[[:space:]]/}"
+          if [[ -z "$addr" ]]; then
+            continue
+          fi
+          docker buildx create "$MODE" --name "$BUILDER_NAME" --driver remote "$addr"
+          MODE="--append"
+        done
+        # MODE only stays at --use when no address survived the filtering above.
+        if [[ "$MODE" == "--use" ]]; then
+          echo "::error::buildkit_worker_addresses did not contain any usable address"
+          exit 1
+        fi
+    # Fallback path: with no remote worker addresses configured, create one
+    # Kubernetes-driver node per architecture (amd64 first, arm64 appended)
+    # under the same builder name, unless that builder already exists.
+    - name: Create Kubernetes builder for both platforms
+      if: inputs.buildkit_worker_addresses == ''
+      shell: bash
+      env:
+        # Inputs are handed over as environment variables instead of being
+        # expanded into the script text, so their contents are never parsed as shell.
+        BUILDER_NAME: ${{ inputs.builder_name }}
+        EPHEMERAL_STORAGE: ${{ inputs.ephemeral_storage }}
+        K8S_NAMESPACE: ${{ inputs.namespace }}
+        REPLICAS: ${{ inputs.replicas }}
+        REQUESTS_CPU: ${{ inputs.requests_cpu }}
+        REQUESTS_MEMORY: ${{ inputs.requests_memory }}
+        LIMITS_MEMORY: ${{ inputs.limits_memory }}
+        TOLERATIONS: ${{ inputs.tolerations }}
+        SKIP_BOOTSTRAP: ${{ inputs.skip_bootstrap }}
+      run: |
+        if docker buildx inspect "$BUILDER_NAME" > /dev/null 2>&1; then
+          echo "✅ Builder '${BUILDER_NAME}' already exists. Skipping creation."
+        else
+          echo "K8s Builder '${BUILDER_NAME}' does not exist. Creating it."
+          # Driver options shared by both architecture nodes.
+          COMMON_OPTS=(
+            "--driver-opt=requests.ephemeral-storage=${EPHEMERAL_STORAGE}"
+            "--driver-opt=namespace=${K8S_NAMESPACE}"
+            "--driver-opt=loadbalance=sticky"
+            "--driver-opt=replicas=${REPLICAS}"
+            "--driver-opt=requests.cpu=${REQUESTS_CPU}"
+            "--driver-opt=requests.memory=${REQUESTS_MEMORY}"
+            "--driver-opt=limits.memory=${LIMITS_MEMORY}"
+          )
+          # The first node creates and selects the builder; the second is appended.
+          MODE="--use"
+          for ARCH in amd64 arm64; do
+            # The nodeselector and tolerations values contain commas; the literal
+            # double quotes around each key=value pair are part of the argument.
+            docker buildx create "$MODE" --name "$BUILDER_NAME" --driver kubernetes \
+              "--platform=linux/${ARCH}" \
+              "${COMMON_OPTS[@]}" \
+              "--driver-opt=\"nodeselector=kubernetes.io/arch=${ARCH},role=dynamo-builder-fallback\"" \
+              "--driver-opt=\"tolerations=${TOLERATIONS}\""
+            MODE="--append"
+          done
+        fi
+        sleep 3 # Give the builders some time to be ready
+        # Cleanup-style callers (skip_bootstrap == 'true') do not build, so only
+        # real builds get the fallback warning and summary entry.
+        if [[ "$SKIP_BOOTSTRAP" != "true" ]]; then
+          echo "::warning::Build is using fallback pod. Please alert the ops team."
+          {
+            echo "## ⚠️ Fallback Build Warning"
+            echo "This build is running on a **fallback pod**. Please alert the ops team."
+            echo ""
+          } >> "$GITHUB_STEP_SUMMARY"
+        fi
+    # Inspect the builder with --bootstrap; skipped when skip_bootstrap is 'true'.
+    - name: Bootstrap buildkit
+      if: inputs.skip_bootstrap != 'true'
+      shell: bash
+      env:
+        # Passed through the environment so the name is never parsed as shell text.
+        BUILDER_NAME: ${{ inputs.builder_name }}
+      run: |
+        echo "Bootstrapping buildkit..."
+        docker buildx inspect "$BUILDER_NAME" --bootstrap
--- a/.github/actions/docker-build/action.yml
+++ b/.github/actions/docker-build/action.yml
@@ -322,4 +322,3 @@ runs:
        name: build-metrics-${{ inputs.framework }}-${{ inputs.target }}-${{ env.PLATFORM_ARCH }}-${{ github.run_id }}-${{ job.check_run_id }}
        path: build-metrics/build-${{ inputs.framework }}-${{ env.PLATFORM_ARCH }}-${{ github.run_id }}-${{ job.check_run_id }}.json
        retention-days: 7
--- a/.github/actions/docker-remote-build/action.yml
+++ b/.github/actions/docker-remote-build/action.yml
+name: 'Docker Build'
+description: 'Build Dynamo container images'
+inputs:
+  framework:
+    description: 'Framework to build'
+    required: true
+    default: 'vllm'
+  target:
+    description: 'Target to build'
+    required: false
+    default: 'runtime'
+  platform:
+    description: 'Docker platform to build on, ie. linux/amd64'
+    required: false
+    default: 'linux/amd64'
+  image_tag:
+    description: 'Custom image tag'
+    required: true
+  ci_token:
+    description: 'CI Token'
+    required: false
+  aws_default_region:
+    description: 'AWS Default Region'
+    required: false
+  sccache_s3_bucket:
+    description: 'SCCache S3 Bucket'
+    required: false
+  aws_account_id:
+    description: 'AWS Account ID'
+    required: false
+  aws_access_key_id:
+    description: 'AWS Access Key ID'
+    required: false
+  aws_secret_access_key:
+    description: 'AWS Secret Access Key'
+    required: false
+  base_image_tag:
+    description: 'Optional override for base image tag passed to build.sh'
+    required: false
+  runtime_image_tag:
+    description: 'Optional override for RUNTIME_IMAGE_TAG build-arg'
+    required: false
+  cuda_version:
+    description: 'Optional override for CUDA_VERSION build-arg'
+    required: true
+  enable_kvbm:
+    description: 'Enable KVBM support (optional)'
+    required: false
+  dynamo_base_image:
+    description: 'Pre-built Dynamo base image to use instead of building from scratch'
+    required: false
+  no_cache:
+    description: 'Disable Docker build cache'
+    required: false
+  extra_tags:
+    description: 'Additional image tags (newline-separated list of full image:tag references)'
+    required: false
+    default: ''
+  push_image:
+    description: 'Push the image to the registry'
+    required: false
+    default: 'false'
+  no_load:
+    description: 'Do not load the image into docker (useful for validation-only builds)'
+    required: false
+    default: 'true'
+  use_sccache:
+    description: 'Use SCCache for caching'
+    required: false
+    default: 'false'
+  ci:
+    description: 'CI mode: for frontend target, uses existing buildx builder and pushes EPP image to ECR'
+    required: false
+    default: 'false'
+outputs:
+  image_tag:
+    description: 'Image Tag'
+    value: ${{ steps.build.outputs.image_tag }}
+runs:
+  using: "composite"
+  steps:
+    # Runs container/build.sh for the requested framework/target/platform, tees the
+    # output into a per-build log file, and exports BUILD_START_TIME, BUILD_END_TIME
+    # and BUILD_LOG_FILE to the job environment for the metrics steps.
+    - name: Build image
+      id: build
+      shell: bash
+      env:
+        GITHUB_TOKEN: ${{ inputs.ci_token }}
+        AWS_DEFAULT_REGION: ${{ inputs.aws_default_region }}
+        SCCACHE_S3_BUCKET: ${{ inputs.sccache_s3_bucket }}
+        AWS_ACCESS_KEY_ID: ${{ inputs.aws_access_key_id }}
+        AWS_SECRET_ACCESS_KEY: ${{ inputs.aws_secret_access_key }}
+        PLATFORM: ${{ inputs.platform }}
+        ECR_HOSTNAME: ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com
+        GITHUB_RUN_ID: ${{ github.run_id }}
+        GITHUB_JOB: ${{ github.job }}
+        GITHUB_REF_NAME: ${{ github.ref_name }}
+        CUDA_VERSION: ${{ inputs.cuda_version }}
+      run: |
+        set -x
+        IMAGE_TAG="${{ inputs.image_tag }}"
+        CUDA_VERSION_MAJOR=${CUDA_VERSION%%.*}
+        BUILD_START_TIME=$(date -u +%Y-%m-%dT%H:%M:%SZ)
+        echo "BUILD_START_TIME=${BUILD_START_TIME}" >> "$GITHUB_ENV"
+        echo "image_tag=$IMAGE_TAG" >> "$GITHUB_OUTPUT"
+        # Create build logs directory
+        mkdir -p build-logs
+        BUILD_LOG_FILE="build-logs/build-${{ inputs.framework }}-$(echo '${{ inputs.platform }}' | sed 's/linux\///').log"
+        echo "BUILD_LOG_FILE=${BUILD_LOG_FILE}" >> "$GITHUB_ENV"
+        echo "📝 Build log will be saved to: ${BUILD_LOG_FILE}"
+        # Optional arguments are collected in an array, one element per word, so
+        # neighbouring flags can never run together regardless of which options
+        # are enabled.
+        EXTRA_ARGS=()
+        # Set base cache args and set --cache-to if this is a main commit
+        # TODO: Fix this - Skip cache for frontend target - a different docker driver is used for the EPP build, which causes issues with cache export
+        if [[ "${{ inputs.target }}" != "frontend" ]]; then
+          CACHE_REPO="${ECR_HOSTNAME}/ai-dynamo/dynamo"
+          CACHE_KEY="${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}"
+          EXTRA_ARGS+=(--cache-to type=inline)
+          EXTRA_ARGS+=(--cache-from "type=registry,ref=${CACHE_REPO}:${CACHE_KEY}-cache")
+          EXTRA_ARGS+=(--cache-from "type=registry,ref=${CACHE_REPO}:main-${CACHE_KEY}")
+          EXTRA_ARGS+=(--cache-from "type=registry,ref=${CACHE_REPO}:release-${CACHE_KEY}-cache")
+          if [[ "$GITHUB_REF_NAME" =~ ^release ]]; then
+            # Release branches also use release cache
+            EXTRA_ARGS+=(--cache-to "type=registry,ref=${CACHE_REPO}:release-${CACHE_KEY}-cache,mode=max")
+          elif [[ "$GITHUB_REF_NAME" == "main" ]]; then
+            EXTRA_ARGS+=(--cache-to "type=registry,ref=${CACHE_REPO}:${CACHE_KEY}-cache,mode=max")
+          fi
+        fi
+        echo "${EXTRA_ARGS[*]}"
+        # Collect optional overrides provided by the workflow
+        if [[ "${{ inputs.ci }}" == "true" ]]; then
+          # CI mode for frontend: use existing buildx builder, push EPP to registry
+          EXTRA_ARGS+=(--ci)
+        fi
+        if [ -n "${{ inputs.base_image_tag }}" ]; then
+          EXTRA_ARGS+=(--base-image-tag "${{ inputs.base_image_tag }}")
+        fi
+        if [ -n "${{ inputs.runtime_image_tag }}" ]; then
+          EXTRA_ARGS+=(--build-arg "RUNTIME_IMAGE_TAG=${{ inputs.runtime_image_tag }}")
+        fi
+        if [ -n "${{ inputs.cuda_version }}" ]; then
+          EXTRA_ARGS+=(--build-arg "CUDA_VERSION=${{ inputs.cuda_version }}")
+        fi
+        if [ -n "${{ inputs.dynamo_base_image }}" ]; then
+          EXTRA_ARGS+=(--dynamo-base-image "${{ inputs.dynamo_base_image }}")
+        fi
+        if [ -n "${{ inputs.enable_kvbm }}" ]; then
+          EXTRA_ARGS+=(--build-arg "ENABLE_KVBM=${{ inputs.enable_kvbm }}")
+        fi
+        if [ "${{ inputs.no_cache }}" == "true" ]; then
+          EXTRA_ARGS+=(--no-cache)
+        fi
+        if [ "${{ inputs.use_sccache }}" == "true" ]; then
+          EXTRA_ARGS+=(--build-arg CARGO_BUILD_JOBS=4 --use-sccache)
+        fi
+        if [ "${{ inputs.push_image }}" == "true" ]; then
+          EXTRA_ARGS+=(--push --no-load)
+        elif [ "${{ inputs.no_load }}" == "true" ]; then
+          EXTRA_ARGS+=(--no-load)
+        fi
+        # Add extra tags (each as a separate --tag argument)
+        EXTRA_TAGS="${{ inputs.extra_tags }}"
+        if [ -n "$EXTRA_TAGS" ]; then
+          # Default IFS lets read strip leading/trailing blanks from each line.
+          while read -r EXTRA_TAG; do
+            if [ -n "$EXTRA_TAG" ]; then
+              EXTRA_ARGS+=(--tag "${EXTRA_TAG}")
+            fi
+          done <<< "$EXTRA_TAGS"
+        fi
+        # Execute build and capture output (show on console AND save to file).
+        # errexit is suspended around the pipeline: with errexit/pipefail active
+        # (GitHub's documented invocation for `shell: bash`) a failing build would
+        # otherwise abort here, before the exit code and BUILD_END_TIME are recorded.
+        set +e
+        ./container/build.sh --tag "$IMAGE_TAG" \
+          --target "${{ inputs.target }}" \
+          --vllm-max-jobs 10 \
+          --no-tag-latest \
+          --framework "${{ inputs.framework }}" \
+          --platform "${{ inputs.platform }}" \
+          --sccache-bucket "$SCCACHE_S3_BUCKET" \
+          --sccache-region "$AWS_DEFAULT_REGION" \
+          "${EXTRA_ARGS[@]}" 2>&1 | tee "${BUILD_LOG_FILE}"
+        # Must be read immediately after the pipeline: index 0 is build.sh, not tee.
+        BUILD_EXIT_CODE=${PIPESTATUS[0]}
+        set -e
+        BUILD_END_TIME=$(date -u +%Y-%m-%dT%H:%M:%SZ)
+        echo "BUILD_END_TIME=${BUILD_END_TIME}" >> "$GITHUB_ENV"
+        # Exit with the build's exit code
+        exit ${BUILD_EXIT_CODE}
+    # Writes a small per-job JSON file (timing, image size, platform) under
+    # build-metrics/ and exports PLATFORM_ARCH to the job environment.
+    - name: Capture Build Metrics
+      id: metrics
+      shell: bash
+      run: |
+        mkdir -p build-metrics
+        # Timestamps come from the job environment (BUILD_START_TIME / BUILD_END_TIME).
+        BUILD_START_TIME="${{ env.BUILD_START_TIME }}"
+        BUILD_END_TIME="${{ env.BUILD_END_TIME }}"
+        # Convert both timestamps to epoch seconds and take the difference.
+        STARTED_AT_S=$(date -d "$BUILD_START_TIME" +%s)
+        ENDED_AT_S=$(date -d "$BUILD_END_TIME" +%s)
+        BUILD_DURATION_SEC=$((ENDED_AT_S - STARTED_AT_S))
+        echo "🕐 Build timing:"
+        echo "  Start: ${BUILD_START_TIME}"
+        echo "  End: ${BUILD_END_TIME}"
+        echo "  Duration: ${BUILD_DURATION_SEC} seconds"
+        # Image size via docker inspect; 0 when there is no tag or inspect fails.
+        IMAGE_TAG="${{ steps.build.outputs.image_tag }}"
+        IMAGE_SIZE_BYTES=0
+        if [ -z "$IMAGE_TAG" ]; then
+          echo "⚠️  No image tag available"
+        else
+          IMAGE_SIZE_BYTES=$(docker image inspect "$IMAGE_TAG" --format='{{.Size}}' 2>/dev/null || echo "0")
+          echo "📦 Image size: ${IMAGE_SIZE_BYTES} bytes"
+        fi
+        # Platform with the "linux/" part removed, shared with later steps.
+        PLATFORM_ARCH=$(echo "${{ inputs.platform }}" | sed 's/linux\///')
+        echo "  Architecture: ${PLATFORM_ARCH}"
+        echo "PLATFORM_ARCH=${PLATFORM_ARCH}" >> $GITHUB_ENV
+        JOB_KEY="${{ inputs.framework }}-${PLATFORM_ARCH}"
+        echo "  Job Key: ${JOB_KEY}"
+        # Job-specific metrics file; tee writes it and echoes it to the log in one go.
+        mkdir -p build-metrics
+        METRICS_FILE="build-metrics/metrics-${{ inputs.framework }}-${PLATFORM_ARCH}-${{ github.run_id }}-${{ job.check_run_id }}.json"
+        tee "$METRICS_FILE" << EOF
+        {
+          "framework": "${{ inputs.framework }}",
+          "target": "${{ inputs.target }}",
+          "platform": "${{ inputs.platform }}",
+          "platform_arch": "${PLATFORM_ARCH}",
+          "image_size_bytes": ${IMAGE_SIZE_BYTES},
+          "build_start_time": "${BUILD_START_TIME}",
+          "build_end_time": "${BUILD_END_TIME}",
+          "build_duration_sec": ${BUILD_DURATION_SEC}
+        }
+        EOF
+    # Feeds the build log(s) and, when present, the per-job metrics file to
+    # .github/scripts/parse_buildkit_output.py to produce one combined JSON.
+    # Runs even when earlier steps failed; a parser failure does not fail the step.
+    - name: Generate Comprehensive Build Metrics
+      id: comprehensive-metrics
+      if: always()
+      shell: bash
+      run: |
+        echo "=========================================="
+        echo "📊 GENERATING COMPREHENSIVE BUILD METRICS"
+        echo "=========================================="
+        # Create metrics directory
+        mkdir -p build-metrics
+        # PLATFORM_ARCH is exported by the "Capture Build Metrics" step, which does
+        # not run when the build fails. Derive it from the platform input in that
+        # case and export it, so this step and any later step name files consistently.
+        PLATFORM_ARCH="${{ env.PLATFORM_ARCH }}"
+        if [ -z "$PLATFORM_ARCH" ]; then
+          PLATFORM_ARCH=$(echo "${{ inputs.platform }}" | sed 's/linux\///')
+          echo "PLATFORM_ARCH=${PLATFORM_ARCH}" >> "$GITHUB_ENV"
+        fi
+        WORKFLOW_ID="${{ github.run_id }}"
+        JOB_ID="${{ job.check_run_id }}"
+        # Make parser executable
+        chmod +x .github/scripts/parse_buildkit_output.py
+        # Use the BUILD_LOG_FILE set during the build step
+        BUILD_LOG="${{ env.BUILD_LOG_FILE }}"
+        # Path to container metadata created in previous step
+        CONTAINER_METADATA="build-metrics/metrics-${{ inputs.framework }}-${PLATFORM_ARCH}-${WORKFLOW_ID}-${JOB_ID}.json"
+        # Output single comprehensive JSON with all build stages
+        COMPREHENSIVE_JSON="build-metrics/build-${{ inputs.framework }}-${PLATFORM_ARCH}-${WORKFLOW_ID}-${JOB_ID}.json"
+        echo "🚀 Parsing BuildKit outputs and merging with container metrics..."
+        # Build stage arguments dynamically based on which logs exist
+        STAGE_ARGS=()
+        if [ -f "$BUILD_LOG" ]; then
+          echo "  ✓ Found base image log: ${BUILD_LOG}"
+          STAGE_ARGS+=("runtime:${BUILD_LOG}")
+        else
+          echo "  ℹ️  No image log found"
+        fi
+        # Check for any additional stage logs (e.g., build-logs/stage3-*.log).
+        # An unmatched glob stays literal, hence the -f test.
+        for extra_log in build-logs/stage*.log; do
+          if [ -f "$extra_log" ]; then
+            stage_name=$(basename "$extra_log" .log)
+            echo "  ✓ Found additional stage log: ${extra_log} (${stage_name})"
+            STAGE_ARGS+=("${stage_name}:${extra_log}")
+          fi
+        done
+        echo "Container Metadata: ${CONTAINER_METADATA}"
+        echo "Output: ${COMPREHENSIVE_JSON}"
+        echo ""
+        # Run parser with all discovered stages
+        # Usage: parse_buildkit_output.py <output_json> <stage1_name:log_file> [stage2_name:log_file] ... [--metadata=<file>]
+        PARSER_ARGS=("$COMPREHENSIVE_JSON" "${STAGE_ARGS[@]}")
+        # --metadata is optional per the usage line; only pass a file that exists
+        # (it is absent when the build failed before metrics were captured).
+        if [ -f "$CONTAINER_METADATA" ]; then
+          PARSER_ARGS+=("--metadata=${CONTAINER_METADATA}")
+        else
+          echo "  ℹ️  No container metadata found; parsing build logs only"
+        fi
+        # Keep going on parser failure; the exit code is reported below instead.
+        set +e
+        python3 .github/scripts/parse_buildkit_output.py "${PARSER_ARGS[@]}"
+        PARSER_EXIT_CODE=$?
+        set -e
+        echo ""
+        echo "📊 Parser exit code: ${PARSER_EXIT_CODE}"
+        if [ ${PARSER_EXIT_CODE} -eq 0 ] && [ -f "$COMPREHENSIVE_JSON" ]; then
+          echo "✅ Comprehensive build metrics generated successfully"
+          echo "📄 Output file: ${COMPREHENSIVE_JSON}"
+        else
+          echo "⚠️  Metrics generation had issues but continuing..."
+        fi
+    # Publish the combined build-metrics JSON as a workflow artifact, regardless
+    # of whether earlier steps succeeded.
+    - name: Upload Comprehensive Build Metrics
+      if: always()
+      uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
+      with:
+        retention-days: 7
+        name: build-metrics-${{ inputs.framework }}-${{ inputs.target }}-${{ env.PLATFORM_ARCH }}-${{ github.run_id }}-${{ job.check_run_id }}
+        path: build-metrics/build-${{ inputs.framework }}-${{ env.PLATFORM_ARCH }}-${{ github.run_id }}-${{ job.check_run_id }}.json
--- a/.github/actions/dynamo-deploy-test/action.yml
+++ b/.github/actions/dynamo-deploy-test/action.yml
+name: 'Dynamo Graph Deployment Test'
+description: 'Deploy a DynamoGraphDeployment to Kubernetes, validate it serves requests, and cleanup'
+inputs:
+  # Kubernetes Configuration
+  kubeconfig_base64:
+    description: 'Base64-encoded kubeconfig for cluster access'
+    required: true
+  namespace:
+    description: 'Kubernetes namespace for deployment'
+    required: true
+  # Deployment Configuration
+  deployment_file:
+    description: 'Path to the DynamoGraphDeployment YAML file (relative to examples/backends/<framework>)'
+    required: true
+  framework:
+    description: 'Framework name (vllm, sglang, trtllm)'
+    required: true
+  framework_runtime_image:
+    description: 'Full container image reference for the framework runtime'
+    required: true
+  # Model Configuration
+  model_name:
+    description: 'Model name to test (e.g., Qwen/Qwen3-0.6B)'
+    required: false
+    default: 'Qwen/Qwen3-0.6B'
+  # Test Configuration
+  pod_ready_timeout:
+    description: 'Timeout for pods to become ready (kubectl wait format)'
+    required: false
+    default: '300s'
+  model_available_max_attempts:
+    description: 'Maximum attempts to wait for model availability'
+    required: false
+    default: '30'
+  model_available_retry_delay:
+    description: 'Delay between model availability checks (seconds)'
+    required: false
+    default: '5'
+  port_forward_delay:
+    description: 'Delay after port-forward to allow connection (seconds)'
+    required: false
+    default: '10'
+  test_identifier:
+    description: 'Unique identifier for test output (used for log file and artifact naming)'
+    required: true
+  # Request Configuration
+  max_tokens:
+    description: 'Maximum tokens for test request'
+    required: false
+    default: '30'
+  temperature:
+    description: 'Temperature for test request'
+    required: false
+    default: '0.0'
+  test_prompt:
+    description: 'Test prompt to send (optional, uses default if not provided)'
+    required: false
+    default: ''
+  # Validation Configuration
+  min_response_length:
+    description: 'Minimum expected response content length'
+    required: false
+    default: '100'
+  skip_cleanup:
+    description: 'Skip cleanup step (useful for debugging)'
+    required: false
+    default: 'false'
+outputs:
+  graph_name:
+    description: 'Name of the deployed DynamoGraphDeployment'
+    value: ${{ steps.deploy.outputs.graph_name }}
+  test_result:
+    description: 'Test result (0=pass, 1=fail)'
+    value: ${{ steps.test.outputs.test_result }}
+  test_log_path:
+    description: 'Path to test output log'
+    value: ${{ steps.setup-test-names.outputs.test_output_log_file }}
+  artifact_name:
+    description: 'Name of the uploaded artifact'
+    value: ${{ steps.setup-test-names.outputs.artifact_name }}
+runs:
+  using: "composite"
+  steps:
+    # Decodes the base64 kubeconfig into <workspace>/.kubeconfig, exports KUBECONFIG
+    # for later steps, and points the current context at the target namespace.
+    - name: Setup Kubeconfig
+      id: setup-kubeconfig
+      shell: bash
+      env:
+        # Passed through the environment so neither the credentials nor the
+        # namespace are expanded into the script text.
+        KUBECONFIG_BASE64: ${{ inputs.kubeconfig_base64 }}
+        KUBECONFIG_PATH: ${{ github.workspace }}/.kubeconfig
+        NAMESPACE: ${{ inputs.namespace }}
+      run: |
+        # Create the file owner-only from the start rather than tightening the
+        # mode after the credentials have already been written.
+        umask 077
+        echo "$KUBECONFIG_BASE64" | base64 -d > "$KUBECONFIG_PATH"
+        # Still needed when the file already existed with a looser mode.
+        chmod 600 "$KUBECONFIG_PATH"
+        echo "KUBECONFIG=${KUBECONFIG_PATH}" >> "$GITHUB_ENV"
+        # GITHUB_ENV only takes effect in later steps, so export for this one too.
+        export KUBECONFIG="$KUBECONFIG_PATH"
+        kubectl config set-context --current --namespace="$NAMESPACE"
+        kubectl config get-contexts
+    # Derives the log file name and artifact name from test_identifier and
+    # publishes both as step outputs.
+    - name: Setup Test Output Names
+      id: setup-test-names
+      shell: bash
+      run: |
+        IDENT="${{ inputs.test_identifier }}"
+        {
+          echo "test_output_log_file=deploy_test_output_${IDENT}.log"
+          # Artifact name: same identifier with every underscore turned into a dash.
+          echo "artifact_name=test-results-${IDENT//_/-}"
+        } >> $GITHUB_OUTPUT
+    # Patches the runtime image into the DynamoGraphDeployment file, applies it,
+    # and waits for the deployment's pods to become ready. Sets the graph_name
+    # output, and deploy_failed=true when the readiness wait fails.
+    - name: Deploy and Test
+      id: deploy
+      shell: bash
+      working-directory: ${{ github.workspace }}/examples/backends/${{ inputs.framework }}
+      env:
+        NAMESPACE: ${{ inputs.namespace }}
+        FRAMEWORK: ${{ inputs.framework }}
+        FRAMEWORK_RUNTIME_IMAGE: ${{ inputs.framework_runtime_image }}
+        DEPLOYMENT_FILE: ${{ inputs.deployment_file }}
+        MODEL_NAME: ${{ inputs.model_name }}
+        POD_READY_TIMEOUT: ${{ inputs.pod_ready_timeout }}
+      run: |
+        set -x
+        export KUBECONFIG=${{ github.workspace }}/.kubeconfig
+        kubectl config set-context --current --namespace="$NAMESPACE"
+        # Redirect all output to a log file while still showing it
+        exec > >(tee -a "${{ steps.setup-test-names.outputs.test_output_log_file }}") 2>&1
+        export KUBE_NS="$NAMESPACE"
+        # Assign first, export second: `export VAR=$(cmd)` returns export's own
+        # status, which would hide a yq failure.
+        GRAPH_NAME=$(yq e '.metadata.name' "$DEPLOYMENT_FILE")
+        export GRAPH_NAME
+        # Fail now rather than after the full pod-wait timeout on a bogus label.
+        if [ -z "$GRAPH_NAME" ] || [ "$GRAPH_NAME" = "null" ]; then
+          echo "::error::Could not read .metadata.name from ${DEPLOYMENT_FILE}"
+          exit 1
+        fi
+        echo "graph_name=${GRAPH_NAME}" >> "$GITHUB_OUTPUT"
+        # Update the deployment file with the runtime image
+        # Use strenv() to ensure the image string is treated as plain string, not parsed as YAML
+        yq -i '.spec.services.[].extraPodSpec.mainContainer.image = strenv(FRAMEWORK_RUNTIME_IMAGE)' "$DEPLOYMENT_FILE"
+        echo "=== DEPLOYMENT CONFIGURATION ==="
+        echo "Framework: ${FRAMEWORK}"
+        echo "Runtime Image: ${FRAMEWORK_RUNTIME_IMAGE}"
+        echo "Graph Name: ${GRAPH_NAME}"
+        echo "Namespace: ${KUBE_NS}"
+        echo ""
+        echo "=== UPDATED DEPLOYMENT FILE ==="
+        cat "$DEPLOYMENT_FILE"
+        # Apply the deployment
+        kubectl apply -n "$KUBE_NS" -f "$DEPLOYMENT_FILE"
+        # Wait for pods to be ready
+        echo "=== WAITING FOR PODS ==="
+        # Pause before waiting; presumably gives the pods time to be created so
+        # the label selector below matches something.
+        sleep 20
+        echo "Waiting for pods with label nvidia.com/dynamo-graph-deployment-name: $GRAPH_NAME"
+        if ! kubectl wait --for=condition=ready pod \
+          -l "nvidia.com/dynamo-graph-deployment-name=$GRAPH_NAME" \
+          -n "${KUBE_NS}" \
+          --timeout="${POD_READY_TIMEOUT}"; then
+          echo "::error::Pods failed to become ready within timeout"
+          # Read by the "Debug Pod Failure" step's condition.
+          echo "deploy_failed=true" >> "$GITHUB_OUTPUT"
+          exit 1
+        fi
+        echo "=== FINAL POD STATUSES ==="
+        kubectl get pods -l "nvidia.com/dynamo-graph-deployment-name=$GRAPH_NAME" -n "$KUBE_NS" -o wide
+        echo ""
+        kubectl get all -n "$KUBE_NS"
+    # After a pod-readiness failure, append the deployment's pod list and the
+    # last 30 log lines of each pod to the job summary.
+    - name: Debug Pod Failure
+      id: debug-failure
+      if: failure() && steps.deploy.outputs.deploy_failed == 'true'
+      shell: bash
+      env:
+        NAMESPACE: ${{ inputs.namespace }}
+        FRAMEWORK: ${{ inputs.framework }}
+        DEPLOYMENT_FILE: ${{ inputs.deployment_file }}
+        GRAPH_NAME: ${{ steps.deploy.outputs.graph_name }}
+      run: |
+        export KUBECONFIG=${{ github.workspace }}/.kubeconfig
+        POD_SELECTOR="nvidia.com/dynamo-graph-deployment-name=$GRAPH_NAME"
+        # Everything printed inside the braces is appended to the step summary.
+        {
+          echo "## ❌ Pod Readiness Failure: ${FRAMEWORK} / ${DEPLOYMENT_FILE}"
+          echo ""
+          echo "**Graph Name:** \`${GRAPH_NAME}\`"
+          echo "**Namespace:** \`${NAMESPACE}\`"
+          echo ""
+          echo "### All relevant Pods in Namespace"
+          echo '```'
+          kubectl get pods -l "$POD_SELECTOR" -n ${NAMESPACE} -o wide 2>&1
+          echo '```'
+          echo ""
+          # Disabled section, kept for reference:
+          # echo "### Pod Descriptions"
+          # echo '```'
+          # kubectl describe pods -l "$POD_SELECTOR" -n ${NAMESPACE} 2>&1
+          # echo '```'
+          # echo ""
+          echo "### Pod Logs (last 30 lines per container)"
+          echo ""
+        } >> "$GITHUB_STEP_SUMMARY"
+        # One log section per pod for better readability.
+        POD_NAMES=$(kubectl get pods -l "$POD_SELECTOR" -n ${NAMESPACE} -o jsonpath='{.items[*].metadata.name}')
+        {
+          if [ -n "$POD_NAMES" ]; then
+            for POD_NAME in $POD_NAMES; do
+              echo "#### Pod: \`${POD_NAME}\`"
+              echo '```'
+              kubectl logs --tail=30 --all-containers=true ${POD_NAME} -n ${NAMESPACE} 2>&1 || echo "No logs available for ${POD_NAME}"
+              echo '```'
+              echo ""
+            done
+          else
+            echo "_No pods found matching the deployment label_"
+          fi
+        } >> "$GITHUB_STEP_SUMMARY"
+    # Port-forwards to the frontend pod, waits until the model is listed by
+    # /v1/models, sends one chat-completion request, and validates the response.
+    # Sets the test_result output (0=pass, 1=fail) and exits with that value.
+    - name: Run Validation Tests
+      id: test
+      shell: bash
+      env:
+        NAMESPACE: ${{ inputs.namespace }}
+        FRAMEWORK: ${{ inputs.framework }}
+        MODEL_NAME: ${{ inputs.model_name }}
+        MAX_ATTEMPTS: ${{ inputs.model_available_max_attempts }}
+        RETRY_DELAY: ${{ inputs.model_available_retry_delay }}
+        PORT_FORWARD_DELAY: ${{ inputs.port_forward_delay }}
+        MAX_TOKENS: ${{ inputs.max_tokens }}
+        TEMPERATURE: ${{ inputs.temperature }}
+        MIN_RESPONSE_LENGTH: ${{ inputs.min_response_length }}
+        TEST_PROMPT: ${{ inputs.test_prompt }}
+        GRAPH_NAME: ${{ steps.deploy.outputs.graph_name }}
+      run: |
+        set -x
+        export KUBECONFIG=${{ github.workspace }}/.kubeconfig
+        # Get frontend pod and setup port-forward
+        FRONTEND_POD=$(kubectl get pods -n "${NAMESPACE}" \
+          -l "nvidia.com/dynamo-component-type=frontend,nvidia.com/dynamo-graph-deployment-name=${GRAPH_NAME}" \
+          -o jsonpath='{.items[0].metadata.name}')
+        CONTAINER_PORT=$(kubectl get pod "$FRONTEND_POD" -n "${NAMESPACE}" \
+          -o jsonpath='{.spec.containers[0].ports[?(@.name=="http")].containerPort}')
+        echo "Frontend Pod: ${FRONTEND_POD}"
+        echo "Container Port: ${CONTAINER_PORT}"
+        kubectl port-forward "pod/$FRONTEND_POD" "8000:${CONTAINER_PORT}" -n "${NAMESPACE}" &
+        PORT_FORWARD_PID=$!
+        # Stop the background port-forward on every exit path, including aborts
+        # caused by a failing command. The trap does not alter the exit status.
+        trap 'kill "$PORT_FORWARD_PID" 2>/dev/null || true' EXIT
+        LLM_URL="http://localhost:8000"
+        sleep "${PORT_FORWARD_DELAY}"
+        echo "LLM URL: ${LLM_URL}"
+        echo "Model Name: ${MODEL_NAME}"
+        # Wait for model to be available
+        ATTEMPT=1
+        while [ "$ATTEMPT" -le "$MAX_ATTEMPTS" ]; do
+          MODELS_RESPONSE=$(curl -s --retry 5 --retry-delay 2 --retry-connrefused "${LLM_URL}/v1/models" || true)
+          # any(...) yields a single boolean. Comparing each entry separately would
+          # make `jq -e` report only the LAST entry, missing the model whenever it
+          # is not listed last.
+          if echo "$MODELS_RESPONSE" | jq -e --arg MODEL_NAME "$MODEL_NAME" 'any(.data[]?; .id == $MODEL_NAME)' >/dev/null 2>&1; then
+            echo "Model $MODEL_NAME is available in /v1/models"
+            break
+          fi
+          echo "Waiting for model $MODEL_NAME... (attempt $ATTEMPT/$MAX_ATTEMPTS)"
+          sleep "${RETRY_DELAY}"
+          ATTEMPT=$((ATTEMPT + 1))
+        done
+        # The counter only passes MAX_ATTEMPTS when the loop never hit `break`.
+        if [ "$ATTEMPT" -gt "$MAX_ATTEMPTS" ]; then
+          echo "Model $MODEL_NAME not found after $MAX_ATTEMPTS attempts"
+          echo "Last response: $MODELS_RESPONSE"
+          echo "test_result=1" >> "$GITHUB_OUTPUT"
+          exit 1
+        fi
+        # Use default prompt if not provided
+        if [ -z "$TEST_PROMPT" ]; then
+          TEST_PROMPT="In the heart of Eldoria, an ancient land of boundless magic and mysterious creatures, lies the long-forgotten city of Aeloria. Once a beacon of knowledge and power, Aeloria was buried beneath the shifting sands of time, lost to the world for centuries. You are an intrepid explorer, known for your unparalleled curiosity and courage, who has stumbled upon an ancient map hinting at ests that Aeloria holds a secret so profound that it has the potential to reshape the very fabric of reality. Your journey will take you through treacherous deserts, enchanted forests, and across perilous mountain ranges. Your Task: Character Background: Develop a detailed background for your character. Describe their motivations for seeking out Aeloria, their skills and weaknesses, and any personal connections to the ancient city or its legends. Are they driven by a quest for knowledge, a search for lost familt clue is hidden."
+        fi
+        # Build the request body with jq so quotes, backslashes and newlines in
+        # the prompt or model name are JSON-escaped instead of corrupting the body.
+        # --argjson keeps max_tokens and temperature as JSON numbers.
+        REQUEST_BODY=$(jq -n \
+          --arg model "$MODEL_NAME" \
+          --arg prompt "$TEST_PROMPT" \
+          --argjson max_tokens "$MAX_TOKENS" \
+          --argjson temperature "$TEMPERATURE" \
+          '{model: $model, messages: [{role: "user", content: $prompt}], stream: false, max_tokens: $max_tokens, temperature: $temperature}')
+        # Send test request
+        RESPONSE=$(curl -s -N --no-buffer --retry 10 --retry-delay 5 --retry-connrefused \
+          -X POST "${LLM_URL}/v1/chat/completions" \
+          -H 'accept: application/json' \
+          -H 'Content-Type: application/json' \
+          -d "$REQUEST_BODY" 2>&1)
+        echo "Response: $RESPONSE"
+        # Validate response; the first failing check decides the reported reason.
+        TEST_RESULT=0
+        if ! echo "$RESPONSE" | jq -e . >/dev/null 2>&1; then
+          echo "❌ Test failed: Response is not valid JSON"
+          echo "Got: $RESPONSE"
+          TEST_RESULT=1
+        elif ! echo "$RESPONSE" | jq -e '.choices[0].message.role == "assistant"' >/dev/null 2>&1; then
+          echo "❌ Test failed: Message role is not 'assistant'"
+          echo "Got: $(echo "$RESPONSE" | jq '.choices[0].message.role')"
+          TEST_RESULT=1
+        elif ! echo "$RESPONSE" | jq -e --arg model "$MODEL_NAME" '.model == $model' >/dev/null 2>&1; then
+          echo "❌ Test failed: Model name mismatch"
+          echo "Expected: ${MODEL_NAME}"
+          echo "Got: $(echo "$RESPONSE" | jq '.model')"
+          TEST_RESULT=1
+        elif ! echo "$RESPONSE" | jq -e --argjson min "$MIN_RESPONSE_LENGTH" '.choices[0].message.content | length > $min' >/dev/null 2>&1; then
+          echo "❌ Test failed: Response too short (min: ${MIN_RESPONSE_LENGTH})"
+          echo "Got length: $(echo "$RESPONSE" | jq '.choices[0].message.content | length')"
+          TEST_RESULT=1
+        else
+          echo "✅ Test passed: Response matches expected format and content"
+        fi
+        echo "test_result=${TEST_RESULT}" >> "$GITHUB_OUTPUT"
+        # The EXIT trap above cleans up the port-forward.
+        exit $TEST_RESULT
+    # Best-effort teardown of the DynamoGraphDeployment named by the deploy step's
+    # graph_name output. Runs even when earlier steps failed, unless the caller
+    # set skip_cleanup to 'true'.
+    - name: Cleanup Deployment
+      if: always() && inputs.skip_cleanup != 'true'
+      shell: bash
+      env:
+        NAMESPACE: ${{ inputs.namespace }}
+        GRAPH_NAME: ${{ steps.deploy.outputs.graph_name }}
+      run: |
+        set -x
+        export KUBECONFIG="${{ github.workspace }}/.kubeconfig"
+        # Snapshot the namespace before deleting anything; failures here are ignored
+        echo "=== PRE-CLEANUP STATUS ==="
+        kubectl get dynamographdeployments -n "$NAMESPACE" || true
+        kubectl get pods -n "$NAMESPACE" || true
+        # GRAPH_NAME is empty when the deploy step never produced its output;
+        # skip the delete instead of invoking kubectl without a resource name.
+        if [ -z "${GRAPH_NAME}" ]; then
+          echo "No DynamoGraphDeployment name available; skipping delete"
+        else
+          echo "Deleting DynamoGraphDeployment: ${GRAPH_NAME}"
+          kubectl delete dynamographdeployments "${GRAPH_NAME}" -n "$NAMESPACE" --timeout=60s || true
+        fi
+    # Publish the test output log as a workflow artifact. `if: always()` makes the
+    # upload run regardless of how the earlier steps finished.
+    - name: Upload Test Results
+      uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f #v6
+      if: always()
+      with:
+        # Artifact name and file path both come from outputs of the
+        # `setup-test-names` step (defined earlier in this action).
+        name: ${{ steps.setup-test-names.outputs.artifact_name }}
+        path: ${{ steps.setup-test-names.outputs.test_output_log_file }}
+        # Artifact is retained for 7 days
+        retention-days: 7
--- a/.github/actions/init-dynamo-builder/action.yml
+++ b/.github/actions/init-dynamo-builder/action.yml
+name: 'Initialize Dynamo Builder'
+description: 'Route buildkit workers and bootstrap buildx builder for dynamo builds'
+# This action combines buildkit worker discovery and builder bootstrapping into a single step.
+# It wraps route_buildkit.sh and bootstrap-buildkit action to simplify workflow configuration.
+#
+# How it works:
+#   1. Discovers available BuildKit pods via Kubernetes DNS using route_buildkit.sh
+#   2. Routes pods to the specified flavor based on modulo-3 strategy (see route_buildkit.sh)
+#   3. Bootstraps a docker buildx builder using the discovered workers
+#   4. Falls back to Kubernetes driver if no remote workers are available
+#
+# Architecture modes:
+#   - Single arch: Set arch to 'amd64' or 'arm64' to initialize for one architecture
+#   - Multi arch: Set all_arch to 'true' to initialize for both amd64 and arm64
+#
+# Flavor routing:
+#   BuildKit pods are assigned to flavors based on pod index modulo 3
+#   (this mirrors the case statement in route_buildkit.sh):
+#   - Pool 0 (mod 0): vllm-cuda12, sglang-cuda12
+#   - Pool 1 (mod 1): vllm-cuda13, sglang-cuda13
+#   - Pool 2 (mod 2): trtllm-cuda13, general (any/no CUDA), and any other flavor/CUDA combination
+#
+# Usage examples:
+#   # Initialize for both architectures with general flavor:
+#   - uses: ./.github/actions/init-dynamo-builder
+#     with:
+#       builder_name: my-builder
+#       flavor: general
+#       all_arch: 'true'
+#
+#   # Initialize for single architecture with specific flavor and CUDA version:
+#   - uses: ./.github/actions/init-dynamo-builder
+#     with:
+#       builder_name: my-builder
+#       flavor: vllm
+#       arch: amd64
+#       cuda_version: '12.9'
+# Inputs fall into two groups: routing inputs consumed by the
+# "Route buildkit workers" / "Prepare worker addresses" steps, and passthrough
+# inputs forwarded unchanged to the bootstrap-buildkit action.
+inputs:
+  # Forwarded to bootstrap-buildkit as builder_name
+  builder_name:
+    description: 'Name for the buildx builder'
+    required: true
+  # Passed to route_buildkit.sh as --flavor and used to select the routing output key
+  flavor:
+    description: 'Buildkit flavor (vllm, trtllm, sglang, general)'
+    required: false
+    default: 'general'
+  # Passed to route_buildkit.sh as --arch when all_arch is not 'true'
+  arch:
+    description: 'Target architecture (amd64, arm64). Ignored if all_arch is true.'
+    required: false
+    default: 'amd64'
+  # Compared against the string 'true'; when it matches, routing runs with --arch all
+  all_arch:
+    description: 'If true, initialize builder for both amd64 and arm64 architectures'
+    required: false
+    default: 'false'
+  # When non-empty, passed to route_buildkit.sh as --cuda
+  cuda_version:
+    description: 'CUDA version (12.9, 13.0). Optional for general flavor.'
+    required: false
+    default: ''
+  # Passthrough inputs for bootstrap-buildkit (kubernetes fallback)
+  ephemeral_storage:
+    description: 'Ephemeral storage request for Kubernetes driver'
+    required: false
+    default: '400Gi'
+  namespace:
+    description: 'Kubernetes namespace for buildkit pods'
+    required: false
+    default: 'buildkit'
+  replicas:
+    description: 'Number of buildkit replicas'
+    required: false
+    default: '1'
+  requests_cpu:
+    description: 'CPU requests for buildkit pods'
+    required: false
+    default: '12'
+  requests_memory:
+    description: 'Memory requests for buildkit pods'
+    required: false
+    default: '26Gi'
+  limits_memory:
+    description: 'Memory limits for buildkit pods'
+    required: false
+    default: '29Gi'
+  # Single comma-separated key=value string, forwarded as-is
+  tolerations:
+    description: 'Tolerations for buildkit pods'
+    required: false
+    default: "key=buildkit-fallback-worker,value=true,operator=Equal,effect=NoSchedule"
+runs:
+  using: "composite"
+  steps:
+    # Discover remote BuildKit workers for the requested flavor/arch by running
+    # route_buildkit.sh. continue-on-error lets the action proceed when routing
+    # fails, so the following steps still run with whatever outputs were written.
+    - name: Route buildkit workers
+      id: route-buildkit
+      continue-on-error: true
+      shell: bash
+      env:
+        # Inputs are handed over as environment variables instead of being
+        # expanded inline, so their values are never parsed as shell source.
+        INPUT_FLAVOR: ${{ inputs.flavor }}
+        INPUT_ARCH: ${{ inputs.arch }}
+        INPUT_ALL_ARCH: ${{ inputs.all_arch }}
+        INPUT_CUDA_VERSION: ${{ inputs.cuda_version }}
+      run: |
+        # Optional --cuda argument; an array expands to zero or exactly two words
+        CUDA_ARGS=()
+        if [[ -n "$INPUT_CUDA_VERSION" ]]; then
+          CUDA_ARGS=(--cuda "$INPUT_CUDA_VERSION")
+        fi
+        # all_arch takes precedence over the single-arch input
+        if [[ "$INPUT_ALL_ARCH" == "true" ]]; then
+          TARGET_ARCH="all"
+        else
+          TARGET_ARCH="$INPUT_ARCH"
+        fi
+        echo "running with --arch ${TARGET_ARCH} --flavor ${INPUT_FLAVOR} ${CUDA_ARGS[*]}"
+        .github/scripts/route_buildkit.sh --arch "$TARGET_ARCH" --flavor "$INPUT_FLAVOR" "${CUDA_ARGS[@]}"
+    # Collapse the per-architecture routing outputs into a single comma-separated
+    # worker_addresses output (empty when routing produced nothing).
+    - name: Prepare worker addresses and platform
+      id: prepare
+      shell: bash
+      run: |
+        if [[ "${{ inputs.all_arch }}" != "true" ]]; then
+          # Single architecture: forward that architecture's routed addresses unchanged
+          WORKER_ADDRESSES="${{ steps.route-buildkit.outputs[format('{0}_{1}', inputs.flavor, inputs.arch)] }}"
+        else
+          # Multi-arch: join whichever per-arch address lists are non-empty, amd64 first
+          WORKER_ADDRESSES=""
+          for ARCH_ADDRS in \
+            "${{ steps.route-buildkit.outputs[format('{0}_amd64', inputs.flavor)] }}" \
+            "${{ steps.route-buildkit.outputs[format('{0}_arm64', inputs.flavor)] }}"; do
+            if [[ -n "$ARCH_ADDRS" ]]; then
+              # Prepend a comma only when something has already been collected
+              WORKER_ADDRESSES="${WORKER_ADDRESSES:+${WORKER_ADDRESSES},}${ARCH_ADDRS}"
+            fi
+          done
+        fi
+        echo "worker_addresses=${WORKER_ADDRESSES}" >> "$GITHUB_OUTPUT"
+    # Hand the routed worker list to the bootstrap-buildkit action. Per that
+    # action's input description, an empty buildkit_worker_addresses makes it
+    # fall back to the Kubernetes driver, which is what the remaining inputs configure.
+    - name: Bootstrap buildkit
+      uses: ./.github/actions/bootstrap-buildkit
+      with:
+        builder_name: ${{ inputs.builder_name }}
+        # Comma-separated addresses produced by the `prepare` step (may be empty)
+        buildkit_worker_addresses: ${{ steps.prepare.outputs.worker_addresses }}
+        # Everything below is forwarded unchanged from this action's inputs
+        ephemeral_storage: ${{ inputs.ephemeral_storage }}
+        namespace: ${{ inputs.namespace }}
+        replicas: ${{ inputs.replicas }}
+        requests_cpu: ${{ inputs.requests_cpu }}
+        requests_memory: ${{ inputs.requests_memory }}
+        limits_memory: ${{ inputs.limits_memory }}
+        tolerations: ${{ inputs.tolerations }}
--- a/.github/actions/pytest/action.yml
+++ b/.github/actions/pytest/action.yml
@@ -8,10 +8,6 @@ inputs:
  image_tag:
    description: 'Image Tag to run tests on'
    required: true
-  cpu_limit:
-    description: 'Maximum number of cores available to docker'
-    required: false
-    default: '10'
  framework:
    description: 'Framework name for test metrics'
    required: false
@@ -38,7 +34,18 @@ inputs:
    default: 'false'
  hf_token:
    required: false
+  parallel_mode:
+    description: 'Parallelization mode: auto (use all cores), none/0 (sequential), or a number of workers'
+    required: false
+    default: 'auto'
+  dind_as_sidecar:
+    description: 'dind runs as a sidecar container (true/false)'
+    required: false
+    default: 'false'
+  cpu_limit:
+    description: 'Maximum number of cores available to docker'
+    required: false
+    default: '10'
 runs:
  using: "composite"
@@ -98,7 +105,8 @@ runs:
          sleep 1
        done
-    - name: Run tests
+    - name: Run tests for runner v1
+      if: inputs.dind_as_sidecar == 'false'
      shell: bash
      env:
        NUM_CPUS: ${{ inputs.cpu_limit }}
@@ -164,6 +172,96 @@ runs:
        # Always continue to results processing
        exit 0
+    # Runs pytest inside the image under test via `docker run`, used when
+    # dind_as_sidecar is 'true'. The step always exits 0; the real pytest exit
+    # code is exported as TEST_EXIT_CODE through GITHUB_ENV for later steps.
+    - name: Run tests in dind as sidecar mode
+      if: inputs.dind_as_sidecar == 'true'
+      shell: bash
+      env:
+        CONTAINER_ID: test_${{ github.run_id }}_${{ github.run_attempt }}_${{ github.job }}
+        PYTEST_XML_FILE: pytest_test_report.xml
+        HF_HOME: /runner/_work/_temp
+        HF_TOKEN: ${{ inputs.hf_token }}
+      run: |
+        # Run pytest with detailed output and JUnit XML
+        set +e  # Don't exit on test failures
+        # Define common docker flags for stability (Shared memory & limits)
+        # --ipc=host is critical for parallel pytest workers to communicate fast
+        DOCKER_OPTS="--ipc=host --ulimit memlock=-1 --ulimit stack=67108864"
+        # Determine docker runtime flags and pytest command based on dry_run mode
+        if [[ "${{ inputs.dry_run }}" == "true" ]]; then
+          echo "🔍 Running pytest in dry-run mode (collect-only, no GPU required)"
+          GPU_FLAGS=""
+          PYTEST_CMD="pytest -v --collect-only -m \"${{ inputs.pytest_marks }}\""
+        else
+          echo "🚀 Running pytest in normal mode"
+          MYPY_FLAG=""
+          if [[ "${{ inputs.enable_mypy }}" == "true" ]]; then
+            echo "🔍 Mypy type checking enabled"
+            MYPY_FLAG="--mypy"
+          fi
+          # Detect GPU availability and conditionally add GPU flags
+          GPU_FLAGS=""
+          # We check 'docker info' for the 'nvidia' runtime, which indicates the Daemon can spawn GPU containers.
+          if docker info 2>/dev/null | grep -i "runtimes" | grep -q "nvidia"; then
+            echo "✓ Docker Daemon supports Nvidia runtime, enabling GPU flags"
+            GPU_FLAGS="--gpus all"
+          else
+            echo "⚠️  Nvidia runtime not found in Docker Daemon, running in CPU-only mode"
+          fi
+          # Determine parallelization based on parallel_mode input
+          case "${{ inputs.parallel_mode }}" in
+            "auto")
+              PARALLEL_OPTS="-n auto"
+              echo "📊 Parallelization: auto (use all available cores)"
+              ;;
+            "none"|"0")
+              PARALLEL_OPTS="-n 0"
+              echo "📊 Parallelization: disabled (sequential execution) for GPU runs"
+              ;;
+            *)
+              PARALLEL_OPTS="-n ${{ inputs.parallel_mode }}"
+              echo "📊 Parallelization: ${{ inputs.parallel_mode }} workers"
+              ;;
+          esac
+          # Construct final command with xdist parallelization (-n) and other options
+          # --dist=loadscope groups tests by module/class to prevent race conditions in stateful tests
+          PYTEST_CMD="pytest ${PARALLEL_OPTS} --dist=loadscope --continue-on-collection-errors -v --tb=short --basetemp=/tmp/pytest_temp -o cache_dir=/tmp/.pytest_cache --junitxml=/workspace/test-results/${{ env.PYTEST_XML_FILE }} --durations=10 ${MYPY_FLAG} -m \"${{ inputs.pytest_marks }}\""
+        fi
+        # Get absolute path for test-results directory and ensure it has proper permissions.
+        # Create it first: chmod on a missing directory fails, and the bind mount
+        # below would otherwise leave directory creation to the Docker daemon.
+        TEST_RESULTS_DIR="$(pwd)/test-results"
+        mkdir -p "${TEST_RESULTS_DIR}"
+        chmod 777 "${TEST_RESULTS_DIR}"
+        echo "📁 Test results will be saved to: ${TEST_RESULTS_DIR}"
+        echo "▶️ Executing: $PYTEST_CMD"
+        # GPU_FLAGS and DOCKER_OPTS are intentionally unquoted so they split into separate flags
+        docker run ${GPU_FLAGS} ${DOCKER_OPTS} --rm -w /workspace \
+          --network host \
+          --env HF_TOKEN="${HF_TOKEN}" \
+          --name "${{ env.CONTAINER_ID }}_pytest" \
+          -v "${TEST_RESULTS_DIR}:/workspace/test-results" \
+          "${{ inputs.image_tag }}" \
+          bash -c "${PYTEST_CMD}"
+        TEST_EXIT_CODE=$?
+        # Persist the pytest exit code for subsequent steps
+        echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> "$GITHUB_ENV"
+        echo "🧪 Tests completed with exit code: ${TEST_EXIT_CODE}"
+        # Verify test results were written (only in normal mode)
+        if [[ "${{ inputs.dry_run }}" != "true" ]]; then
+          if [[ -f "${TEST_RESULTS_DIR}/${{ env.PYTEST_XML_FILE }}" ]]; then
+            echo "✅ Test results file found: ${TEST_RESULTS_DIR}/${{ env.PYTEST_XML_FILE }}"
+          else
+            echo "⚠️  Test results file not found: ${TEST_RESULTS_DIR}/${{ env.PYTEST_XML_FILE }}"
+          fi
+        fi
+        # Always continue to results processing
+        exit 0
    - name: Process Test Results
      shell: bash
      run: |

--- a/.github/actions/skopeo-copy/action.yml
+++ b/.github/actions/skopeo-copy/action.yml
+name: 'Skopeo Copy'
+description: 'Copy container images between registries using skopeo'
+# Source/target coordinates for the copy, followed by two parallel sets of
+# credentials inputs that are forwarded to the skopeo-login action.
+inputs:
+  source_registry:
+    description: 'Source registry hostname (e.g., 123456789.dkr.ecr.us-east-1.amazonaws.com)'
+    required: true
+  source_image:
+    description: 'Source image name (e.g., ai-dynamo/dynamo)'
+    required: true
+  source_tag:
+    description: 'Source image tag'
+    required: true
+  target_registry:
+    description: 'Target registry hostname'
+    required: true
+  # When empty, the Copy Image step substitutes source_image
+  target_image:
+    description: 'Target image name (defaults to source_image if not specified)'
+    required: false
+  # When empty, the Copy Image step substitutes source_tag
+  target_tag:
+    description: 'Target image tag (defaults to source_tag if not specified)'
+    required: false
+  # Skopeo Login inputs for source registry
+  source_aws_default_region:
+    description: 'AWS Default Region for source ECR'
+    required: false
+  source_aws_account_id:
+    description: 'AWS Account ID for source ECR'
+    required: false
+  source_azure_acr_hostname:
+    description: 'Azure ACR hostname for source registry'
+    required: false
+  source_azure_acr_user:
+    description: 'Azure ACR user for source registry'
+    required: false
+  source_azure_acr_password:
+    description: 'Azure ACR password for source registry'
+    required: false
+  # Skopeo Login inputs for target registry
+  target_aws_default_region:
+    description: 'AWS Default Region for target ECR'
+    required: false
+  target_aws_account_id:
+    description: 'AWS Account ID for target ECR'
+    required: false
+  target_azure_acr_hostname:
+    description: 'Azure ACR hostname for target registry'
+    required: false
+  target_azure_acr_user:
+    description: 'Azure ACR user for target registry'
+    required: false
+  target_azure_acr_password:
+    description: 'Azure ACR password for target registry'
+    required: false
+outputs:
+  # Re-exports the `copy` step's output: <target_registry>/<image>:<tag>,
+  # without the docker:// transport prefix.
+  target_image_ref:
+    description: 'Full target image reference'
+    value: ${{ steps.copy.outputs.target_image_ref }}
+runs:
+  using: "composite"
+  steps:
+    # Authenticate against the source registry; the source_* credential inputs
+    # are mapped onto the skopeo-login action's unprefixed inputs.
+    - name: Login to Source Registry
+      uses: ./.github/actions/skopeo-login
+      with:
+        aws_default_region: ${{ inputs.source_aws_default_region }}
+        aws_account_id: ${{ inputs.source_aws_account_id }}
+        azure_acr_hostname: ${{ inputs.source_azure_acr_hostname }}
+        azure_acr_user: ${{ inputs.source_azure_acr_user }}
+        azure_acr_password: ${{ inputs.source_azure_acr_password }}
+    # Authenticate against the target registry; the target_* credential inputs
+    # are mapped onto the skopeo-login action's unprefixed inputs.
+    - name: Login to Target Registry
+      uses: ./.github/actions/skopeo-login
+      with:
+        aws_default_region: ${{ inputs.target_aws_default_region }}
+        aws_account_id: ${{ inputs.target_aws_account_id }}
+        azure_acr_hostname: ${{ inputs.target_azure_acr_hostname }}
+        azure_acr_user: ${{ inputs.target_azure_acr_user }}
+        azure_acr_password: ${{ inputs.target_azure_acr_password }}
+    # Copy the image from the source registry to the target registry with
+    # `skopeo copy --all`, then publish the resulting reference as the
+    # target_image_ref step output.
+    - name: Copy Image
+      id: copy
+      shell: bash
+      env:
+        # Inputs travel through the environment so their values are treated as
+        # data rather than being spliced into the script text.
+        SOURCE_REGISTRY: ${{ inputs.source_registry }}
+        SOURCE_IMAGE: ${{ inputs.source_image }}
+        SOURCE_TAG: ${{ inputs.source_tag }}
+        TARGET_REGISTRY: ${{ inputs.target_registry }}
+        TARGET_IMAGE_INPUT: ${{ inputs.target_image }}
+        TARGET_TAG_INPUT: ${{ inputs.target_tag }}
+      run: |
+        set -euo pipefail
+        SOURCE_REF="docker://${SOURCE_REGISTRY}/${SOURCE_IMAGE}:${SOURCE_TAG}"
+        # Use source values as defaults if target not specified (empty counts as unspecified)
+        TARGET_IMAGE="${TARGET_IMAGE_INPUT:-$SOURCE_IMAGE}"
+        TARGET_TAG="${TARGET_TAG_INPUT:-$SOURCE_TAG}"
+        # Reference without the transport prefix; this is what callers receive
+        TARGET_IMAGE_REF="${TARGET_REGISTRY}/${TARGET_IMAGE}:${TARGET_TAG}"
+        TARGET_REF="docker://${TARGET_IMAGE_REF}"
+        echo "Copying image:"
+        echo "  Source: ${SOURCE_REF}"
+        echo "  Target: ${TARGET_REF}"
+        skopeo copy --all "${SOURCE_REF}" "${TARGET_REF}"
+        echo "target_image_ref=${TARGET_IMAGE_REF}" >> "$GITHUB_OUTPUT"
+        echo "✅ Image copied successfully"
--- a/.github/actions/skopeo-login/action.yml
+++ b/.github/actions/skopeo-login/action.yml
+name: 'Skopeo Login'
+description: 'Login to multiple container registries using skopeo (ECR, ACR)'
+# All inputs are optional. The ECR login step runs only when both AWS inputs are
+# non-empty; the ACR login step runs only when all three Azure inputs are non-empty.
+inputs:
+  aws_default_region:
+    description: 'AWS Default Region'
+    required: false
+  aws_account_id:
+    description: 'AWS Account ID'
+    required: false
+  azure_acr_hostname:
+    description: 'Azure ACR hostname'
+    required: false
+  azure_acr_user:
+    description: 'Azure ACR user'
+    required: false
+  azure_acr_password:
+    description: 'Azure ACR password'
+    required: false
+runs:
+  using: "composite"
+  steps:
+    # Make sure the skopeo CLI is on PATH, installing it through apt-get or dnf
+    # when it is missing; any other OS aborts the step.
+    - name: Install skopeo
+      shell: bash
+      run: |
+        if command -v skopeo >/dev/null 2>&1; then
+          echo "skopeo is already installed"
+        else
+          echo "Installing skopeo..."
+          if [ -f /etc/debian_version ]; then
+            # --fix-missing is there to handle 404s on transient package versions
+            sudo apt-get update
+            sudo apt-get install -y --fix-missing skopeo
+          elif [ -f /etc/redhat-release ]; then
+            sudo dnf install -y skopeo
+          else
+            echo "Unsupported OS for automatic skopeo installation"
+            exit 1
+          fi
+        fi
+        # Record the skopeo version in the job log
+        skopeo --version
+    # Log skopeo in to ECR by piping the token from `aws ecr get-login-password`
+    # into `skopeo login`. Skipped unless both AWS inputs are non-empty.
+    - name: ECR Login
+      shell: bash
+      if: ${{ inputs.aws_default_region != '' && inputs.aws_account_id != '' }}
+      env:
+        # Region is passed via the environment instead of being expanded into the script text
+        ECR_REGION: ${{ inputs.aws_default_region }}
+        ECR_HOSTNAME: ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com
+      run: |
+        set -euo pipefail
+        aws ecr get-login-password --region "${ECR_REGION}" | skopeo login --username AWS --password-stdin "${ECR_HOSTNAME}"
+    # Log skopeo in to Azure Container Registry. Skipped unless hostname, user
+    # and password are all non-empty.
+    - name: ACR Login
+      shell: bash
+      if: ${{ inputs.azure_acr_hostname != '' && inputs.azure_acr_user != '' && inputs.azure_acr_password != '' }}
+      env:
+        # Credentials are passed through the environment: expanding them inline
+        # would paste the password into the script source, where quotes, `$` or
+        # backticks in it would break or be executed by the shell.
+        ACR_HOSTNAME: ${{ inputs.azure_acr_hostname }}
+        ACR_USER: ${{ inputs.azure_acr_user }}
+        ACR_PASSWORD: ${{ inputs.azure_acr_password }}
+      run: |
+        set -euo pipefail
+        # printf writes the password verbatim (no echo option/escape handling)
+        printf '%s' "${ACR_PASSWORD}" | skopeo login "${ACR_HOSTNAME}" --username "${ACR_USER}" --password-stdin
--- a/.github/scripts/route_buildkit.sh
+++ b/.github/scripts/route_buildkit.sh
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# =============================================================================
+# route_buildkit.sh - Discover and route BuildKit pods for CI builds
+# =============================================================================
+#
+# ROUTING LOGIC:
+# --------------
+# Routing is optimized for Docker layer caching based on shared base images:
+#   - vLLM and SGLang share the same base image (cuda-dl-base) when CUDA versions match
+#   - TensorRT-LLM uses a different base (pytorch), so it's isolated
+#   - General builds have no framework, grouped with trtllm for isolation
+#
+# Flavors are routed to BuildKit pods using modulo 3 on the pod index:
+#   - Pool 0 (idx % 3 == 0): vllm-cuda12, sglang-cuda12  (share cuda-dl-base + wheel_builder cache)
+#   - Pool 1 (idx % 3 == 1): vllm-cuda13, sglang-cuda13  (share cuda-dl-base + wheel_builder cache)
+#   - Pool 2 (idx % 3 == 2): trtllm-cuda13, general      (isolated - different/no framework base)
+#
+# FALLBACK: If no pods match the target pool, the highest available index is used.
+#
+# EXPECTED ROUTING TABLE (pod indices returned for each flavor):
+# +------+-------------+---------------+-------------+---------------+---------------+---------+
+# | Pods | vllm-cuda12 | sglang-cuda12 | vllm-cuda13 | sglang-cuda13 | trtllm-cuda13 | general |
+# |      | (mod 0)     | (mod 0)       | (mod 1)     | (mod 1)       | (mod 2)       | (mod 2) |
+# +------+-------------+---------------+-------------+---------------+---------------+---------+
+# |  1   | 0           | 0             | 0 (fb)      | 0 (fb)        | 0 (fb)        | 0 (fb)  |
+# |  2   | 0           | 0             | 1           | 1             | 1 (fb)        | 1 (fb)  |
+# |  3   | 0           | 0             | 1           | 1             | 2             | 2       |
+# |  4   | 0, 3        | 0, 3          | 1           | 1             | 2             | 2       |
+# |  5   | 0, 3        | 0, 3          | 1, 4        | 1, 4          | 2             | 2       |
+# |  6   | 0, 3        | 0, 3          | 1, 4        | 1, 4          | 2, 5          | 2, 5    |
+# +------+-------------+---------------+-------------+---------------+---------------+---------+
+# (fb) = fallback - no pods matched target pool, returns max available index
+#
+# =============================================================================
+set -e
+# --- ARGUMENT PARSING ---
+# Recognised flags, each followed by exactly one value: --arch, --flavor, --cuda.
+# Anything else aborts with a usage message.
+ARCH_INPUT=""
+FLAVOR_INPUT=""
+CUDA_VERSION=""
+ALL_FLAVORS=("vllm" "trtllm" "sglang" "general")
+while [[ $# -gt 0 ]]; do
+  case $1 in
+    --arch|--flavor|--cuda)
+      # A trailing flag with no value would make `shift 2` fail and, under
+      # `set -e`, end the script without any message; report it explicitly.
+      if [[ $# -lt 2 ]]; then
+        echo "❌ Error: Missing value for '$1'. Use --arch <amd64|arm64|all> --flavor <vllm|trtllm|sglang|general|all> [--cuda <12.9|13.0>]."
+        exit 1
+      fi
+      case $1 in
+        --arch)   ARCH_INPUT="$2" ;;
+        --flavor) FLAVOR_INPUT="$2" ;;
+        --cuda)   CUDA_VERSION="$2" ;;
+      esac
+      shift 2
+      ;;
+    *)
+      echo "❌ Error: Unknown argument '$1'. Use --arch <amd64|arm64|all> --flavor <vllm|trtllm|sglang|general|all> [--cuda <12.9|13.0>]."
+      exit 1
+      ;;
+  esac
+done
+# Print "❌ Error: <message>" and terminate the script with status 1.
+fail() {
+  echo "❌ Error: $1"
+  exit 1
+}
+# --arch and --flavor are both mandatory
+if [[ -z "$ARCH_INPUT" ]]; then
+  fail "Must specify --arch <amd64|arm64|all>."
+fi
+if [[ -z "$FLAVOR_INPUT" ]]; then
+  fail "Must specify --flavor <vllm|trtllm|sglang|general|all>."
+fi
+# Only the "general" flavor may omit the CUDA version
+if [[ -z "$CUDA_VERSION" && "$FLAVOR_INPUT" != "general" ]]; then
+  fail "Must specify --cuda <12.9|13.0> for flavor '$FLAVOR_INPUT'."
+fi
+# Reject unknown architectures
+case "$ARCH_INPUT" in
+  amd64|arm64|all) ;;
+  *) fail "Invalid arch '$ARCH_INPUT'. Must be amd64, arm64, or all." ;;
+esac
+# Reject unknown flavors
+case "$FLAVOR_INPUT" in
+  vllm|trtllm|sglang|general|all) ;;
+  *) fail "Invalid flavor '$FLAVOR_INPUT'. Must be vllm, trtllm, sglang, general, or all." ;;
+esac
+# Reject unknown CUDA versions; an empty value is let through here because the
+# "required unless general" rule was already enforced above
+case "$CUDA_VERSION" in
+  ""|12.9|13.0|13.1) ;;
+  *) fail "Invalid CUDA version '$CUDA_VERSION'. Must be 12.9, 13.0, or 13.1." ;;
+esac
+# Expand "all" into the concrete list of architectures to process
+ARCHS=("$ARCH_INPUT")
+if [[ "$ARCH_INPUT" == "all" ]]; then
+  ARCHS=("amd64" "arm64")
+fi
+# Expand "all" into the concrete list of flavors to process
+FLAVORS=("$FLAVOR_INPUT")
+if [[ "$FLAVOR_INPUT" == "all" ]]; then
+  FLAVORS=("${ALL_FLAVORS[@]}")
+fi
+# --- CONFIGURATION ---
+# Namespace and port are used to build the per-pod DNS names and tcp:// worker addresses.
+NAMESPACE="buildkit"
+PORT="1234"
+MAX_POD_CHECK=10  # How many pod indices to probe (indices 0 to MAX_POD_CHECK-1, i.e. 0 to 9)
+# ---------------------
+# Pod discovery relies on nslookup, so fail early when it is not installed
+if ! command -v nslookup &> /dev/null; then
+    echo "❌ Error: nslookup not found. Please install dnsutils or bind-tools."
+    exit 1
+fi
+# --- RETRY CONFIGURATION ---
+# Both values can be overridden from the environment; RETRY_DELAY is in seconds.
+MAX_RETRIES=${MAX_RETRIES:-8}
+RETRY_DELAY=${RETRY_DELAY:-30}
+# ---------------------------
+# Probe pod indices 0..MAX_POD_CHECK-1 and print, space-separated, those whose
+# per-pod DNS name resolves. Gaps are preserved (e.g. "0 2" when pod-1 is down).
+#   $1 - architecture (used in the pod name)
+#   $2 - headless service name
+get_active_indices() {
+  local arch=$1
+  local service_name=$2
+  local resolved=()
+  local candidate=0
+  local fqdn
+  while (( candidate < MAX_POD_CHECK )); do
+    fqdn="buildkit-${arch}-${candidate}.${service_name}.${NAMESPACE}.svc.cluster.local"
+    # A successful lookup is treated as "pod is up"
+    if nslookup "$fqdn" >/dev/null 2>&1; then
+      resolved+=("$candidate")
+    fi
+    candidate=$((candidate + 1))
+  done
+  echo "${resolved[@]}"
+}
+# Select which of the available pod indices a flavor/CUDA combination should use.
+#   $1    - flavor (vllm, trtllm, sglang, general)
+#   $2    - CUDA version (only the major part is used; may be empty)
+#   $3... - available pod indices
+# Prints the indices whose value modulo 3 equals the flavor's pool, or the
+# highest available index when none match. Prints an empty line when no
+# indices are supplied. A debug line goes to stderr.
+get_target_indices() {
+  local flavor=$1
+  local cuda_version=$2
+  local -a pool=("${@:3}")
+  if (( ${#pool[@]} == 0 )); then
+    echo ""
+    return
+  fi
+  local route_key="${flavor}-cuda${cuda_version%%.*}"
+  # Pool 2 is the default: it holds trtllm-cuda13, general-* and every
+  # combination not listed below.
+  local target_mod=2
+  case "$route_key" in
+    # Pool 0: CUDA 12 builds of vLLM and SGLang
+    vllm-cuda12|sglang-cuda12) target_mod=0 ;;
+    # Pool 1: CUDA 13 builds of vLLM and SGLang
+    vllm-cuda13|sglang-cuda13) target_mod=1 ;;
+  esac
+  echo "    [DEBUG] Routing Key: '$route_key' -> Worker Index Modulo: $target_mod" >&2
+  # Single pass: collect pool members and remember the highest index for the fallback
+  local -a matches=()
+  local highest=${pool[0]}
+  local candidate
+  for candidate in "${pool[@]}"; do
+    if (( candidate % 3 == target_mod )); then
+      matches+=("$candidate")
+    fi
+    if [ "$candidate" -gt "$highest" ]; then
+      highest=$candidate
+    fi
+  done
+  if (( ${#matches[@]} > 0 )); then
+    echo "${matches[@]}"
+  else
+    echo "$highest"
+  fi
+}
+# Process each architecture: discover live pods (retrying while none resolve),
+# then write one "<flavor>_<arch>=<comma-separated tcp addresses>" line per
+# flavor to $GITHUB_OUTPUT.
+for ARCH in "${ARCHS[@]}"; do
+  SERVICE_NAME="buildkit-${ARCH}-headless"
+  POD_PREFIX="buildkit-${ARCH}"
+  echo "🔍 Discovering active Buildkit pods for ${ARCH} via DNS (checking indices 0-$((MAX_POD_CHECK-1)))..."
+  # Get the actual list of alive indices (e.g., "0 2 5")
+  ACTIVE_INDICES=($(get_active_indices "$ARCH" "$SERVICE_NAME"))
+  COUNT=${#ACTIVE_INDICES[@]}
+  # Retry loop if no pods found
+  if [ "$COUNT" -eq "0" ]; then
+    echo "⚠️  DNS returned 0 records for ${ARCH}. KEDA should be triggering a new buildkit pod."
+    for (( retry=1; retry<=MAX_RETRIES; retry++ )); do
+      echo "⏳ Waiting ${RETRY_DELAY}s for BuildKit pods to become available (attempt ${retry}/${MAX_RETRIES})..."
+      sleep "$RETRY_DELAY"
+      # Re-probe for active indices
+      ACTIVE_INDICES=($(get_active_indices "$ARCH" "$SERVICE_NAME"))
+      COUNT=${#ACTIVE_INDICES[@]}
+      if [ "$COUNT" -gt "0" ]; then
+        echo "✅ BuildKit pods for ${ARCH} are now available!"
+        break
+      fi
+      # Final attempt failed: emit empty outputs for this arch and stop the whole
+      # script with status 1 (any architectures later in ARCHS are not processed).
+      if [ "$retry" -eq "$MAX_RETRIES" ]; then
+        echo "::warning::No remote BuildKit pods available for ${ARCH} after ${MAX_RETRIES} attempts. Falling back to Kubernetes driver."
+        echo "⚠️  Warning: No remote BuildKit pods available for ${ARCH}."
+        for flavor in "${FLAVORS[@]}"; do
+          echo "${flavor}_${ARCH}=" >> "$GITHUB_OUTPUT"
+        done
+        exit 1
+      fi
+    done
+  fi
+  echo "✅ Found $COUNT active pod(s) (Indices: ${ACTIVE_INDICES[*]})."
+  # Iterate over flavors and set outputs
+  for flavor in "${FLAVORS[@]}"; do
+    # Pass the discovered ACTIVE_INDICES to the routing function
+    TARGET_INDICES=($(get_target_indices "$flavor" "$CUDA_VERSION" "${ACTIVE_INDICES[@]}"))
+    # Build a comma-separated list of tcp:// addresses, one per selected pod
+    ADDRS=""
+    for idx in "${TARGET_INDICES[@]}"; do
+      POD_NAME="${POD_PREFIX}-${idx}"
+      ADDR="tcp://${POD_NAME}.${SERVICE_NAME}.${NAMESPACE}.svc.cluster.local:${PORT}"
+      if [ -z "$ADDRS" ]; then
+        ADDRS="$ADDR"
+      else
+        ADDRS="${ADDRS},${ADDR}"
+      fi
+    done
+    echo "    -> Routing ${flavor}_${ARCH} to pod indices: ${TARGET_INDICES[*]}"
+    # Write to GitHub Output
+    echo "${flavor}_${ARCH}=$ADDRS" >> "$GITHUB_OUTPUT"
+  done
+done
\ No newline at end of file
--- a/.github/workflows/build-test-distribute-flavor-matrix.yml
+++ b/.github/workflows/build-test-distribute-flavor-matrix.yml
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+name: Framework Matrix Pipeline
+# Reusable workflow: it is only triggered via workflow_call from another workflow.
+on:
+  workflow_call:
+    inputs:
+      framework:
+        description: 'Framework name (vllm, sglang, trtllm)'
+        required: true
+        type: string
+      # JSON array encoded as a string; parsed with fromJson() to form the job matrix
+      platforms:
+        description: 'Platforms to build (JSON array, e.g., ["amd64", "arm64"])'
+        required: true
+        type: string
+      # JSON array encoded as a string; parsed with fromJson() to form the job matrix
+      cuda_versions:
+        description: 'CUDA versions to build (JSON array, e.g., ["12.9", "13.0"])'
+        required: true
+        type: string
+      # The pipeline job additionally forces this off for trtllm on arm64
+      run_tests:
+        description: 'Whether to run pytest'
+        required: false
+        type: boolean
+        default: true
+      # The pipeline job additionally restricts the copy to the amd64 platform
+      copy_to_acr:
+        description: 'Whether to copy images to ACR'
+        required: false
+        type: boolean
+        default: true
+      builder_name:
+        description: 'Buildkit builder name'
+        required: true
+        type: string
+      extra_tags:
+        description: 'Additional tags (newline-separated, -$platform suffix auto-appended)'
+        required: false
+        type: string
+        default: ''
+      build_image:
+        description: 'Whether to build image'
+        required: false
+        type: boolean
+        default: true
+      no_cache:
+        description: 'Disable Docker build cache'
+        required: false
+        type: boolean
+        default: false
+      push_image:
+        description: 'Push image to registry'
+        required: false
+        type: boolean
+        default: true
+    # Registry credentials are required; the remaining secrets are optional
+    secrets:
+      AWS_DEFAULT_REGION:
+        required: true
+      AWS_ACCOUNT_ID:
+        required: true
+      AZURE_ACR_HOSTNAME:
+        required: true
+      AZURE_ACR_USER:
+        required: true
+      AZURE_ACR_PASSWORD:
+        required: true
+      CI_TOKEN:
+        required: false
+      SCCACHE_S3_BUCKET:
+        required: false
+      AWS_ACCESS_KEY_ID:
+        required: false
+      AWS_SECRET_ACCESS_KEY:
+        required: false
+      HF_TOKEN:
+        required: false
+jobs:
+  # Fans out over every platform x CUDA version combination and runs the
+  # single-flavor workflow once per combination.
+  pipeline:
+    strategy:
+      # One failing combination does not cancel the others
+      fail-fast: false
+      matrix:
+        # Both inputs arrive as JSON-array strings and are decoded here
+        platform: ${{ fromJson(inputs.platforms) }}
+        cuda_version: ${{ fromJson(inputs.cuda_versions) }}
+    name: ${{ inputs.framework }}-cuda${{ matrix.cuda_version }}-${{ matrix.platform }}
+    uses: ./.github/workflows/build-test-distribute-flavor.yml
+    with:
+      framework: ${{ inputs.framework }}
+      platform: ${{ matrix.platform }}
+      cuda_version: ${{ matrix.cuda_version }}
+      extra_tags: ${{ inputs.extra_tags }}
+      no_cache: ${{ inputs.no_cache }}
+      builder_name: ${{ inputs.builder_name }}
+      build_image: ${{ inputs.build_image }}
+      run_tests: ${{ inputs.run_tests && !(inputs.framework == 'trtllm' && matrix.platform == 'arm64') }} # trtllm tests on arm64 are not supported
+      copy_to_acr: ${{ inputs.copy_to_acr && matrix.platform == 'amd64' }} # no reason to copy ARM images to ACR
+      push_image: ${{ inputs.push_image }}
+    # Forward all of the caller's secrets to the called workflow
+    secrets: inherit
--- a/.github/workflows/build-test-distribute-flavor.yml
+++ b/.github/workflows/build-test-distribute-flavor.yml
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+name: Build, Test, and Copy Framework Image
+# Reusable workflow interface: one invocation handles a single
+# framework / platform / CUDA version combination.
+on:
+  workflow_call:
+    inputs:
+      # --- What to build -----------------------------------------------------
+      framework:
+        description: 'Framework name (vllm, sglang, trtllm)'
+        required: true
+        type: string
+      platform:
+        description: 'Platform to build (amd64 or arm64)'
+        required: true
+        type: string
+      cuda_version:
+        description: 'CUDA version to build (e.g., 12.9, 13.0)'
+        required: true
+        type: string
+      # --- Stage toggles -----------------------------------------------------
+      # run_tests gates the `test` job, copy_to_acr gates the `copy-to-acr` job.
+      run_tests:
+        description: 'Whether to run pytest'
+        required: false
+        type: boolean
+        default: true
+      copy_to_acr:
+        description: 'Whether to copy images to ACR'
+        required: false
+        type: boolean
+        default: true
+      # --- Build options -----------------------------------------------------
+      builder_name:
+        description: 'Buildkit builder name'
+        required: true
+        type: string
+      extra_tags:
+        description: 'Additional tags (newline-separated, -$platform suffix auto-appended)'
+        required: false
+        type: string
+        default: ''
+      # build_image gates the `build` job; the `test` job also requires it.
+      build_image:
+        description: 'Whether to build image'
+        required: false
+        type: boolean
+        default: true
+      no_cache:
+        description: 'Disable Docker build cache'
+        required: false
+        type: boolean
+        default: false
+      push_image:
+        description: 'Push image to registry'
+        required: false
+        type: boolean
+        default: true
+      no_load:
+        description: 'Do not load the image into docker (you must have dind installed if you want to load the image)'
+        required: false
+        type: boolean
+        default: true
+      # The step summary is only written when push_image is also true.
+      show_summary:
+        description: 'Show summary'
+        required: false
+        type: boolean
+        default: false
+    secrets:
+      # Required: used to form the ECR registry hostname and to log in to ACR.
+      AWS_DEFAULT_REGION:
+        required: true
+      AWS_ACCOUNT_ID:
+        required: true
+      AZURE_ACR_HOSTNAME:
+        required: true
+      AZURE_ACR_USER:
+        required: true
+      AZURE_ACR_PASSWORD:
+        required: true
+      # Optional: forwarded to the docker-remote-build action (CI_TOKEN,
+      # SCCACHE_S3_BUCKET, AWS_* keys) and to the pytest action (HF_TOKEN).
+      CI_TOKEN:
+        required: false
+      SCCACHE_S3_BUCKET:
+        required: false
+      AWS_ACCESS_KEY_ID:
+        required: false
+      AWS_SECRET_ACCESS_KEY:
+        required: false
+      HF_TOKEN:
+        required: false
+    outputs:
+      # NOTE(review): this expands to "<sha>-<framework>-<platform>", whereas the
+      # copy-to-acr job pushes "<sha>-<framework>-cuda<major>-<platform>". The
+      # value therefore does not name the tag that exists in ACR — confirm what
+      # callers expect. When copy-to-acr is skipped the value is "-<platform>".
+      image_tag:
+        description: 'Image tag in ACR'
+        value: ${{ jobs.copy-to-acr.outputs.target_tag_plain }}-${{ inputs.platform }}
+jobs:
+  # ============================================================================
+  # BUILD
+  # ============================================================================
+  # Builds the runtime image for one framework / platform / CUDA version through
+  # the init-dynamo-builder and docker-remote-build actions.
+  #
+  # Workflow inputs, secrets and step outputs are handed to shell scripts via
+  # `env:` rather than being expanded inside the script text, so their values
+  # can never be interpreted as shell syntax.
+  build:
+    if: inputs.build_image
+    name: Build ${{ inputs.framework }}-cuda${{ inputs.cuda_version }}-${{ inputs.platform }}
+    runs-on: prod-builder-v2
+    outputs:
+      # "<sha>-<framework>"; later jobs append "-cuda<major>-<platform>".
+      target_tag_plain: ${{ steps.calculate-target-tag.outputs.target_tag_plain }}
+    env:
+      FRAMEWORK: ${{ inputs.framework }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
+        with:
+          lfs: true
+      - name: Calculate extra tags with platform suffix # will get redundant upon multi arch builds support
+        id: extra-tags
+        shell: bash
+        env:
+          EXTRA_TAGS: ${{ inputs.extra_tags }}
+          PLATFORM: ${{ inputs.platform }}
+          ECR_REGISTRY: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
+        run: |
+          # Turn every non-empty line of EXTRA_TAGS into a full image reference
+          # "<registry>/ai-dynamo/dynamo:<tag>-<platform>", one per line.
+          if [ -n "$EXTRA_TAGS" ]; then
+            RESULT=""
+            while IFS= read -r tag; do
+              if [ -n "$tag" ]; then
+                RESULT+="${ECR_REGISTRY}/ai-dynamo/dynamo:${tag}-${PLATFORM}"$'\n'
+              fi
+            done <<< "$EXTRA_TAGS"
+            # Multi-line value: use the heredoc form of GITHUB_OUTPUT.
+            echo "tags<<EOF" >> "$GITHUB_OUTPUT"
+            echo "$RESULT" >> "$GITHUB_OUTPUT"
+            echo "EOF" >> "$GITHUB_OUTPUT"
+          else
+            echo "tags=" >> "$GITHUB_OUTPUT"
+          fi
+      - name: Docker Login
+        uses: ./.github/actions/docker-login
+        with:
+          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
+          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
+          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
+          azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
+          azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
+      - name: Calculate target tag
+        id: calculate-target-tag
+        shell: bash
+        env:
+          CUDA_VERSION_RAW: ${{ inputs.cuda_version }}
+          PLATFORM: ${{ inputs.platform }}
+          COMMIT_SHA: ${{ github.sha }}
+          ECR_REGISTRY: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
+        run: |
+          # Only the CUDA major version goes into the tag (e.g. 12.9 -> 12).
+          CUDA_VERSION="${CUDA_VERSION_RAW%%.*}"
+          TARGET_TAG_PLAIN="${COMMIT_SHA}-${FRAMEWORK}"
+          DEFAULT_TARGET_IMAGE_URI="${ECR_REGISTRY}/ai-dynamo/dynamo:${TARGET_TAG_PLAIN}-cuda${CUDA_VERSION}-${PLATFORM}"
+          echo "default_target_image_uri=${DEFAULT_TARGET_IMAGE_URI}" >> "$GITHUB_OUTPUT"
+          echo "target_tag_plain=${TARGET_TAG_PLAIN}" >> "$GITHUB_OUTPUT"
+          echo "cuda_version_plain=${CUDA_VERSION}" >> "$GITHUB_OUTPUT"
+      - name: Initialize Dynamo Builder
+        uses: ./.github/actions/init-dynamo-builder
+        with:
+          builder_name: ${{ inputs.builder_name }}
+          flavor: ${{ inputs.framework }}
+          arch: ${{ inputs.platform }}
+          cuda_version: ${{ inputs.cuda_version }}
+      - name: Print Build Container inputs
+        shell: bash
+        env:
+          IMAGE_TAG: ${{ steps.calculate-target-tag.outputs.default_target_image_uri }}
+          PLATFORM: ${{ inputs.platform }}
+          CUDA_VERSION: ${{ inputs.cuda_version }}
+          NO_CACHE: ${{ inputs.no_cache }}
+          EXTRA_TAGS: ${{ steps.extra-tags.outputs.tags }}
+          PUSH_IMAGE: ${{ inputs.push_image }}
+          NO_LOAD: ${{ inputs.no_load }}
+        run: |
+          # Mirrors the `with:` values of the "Build Container" step below.
+          echo "=== Build Container Inputs ==="
+          echo "image_tag: ${IMAGE_TAG}"
+          echo "framework: ${FRAMEWORK}"
+          echo "target: runtime"
+          echo "platform: linux/${PLATFORM}"
+          echo "cuda_version: ${CUDA_VERSION}"
+          echo "no_cache: ${NO_CACHE}"
+          echo "extra_tags: ${EXTRA_TAGS}"
+          echo "push_image: ${PUSH_IMAGE}"
+          echo "no_load: ${NO_LOAD}"
+      - name: Build Container
+        id: build-image
+        uses: ./.github/actions/docker-remote-build
+        with:
+          image_tag: ${{ steps.calculate-target-tag.outputs.default_target_image_uri }}
+          framework: ${{ inputs.framework }}
+          target: runtime
+          platform: linux/${{ inputs.platform }}
+          cuda_version: ${{ inputs.cuda_version }}
+          ci_token: ${{ secrets.CI_TOKEN }}
+          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
+          sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
+          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
+          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          no_cache: ${{ inputs.no_cache }}
+          extra_tags: ${{ steps.extra-tags.outputs.tags }}
+          push_image: ${{ inputs.push_image }}
+          no_load: ${{ inputs.no_load }}
+      - name: Show summary
+        shell: bash
+        # A summary only makes sense when an image was actually pushed.
+        if: ${{ inputs.push_image && inputs.show_summary }}
+        env:
+          PLATFORM: ${{ inputs.platform }}
+          CUDA_VERSION: ${{ inputs.cuda_version }}
+          IMAGE_URI: ${{ steps.calculate-target-tag.outputs.default_target_image_uri }}
+        run: |
+          {
+            echo "### 🐳 ${FRAMEWORK}-cuda${CUDA_VERSION}-${PLATFORM} Default Image"
+            echo ""
+            echo "| Image URI |"
+            echo "|-----|"
+            echo "| \`${IMAGE_URI}\` |"
+          } >> "$GITHUB_STEP_SUMMARY"
+  # ============================================================================
+  # TEST
+  # ============================================================================
+  # Pulls the image produced by the `build` job, sanity-checks it, then runs the
+  # pre-merge pytest suites (CPU-only everywhere, GPU tests on amd64 only).
+  #
+  # Workflow inputs, secrets and step outputs reach shell scripts through `env:`
+  # instead of being expanded inside the script text, so their values can never
+  # be interpreted as shell syntax.
+  test:
+    if: inputs.run_tests && inputs.build_image
+    needs: [build]
+    name: Test ${{ inputs.framework }}-cuda${{ inputs.cuda_version }}-${{ inputs.platform }}
+    runs-on: ${{ inputs.platform == 'amd64' && 'prod-tester-amd-gpu-v1' || 'prod-tester-arm-v1' }}
+    env:
+      FRAMEWORK: ${{ inputs.framework }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
+      - name: Calculate target tag
+        id: calculate-target-tag
+        shell: bash
+        env:
+          CUDA_VERSION_RAW: ${{ inputs.cuda_version }}
+          PLATFORM: ${{ inputs.platform }}
+          TARGET_TAG_PLAIN: ${{ needs.build.outputs.target_tag_plain }}
+          ECR_REGISTRY: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
+        run: |
+          # Rebuild the image reference the build job tagged:
+          # "<registry>/ai-dynamo/dynamo:<sha>-<framework>-cuda<major>-<platform>".
+          CUDA_VERSION="${CUDA_VERSION_RAW%%.*}"
+          echo "cuda_version_plain=${CUDA_VERSION}" >> "$GITHUB_OUTPUT"
+          TEST_IMAGE="${ECR_REGISTRY}/ai-dynamo/dynamo:${TARGET_TAG_PLAIN}-cuda${CUDA_VERSION}-${PLATFORM}"
+          echo "test_image=${TEST_IMAGE}" >> "$GITHUB_OUTPUT"
+      - name: Docker Login
+        uses: ./.github/actions/docker-login
+        with:
+          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
+          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
+          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
+          azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
+          azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
+      - name: Pull relevant images
+        shell: bash
+        env:
+          TEST_IMAGE: ${{ steps.calculate-target-tag.outputs.test_image }}
+        run: |
+          start_time=$(date +%s)
+          docker pull "${TEST_IMAGE}"
+          docker pull quay.io/minio/minio
+          end_time=$(date +%s)
+          duration=$((end_time - start_time))
+          echo "⏱️ Image pull duration: ${duration}s"
+      - name: Run Sanity Check on Runtime Image
+        shell: bash
+        env:
+          TEST_IMAGE: ${{ steps.calculate-target-tag.outputs.test_image }}
+        run: |
+          echo "Running sanity check on image: ${TEST_IMAGE}"
+          # Run the sanity check script inside the container
+          # The script is located in /workspace/deploy/sanity_check.py in runtime containers
+          export WORKSPACE=/workspace
+          # Disable errexit so the exit code can be captured and reported.
+          set +e
+          docker run --rm "${TEST_IMAGE}" python ${WORKSPACE}/deploy/sanity_check.py --runtime-check --no-gpu-check
+          SANITY_CHECK_EXIT_CODE=$?
+          set -e
+          if [ ${SANITY_CHECK_EXIT_CODE} -ne 0 ]; then
+            echo "ERROR: Sanity check failed - ai-dynamo packages not properly installed"
+            exit ${SANITY_CHECK_EXIT_CODE}
+          else
+            echo "✅ Sanity check passed"
+          fi
+      # Run CPU-only tests first (parallelized for speed)
+      # These are unit tests marked with gpu_0 that don't require GPU hardware
+      - name: Run CPU-only tests (parallelized)
+        uses: ./.github/actions/pytest
+        with:
+          image_tag: ${{ steps.calculate-target-tag.outputs.test_image }}
+          pytest_marks: ${{ format('pre_merge and {0} and gpu_0', inputs.framework) }}
+          framework: ${{ inputs.framework }}
+          test_type: "pre_merge_cpu"
+          platform_arch: ${{ inputs.platform }}
+          enable_mypy: 'true'
+          hf_token: ${{ secrets.HF_TOKEN }}
+          parallel_mode: 'auto'
+          dind_as_sidecar: 'true'
+      # Run GPU tests sequentially (only on amd64 runners with GPU)
+      # These are e2e tests marked with gpu_1 that require GPU hardware
+      - name: Run GPU tests (sequential)
+        if: ${{ inputs.platform == 'amd64' }} # We only run GPU tests on amd64
+        uses: ./.github/actions/pytest
+        with:
+          image_tag: ${{ steps.calculate-target-tag.outputs.test_image }}
+          pytest_marks: ${{ format('pre_merge and {0} and gpu_1', inputs.framework) }}
+          framework: ${{ inputs.framework }}
+          test_type: "pre_merge_gpu"
+          platform_arch: ${{ inputs.platform }}
+          enable_mypy: 'false' # already covered by CPU tests
+          hf_token: ${{ secrets.HF_TOKEN }}
+          parallel_mode: 'none'
+          dind_as_sidecar: 'true'
+  # ============================================================================
+  # COPY TO ACR
+  # ============================================================================
+  # Copies the built image from the source (ECR) registry to the Azure registry
+  # under the same "<sha>-<framework>-cuda<major>-<platform>" tag.
+  copy-to-acr:
+    needs: [build, test]
+    # Run if copy_to_acr is true AND build succeeded AND (test succeeded OR test was skipped).
+    # always() is needed so a skipped `test` job does not skip this job as well.
+    if: |
+      always() &&
+      inputs.copy_to_acr &&
+      needs.build.result == 'success' &&
+      (needs.test.result == 'success' || needs.test.result == 'skipped')
+    name: copy ${{ inputs.framework }}-cuda${{ inputs.cuda_version }}-${{ inputs.platform }}
+    runs-on: prod-default-small-v2
+    outputs:
+      # Re-exported so the workflow-level `image_tag` output can read it.
+      target_tag_plain: ${{ needs.build.outputs.target_tag_plain }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
+      - name: Calculate target tag
+        id: calculate-target-tag
+        shell: bash
+        env:
+          # Passed through the environment so the value is never parsed as shell.
+          CUDA_VERSION_RAW: ${{ inputs.cuda_version }}
+        run: |
+          # Only the CUDA major version is part of the tag (e.g. 12.9 -> 12).
+          CUDA_VERSION="${CUDA_VERSION_RAW%%.*}"
+          echo "cuda_version_plain=${CUDA_VERSION}" >> "$GITHUB_OUTPUT"
+      - name: Copy image to target registry
+        uses: ./.github/actions/skopeo-copy
+        with:
+          source_registry: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
+          source_image: ai-dynamo/dynamo
+          source_tag: ${{ needs.build.outputs.target_tag_plain }}-cuda${{ steps.calculate-target-tag.outputs.cuda_version_plain }}-${{ inputs.platform }}
+          target_registry: ${{ secrets.AZURE_ACR_HOSTNAME }}
+          target_image: ai-dynamo/dynamo
+          target_tag: ${{ needs.build.outputs.target_tag_plain }}-cuda${{ steps.calculate-target-tag.outputs.cuda_version_plain }}-${{ inputs.platform }}
+          source_aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
+          source_aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
+          target_azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
+          target_azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
+          target_azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
--- a/.github/workflows/ci-test-suite.yml
+++ b/.github/workflows/ci-test-suite.yml
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 # Reusable CI Test Suite Workflow

--- a/.github/workflows/post-merge-ci.yml
+++ b/.github/workflows/post-merge-ci.yml
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 name: Post-Merge CI Pipeline

--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
--- a/container/build.sh
+++ b/container/build.sh
@@ -22,6 +22,7 @@ fi
 set -e
 TAG=
+PRIMARY_TAG=
 RUN_PREFIX=
 PLATFORM=linux/amd64
@@ -301,7 +302,12 @@ get_options() {
            ;;
        --tag)
            if [ "$2" ]; then
+                if [ -z "$TAG" ]; then
                    TAG="--tag $2"
+                    PRIMARY_TAG="$2"
+                else
+                    TAG+=" --tag $2"
+                fi
                shift
            else
                missing_requirement "$1"
@@ -475,8 +481,10 @@ get_options() {
    if [ -z "$TAG" ]; then
        TAG="--tag dynamo:${VERSION}-${FRAMEWORK,,}"
+        PRIMARY_TAG="dynamo:${VERSION}-${FRAMEWORK,,}"
        if [ -n "${TARGET}" ] && [ "${TARGET}" != "local-dev" ]; then
            TAG="${TAG}-${TARGET}"
+            PRIMARY_TAG="${PRIMARY_TAG}-${TARGET}"
        fi
    fi
@@ -540,7 +548,7 @@ show_help() {
    echo "  [--build-arg additional build args to pass to docker build]"
    echo "  [--cache-from cache location to start from]"
    echo "  [--cache-to location where to cache the build output]"
-    echo "  [--tag tag for image]"
+    echo "  [--tag tag for image (can be specified multiple times)]"
    echo "  [--uid user ID for local-dev images (only with --target local-dev)]"
    echo "  [--gid group ID for local-dev images (only with --target local-dev)]"
    echo "  [--no-cache disable docker build cache]"
@@ -1010,7 +1018,7 @@ if [[ -z "${TARGET:-}" || "${TARGET:-}" == "dev" || "${TARGET:-}" == "local-dev"
    BUILD_ARGS+=" --build-arg FRAMEWORK=${FRAMEWORK,,} "
    # Preserve historical tagging behavior for dev/local-dev (build.sh used to delegate out).
-    base="${TAG#--tag }"
+    base="${PRIMARY_TAG}"
    base="${base%-runtime}"
    base="${base%-local-dev}"
    base="${base%-dev}"
@@ -1096,7 +1104,7 @@ fi
 # Use BuildKit for enhanced metadata
 if docker buildx version &>/dev/null; then
-    $RUN_PREFIX docker buildx build --progress=plain${LOAD_FLAG}${PUSH} -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE 2>&1 | tee "${SINGLE_BUILD_LOG}"
+    $RUN_PREFIX docker buildx build --progress=plain ${LOAD_FLAG} ${PUSH} -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE 2>&1 | tee "${SINGLE_BUILD_LOG}"
    BUILD_EXIT_CODE=${PIPESTATUS[0]}
 else
    $RUN_PREFIX DOCKER_BUILDKIT=1 docker build --progress=plain -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE 2>&1 | tee "${SINGLE_BUILD_LOG}"
@@ -1110,8 +1118,8 @@ fi
 # Handle --make-efa flag: add AWS EFA layer on top of the built image
 # This runs BEFORE local-dev so the flow is: dev -> dev-aws -> local-dev-aws
 if [[ "${MAKE_EFA:-}" == "true" ]]; then
-    # Get the base image that was just built (dev or runtime)
+    # Get the base image that was just built (use PRIMARY_TAG to avoid parsing issues)
-    BASE_IMAGE_FOR_EFA=$(echo "$TAG" | sed 's/--tag //')
+    BASE_IMAGE_FOR_EFA="${PRIMARY_TAG}"
    # Determine the EFA stage based on the target
    # runtime target -> runtime-aws stage