test(planner): isolate planner-family suites [DYN-2534] (#7723)

ab5a31b5 · Alec · GitHub · cc22114d · ab5a31b5 · ab5a31b5
Unverified Commit ab5a31b5 authored Mar 31, 2026 by Alec Committed by GitHub Mar 31, 2026
20 changed files
--- a/.github/actions/build-flavor/action.yml
+++ b/.github/actions/build-flavor/action.yml
@@ -2,8 +2,12 @@ name: 'Build Flavor'
 description: 'Build a single Dynamo framework image (checkout → login → tag → builder → dockerfile → build → test image → summary)'
 inputs:
  framework:
-    description: 'Framework name (vllm, sglang, trtllm)'
+    description: 'Framework name (e.g. dynamo, vllm, sglang, trtllm)'
    required: true
+  builder_flavor:
+    description: 'Optional BuildKit routing flavor override (vllm, sglang, trtllm, general)'
+    required: false
+    default: ''
  target:
    description: 'Target stage for Docker rendering'
    required: true
@@ -12,7 +16,12 @@ inputs:
    required: true
  cuda_version:
    description: 'CUDA version to build (e.g., 12.9, 13.0)'
-    required: true
+    required: false
+    default: ''
+  cpu_only:
+    description: 'Build and tag this image as CPU-only. The shared container render/build path still requires a default CUDA version internally.'
+    required: false
+    default: 'false'
  builder_name:
    description: 'Buildkit builder name'
    required: true
@@ -93,6 +102,12 @@ outputs:
  cuda_version_plain:
    description: 'CUDA major version (e.g., 12 from 12.9)'
    value: ${{ steps.calculate-target-tag.outputs.cuda_version_plain }}
+  image_variant_label:
+    description: 'Image variant label used in job names and cache namespaces (e.g. cpu, cuda12)'
+    value: ${{ steps.calculate-target-tag.outputs.image_variant_label }}
+  image_tag_suffix:
+    description: 'Image tag suffix including leading hyphen (e.g. -cpu, -cuda12)'
+    value: ${{ steps.calculate-target-tag.outputs.image_tag_suffix }}
 runs:
  using: "composite"
@@ -109,44 +124,68 @@ runs:
      id: calculate-target-tag
      shell: bash
      run: |
-        CUDA_VERSION_RAW=${{ inputs.cuda_version }}
+        if [[ "${{ inputs.cpu_only }}" == "true" ]]; then
-        CUDA_VERSION=${CUDA_VERSION_RAW%%.*}
+          CUDA_VERSION_RAW="${{ inputs.cuda_version }}"
+          if [[ -z "$CUDA_VERSION_RAW" ]]; then
+            # Planner still shares the CUDA-backed wheel builder path today even
+            # though the final runtime image is CPU-only. Keep that detail
+            # internal to the build action instead of surfacing it in job names.
+            CUDA_VERSION_RAW="12.9"
+          fi
+          CUDA_VERSION=${CUDA_VERSION_RAW%%.*}
+          IMAGE_VARIANT_LABEL="cpu"
+          IMAGE_TAG_SUFFIX="-cpu"
+          BUILDER_CUDA_VERSION=""
+        else
+          CUDA_VERSION_RAW="${{ inputs.cuda_version }}"
+          if [[ -z "$CUDA_VERSION_RAW" ]]; then
+            echo "cuda_version is required unless cpu_only=true" >&2
+            exit 1
+          fi
+          CUDA_VERSION=${CUDA_VERSION_RAW%%.*}
+          IMAGE_VARIANT_LABEL="cuda${CUDA_VERSION}"
+          IMAGE_TAG_SUFFIX="-cuda${CUDA_VERSION}"
+          BUILDER_CUDA_VERSION="${CUDA_VERSION_RAW}"
+        fi
        EFA_SUFFIX=""
        if [ "${{ inputs.make_efa }}" == "true" ]; then
          EFA_SUFFIX="-efa"
        fi
        TARGET_TAG_PLAIN="${{ github.sha }}-${{ inputs.framework }}-${{ inputs.target }}${EFA_SUFFIX}"
        TEST_TAG_PLAIN="${{ github.sha }}-${{ inputs.framework }}-${{ inputs.target }}${EFA_SUFFIX}-test"
-        DEFAULT_TARGET_IMAGE_URI="${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com/ai-dynamo/dynamo:${TARGET_TAG_PLAIN}-cuda${CUDA_VERSION}"
+        DEFAULT_TARGET_IMAGE_URI="${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com/ai-dynamo/dynamo:${TARGET_TAG_PLAIN}${IMAGE_TAG_SUFFIX}"
-        TEST_IMAGE_URI="${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com/ai-dynamo/dynamo:${TEST_TAG_PLAIN}-cuda${CUDA_VERSION}"
+        TEST_IMAGE_URI="${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com/ai-dynamo/dynamo:${TEST_TAG_PLAIN}${IMAGE_TAG_SUFFIX}"
        echo "default_target_image_uri=${DEFAULT_TARGET_IMAGE_URI}" >> $GITHUB_OUTPUT
        echo "test_image_uri=${TEST_IMAGE_URI}" >> $GITHUB_OUTPUT
        echo "target_tag_plain=${TARGET_TAG_PLAIN}" >> $GITHUB_OUTPUT
        echo "test_tag_plain=${TEST_TAG_PLAIN}" >> $GITHUB_OUTPUT
        echo "cuda_version_plain=${CUDA_VERSION}" >> $GITHUB_OUTPUT
+        echo "effective_cuda_version=${CUDA_VERSION_RAW}" >> $GITHUB_OUTPUT
+        echo "builder_cuda_version=${BUILDER_CUDA_VERSION}" >> $GITHUB_OUTPUT
+        echo "image_variant_label=${IMAGE_VARIANT_LABEL}" >> $GITHUB_OUTPUT
+        echo "image_tag_suffix=${IMAGE_TAG_SUFFIX}" >> $GITHUB_OUTPUT
    - name: Initialize Dynamo Builder
      uses: ./.github/actions/init-dynamo-builder
      with:
        builder_name: ${{ inputs.builder_name }}
-        flavor: ${{ inputs.framework }}
+        flavor: ${{ inputs.builder_flavor != '' && inputs.builder_flavor || inputs.framework }}
        arch: ${{ inputs.platform }}
-        cuda_version: ${{ inputs.cuda_version }}
+        cuda_version: ${{ steps.calculate-target-tag.outputs.builder_cuda_version }}
        fresh_builder: ${{ inputs.fresh_builder }}
    - name: Calculate extra tags
      id: extra-tags
      shell: bash
      env:
        EXTRA_TAGS: ${{ inputs.extra_tags }}
-        CUDA_VERSION: ${{ inputs.cuda_version }}
+        IMAGE_TAG_SUFFIX: ${{ steps.calculate-target-tag.outputs.image_tag_suffix }}
      run: |
-        CUDA_VERSION_MAJOR=${CUDA_VERSION%%.*}
        ECR_REGISTRY="${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com"
        ACR_REGISTRY="${{ inputs.azure_acr_hostname }}"
        RESULT=""
        if [ -n "$EXTRA_TAGS" ]; then
          while IFS= read -r tag; do
            if [ -n "$tag" ]; then
-              RESULT+="${ECR_REGISTRY}/ai-dynamo/dynamo:${tag}-cuda${CUDA_VERSION_MAJOR}"$'\n'
+              RESULT+="${ECR_REGISTRY}/ai-dynamo/dynamo:${tag}${IMAGE_TAG_SUFFIX}"$'\n'
            fi
          done <<< "$EXTRA_TAGS"
        fi
@@ -165,7 +204,8 @@ runs:
        echo "framework: ${{ inputs.framework }}"
        echo "target: ${{ inputs.target }}"
        echo "platform: ${{ inputs.platform }}"
-        echo "cuda_version: ${{ inputs.cuda_version }}"
+        echo "image_variant: ${{ steps.calculate-target-tag.outputs.image_variant_label }}"
+        echo "effective_build_cuda_version: ${{ steps.calculate-target-tag.outputs.effective_cuda_version }}"
        echo "no_cache: ${{ inputs.no_cache }}"
        echo "extra_tags: ${{ steps.extra-tags.outputs.tags }}"
        echo "push_image: ${{ inputs.push_image }}"
@@ -183,7 +223,7 @@ runs:
            --target=${{ inputs.target }} \
            --framework=${{ inputs.framework }} \
            --platform=${{ inputs.platform }} \
-            --cuda-version=${{ inputs.cuda_version }} \
+            --cuda-version=${{ steps.calculate-target-tag.outputs.effective_cuda_version }} \
            ${MAKE_EFA_FLAG} \
            --show-result \
            --output-short-filename
@@ -195,7 +235,7 @@ runs:
        framework: ${{ inputs.framework }}
        target: ${{ inputs.target }}
        platform: ${{ inputs.platform }}
-        cuda_version: ${{ inputs.cuda_version }}
+        cuda_version: ${{ steps.calculate-target-tag.outputs.effective_cuda_version }}
        aws_default_region: ${{ inputs.aws_default_region }}
        sccache_s3_bucket: ${{ inputs.sccache_s3_bucket }}
        aws_account_id: ${{ inputs.aws_account_id }}
@@ -213,17 +253,17 @@ runs:
      uses: ./.github/actions/builder-refresher
      with:
        builder_name: ${{ inputs.builder_name }}
-        flavor: ${{ inputs.framework }}
+        flavor: ${{ inputs.builder_flavor != '' && inputs.builder_flavor || inputs.framework }}
        arch: ${{ inputs.platform }}
-        cuda_version: ${{ inputs.cuda_version }}
+        cuda_version: ${{ steps.calculate-target-tag.outputs.builder_cuda_version }}
    - name: Build and Push Test Image
      if: ${{ inputs.target != 'dev' }}
      shell: bash
      env:
        ECR_HOSTNAME: ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com
      run: |
-        CUDA_MAJOR=${{ steps.calculate-target-tag.outputs.cuda_version_plain }}
+        IMAGE_VARIANT="${{ steps.calculate-target-tag.outputs.image_variant_label }}"
-        CACHE_TAG="test-${{ inputs.framework }}-cuda${CUDA_MAJOR}-cache"
+        CACHE_TAG="test-${{ inputs.framework }}-${IMAGE_VARIANT}-cache"
        CACHE_ARGS="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${CACHE_TAG}"
        CACHE_ARGS+=" --cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${CACHE_TAG}"
        if [[ "$GITHUB_REF_NAME" == "main" ]]; then
@@ -257,7 +297,7 @@ runs:
      shell: bash
      if: ${{ inputs.push_image == 'true' && inputs.show_summary == 'true' }}
      run: |
-        echo "### 🐳 ${{ inputs.framework }}-cuda${{ inputs.cuda_version }} Default Image" >> $GITHUB_STEP_SUMMARY
+        echo "### 🐳 ${{ inputs.framework }}-${{ steps.calculate-target-tag.outputs.image_variant_label }} Default Image" >> $GITHUB_STEP_SUMMARY
        echo "" >> $GITHUB_STEP_SUMMARY
        echo "| Image URI |" >> $GITHUB_STEP_SUMMARY
        echo "|-----|" >> $GITHUB_STEP_SUMMARY

--- a/.github/actions/pytest/action.yml
+++ b/.github/actions/pytest/action.yml
@@ -151,7 +151,7 @@ runs:
          --name ${{ env.CONTAINER_ID }}_pytest \
          -v "${TEST_RESULTS_DIR}:/workspace/test-results" \
          ${{ inputs.image_tag }} \
-          bash -c "${PYTEST_CMD}"
+          sh -c "umask 0022 && ${PYTEST_CMD}"
        TEST_EXIT_CODE=$?
        echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> $GITHUB_ENV
@@ -244,7 +244,7 @@ runs:
          --name ${{ env.CONTAINER_ID }}_pytest \
          -v "${TEST_RESULTS_DIR}:/workspace/test-results" \
          ${{ inputs.image_tag }} \
-          bash -c "${PYTEST_CMD}"
+          sh -c "umask 0022 && ${PYTEST_CMD}"
        TEST_EXIT_CODE=$?
        echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> $GITHUB_ENV

--- a/.github/filters.yaml
+++ b/.github/filters.yaml
@@ -119,8 +119,6 @@ planner:
  - 'container/templates/planner.Dockerfile'
  - 'components/src/dynamo/planner/**'
  - 'components/src/dynamo/global_planner/**'
-  - 'tests/planner/**'
-  - 'tests/global_planner/**'
  - 'components/src/dynamo/profiler/**'
  - 'components/src/dynamo/global_router/**'

--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -64,7 +64,8 @@ planner:
  - changed-files:
      - any-glob-to-any-file:
          - components/src/dynamo/planner/**
-          - tests/planner/**
+          - components/src/dynamo/profiler/**
+          - components/src/dynamo/global_planner/**
 # Deployment labels
 deployment::k8s:

--- a/.github/workflows/build-test-distribute-flavor.yml
+++ b/.github/workflows/build-test-distribute-flavor.yml
@@ -7,9 +7,14 @@ on:
  workflow_call:
    inputs:
      framework:
-        description: 'Framework name (vllm, sglang, trtllm)'
+        description: 'Framework name (e.g. dynamo, vllm, sglang, trtllm)'
        required: true
        type: string
+      builder_flavor:
+        description: 'Optional BuildKit routing flavor override (vllm, sglang, trtllm, general)'
+        required: false
+        type: string
+        default: ''
      target:
        description: 'Target stage for Docker rendering'
        required: true
@@ -20,8 +25,14 @@ on:
        type: string
      cuda_version:
        description: 'CUDA version to build (e.g., 12.9, 13.0)'
-        required: true
+        required: false
        type: string
+        default: ''
+      cpu_only:
+        description: 'Build and test this target as a CPU-only image variant'
+        required: false
+        type: boolean
+        default: false
      build_timeout_minutes:
        description: 'Timeout in minutes for the build step'
        required: false
@@ -148,12 +159,14 @@ jobs:
  # ============================================================================
  build:
    if: inputs.build_image
-    name: Build cuda${{ inputs.cuda_version }}
+    name: Build ${{ inputs.cpu_only && 'cpu' || format('cuda{0}', inputs.cuda_version) }}
    runs-on: prod-builder-v3
    timeout-minutes: ${{ inputs.build_timeout_minutes }}
    outputs:
      target_tag_plain: ${{ steps.build.outputs.target_tag_plain }}
      test_tag_plain: ${{ steps.build.outputs.test_tag_plain }}
+      image_variant_label: ${{ steps.build.outputs.image_variant_label }}
+      image_tag_suffix: ${{ steps.build.outputs.image_tag_suffix }}
      compliance_arches: ${{ steps.compliance-arches.outputs.arches }}
      test_runners: ${{ steps.test-runners.outputs.runners }}
    env:
@@ -194,9 +207,11 @@ jobs:
        uses: ./.github/actions/build-flavor
        with:
          framework: ${{ inputs.framework }}
+          builder_flavor: ${{ inputs.builder_flavor }}
          target: ${{ inputs.target }}
          platform: ${{ inputs.platform }}
          cuda_version: ${{ inputs.cuda_version }}
+          cpu_only: ${{ inputs.cpu_only }}
          builder_name: ${{ inputs.builder_name }}
          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
@@ -225,7 +240,7 @@ jobs:
        ( inputs.run_cpu_only_tests || inputs.run_single_gpu_tests ) &&
        inputs.build_image
    needs: [build]
-    name: Test cuda${{ inputs.cuda_version }} (${{ matrix.arch }})
+    name: Test ${{ inputs.cpu_only && 'cpu' || format('cuda{0}', inputs.cuda_version) }} (${{ matrix.arch }})
    strategy:
      fail-fast: false
      matrix:
@@ -240,12 +255,9 @@ jobs:
        id: calculate-target-tag
        shell: bash
        run: |
-          CUDA_VERSION_RAW=${{ inputs.cuda_version }}
+          RUNTIME_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.target_tag_plain }}${{ needs.build.outputs.image_tag_suffix }}
-          CUDA_VERSION=${CUDA_VERSION_RAW%%.*}
-          echo "cuda_version_plain=${CUDA_VERSION}" >> $GITHUB_OUTPUT
-          RUNTIME_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.target_tag_plain }}-cuda${CUDA_VERSION}
          echo "runtime_image=${RUNTIME_IMAGE}" >> $GITHUB_OUTPUT
-          TEST_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.test_tag_plain }}-cuda${CUDA_VERSION}
+          TEST_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.test_tag_plain }}${{ needs.build.outputs.image_tag_suffix }}
          echo "test_image=${TEST_IMAGE}" >> $GITHUB_OUTPUT
      - name: Docker Login
        uses: ./.github/actions/docker-login
@@ -275,9 +287,13 @@ jobs:
          # Run the sanity check script inside the container
          # The script is located in /workspace/deploy/sanity_check.py in runtime containers
          export WORKSPACE=/workspace
+          SANITY_FLAGS="--runtime-check --no-gpu-check"
+          if [[ "${{ inputs.target }}" == "planner" ]]; then
+            SANITY_FLAGS="${SANITY_FLAGS} --no-framework-check"
+          fi
          set +e
-          docker run --rm "${{ steps.calculate-target-tag.outputs.runtime_image }}" python ${WORKSPACE}/deploy/sanity_check.py --runtime-check --no-gpu-check
+          docker run --rm "${{ steps.calculate-target-tag.outputs.runtime_image }}" python ${WORKSPACE}/deploy/sanity_check.py ${SANITY_FLAGS}
          SANITY_CHECK_EXIT_CODE=$?
          set -e
          if [ ${SANITY_CHECK_EXIT_CODE} -ne 0 ]; then
@@ -328,7 +344,7 @@ jobs:
        inputs.run_multi_gpu_tests &&
        inputs.build_image
    needs: [build]
-    name: Multi-gpu test cuda${{ inputs.cuda_version }}
+    name: Multi-gpu test ${{ inputs.cpu_only && 'cpu' || format('cuda{0}', inputs.cuda_version) }}
    runs-on: prod-tester-amd-gpu-4-v1
    env:
      FRAMEWORK: ${{ inputs.framework }}
@@ -339,12 +355,9 @@ jobs:
        id: calculate-target-tag
        shell: bash
        run: |
-          CUDA_VERSION_RAW=${{ inputs.cuda_version }}
+          RUNTIME_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.target_tag_plain }}${{ needs.build.outputs.image_tag_suffix }}
-          CUDA_VERSION=${CUDA_VERSION_RAW%%.*}
-          echo "cuda_version_plain=${CUDA_VERSION}" >> $GITHUB_OUTPUT
-          RUNTIME_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.target_tag_plain }}-cuda${CUDA_VERSION}
          echo "runtime_image=${RUNTIME_IMAGE}" >> $GITHUB_OUTPUT
-          TEST_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.test_tag_plain }}-cuda${CUDA_VERSION}
+          TEST_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.test_tag_plain }}${{ needs.build.outputs.image_tag_suffix }}
          echo "test_image=${TEST_IMAGE}" >> $GITHUB_OUTPUT
      - name: Docker Login
        uses: ./.github/actions/docker-login
@@ -390,7 +403,7 @@ jobs:
      fail-fast: false
      matrix:
        arch: ${{ fromJson(needs.build.outputs.compliance_arches) }}
-    name: Compliance cuda${{ inputs.cuda_version }}-${{ matrix.arch }}
+    name: Compliance ${{ inputs.cpu_only && 'cpu' || format('cuda{0}', inputs.cuda_version) }}-${{ matrix.arch }}
    runs-on: prod-builder-v3
    steps:
      - name: Checkout repository
@@ -407,10 +420,8 @@ jobs:
        id: images
        shell: bash
        run: |
-          CUDA_VERSION_RAW=${{ inputs.cuda_version }}
+          echo "image_variant_label=${{ needs.build.outputs.image_variant_label }}" >> $GITHUB_OUTPUT
-          CUDA_VERSION=${CUDA_VERSION_RAW%%.*}
+          RUNTIME_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.target_tag_plain }}${{ needs.build.outputs.image_tag_suffix }}
-          echo "cuda_major=${CUDA_VERSION}" >> $GITHUB_OUTPUT
-          RUNTIME_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.target_tag_plain }}-cuda${CUDA_VERSION}
          echo "runtime_image=${RUNTIME_IMAGE}" >> $GITHUB_OUTPUT
          # Sanitize arch for artifact name: linux/amd64 -> amd64 (artifact names can't contain /)
          ARCH="${{ matrix.arch }}"
@@ -419,9 +430,10 @@ jobs:
        uses: ./.github/actions/compliance-scan
        with:
          image: ${{ steps.images.outputs.runtime_image }}
-          artifact_name: compliance-${{ inputs.framework }}-${{ inputs.target }}${{ inputs.make_efa && '-efa' || '' }}-cuda${{ steps.images.outputs.cuda_major }}-${{ steps.images.outputs.arch_suffix }}
+          artifact_name: compliance-${{ inputs.framework }}-${{ inputs.target }}${{ inputs.make_efa && '-efa' || '' }}-${{ steps.images.outputs.image_variant_label }}-${{ steps.images.outputs.arch_suffix }}
          arch: ${{ matrix.arch }}
          framework: ${{ inputs.framework }}
+          target: ${{ inputs.target }}
          cuda_version: ${{ inputs.cuda_version }}
@@ -436,7 +448,7 @@ jobs:
      inputs.copy_to_acr &&
      needs.build.result == 'success' &&
      (needs.test.result == 'success' || needs.test.result == 'skipped')
-    name: copy-to-acr cuda${{ inputs.cuda_version }}
+    name: copy-to-acr ${{ inputs.cpu_only && 'cpu' || format('cuda{0}', inputs.cuda_version) }}
    runs-on: prod-default-small-v2
    outputs:
      target_tag_plain: ${{ needs.build.outputs.target_tag_plain }}
@@ -444,24 +456,16 @@ jobs:
      - name: Checkout repository
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
-      - name: Calculate target tag
-        id: calculate-target-tag
-        shell: bash
-        run: |
-          CUDA_VERSION_RAW=${{ inputs.cuda_version }}
-          CUDA_VERSION=${CUDA_VERSION_RAW%%.*}
-          echo "cuda_version_plain=${CUDA_VERSION}" >> $GITHUB_OUTPUT
      - name: Copy image to target registry
        timeout-minutes: ${{ inputs.copy_timeout_minutes }}
        uses: ./.github/actions/skopeo-copy
        with:
          source_registry: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
          source_image: ai-dynamo/dynamo
-          source_tag: ${{ needs.build.outputs.target_tag_plain }}-cuda${{ steps.calculate-target-tag.outputs.cuda_version_plain }}
+          source_tag: ${{ needs.build.outputs.target_tag_plain }}${{ needs.build.outputs.image_tag_suffix }}
          target_registry: ${{ secrets.AZURE_ACR_HOSTNAME }}
          target_image: ai-dynamo/dynamo
-          target_tag: ${{ needs.build.outputs.target_tag_plain }}-cuda${{ steps.calculate-target-tag.outputs.cuda_version_plain }}
+          target_tag: ${{ needs.build.outputs.target_tag_plain }}${{ needs.build.outputs.image_tag_suffix }}
          source_aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
          source_aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
          target_azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}

--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -36,6 +36,7 @@ jobs:
    environment: ${{ github.event_name == 'workflow_dispatch' && 'protected-deploy' || '' }}
    outputs:
      core: ${{ steps.changes.outputs.core }}
+      planner: ${{ steps.changes.outputs.planner }}
      operator: ${{ steps.changes.outputs.operator }}
      deploy: ${{ steps.changes.outputs.deploy }}
      vllm: ${{ steps.changes.outputs.vllm }}
@@ -59,7 +60,7 @@ jobs:
  backend-status-check:
    runs-on: ubuntu-latest
-    needs: [changed-files, vllm-pipeline, sglang-pipeline, trtllm-pipeline, operator]  # THIS list determines blocking jobs
+    needs: [changed-files, planner-pipeline, vllm-pipeline, sglang-pipeline, trtllm-pipeline, operator]  # THIS list determines blocking jobs
    if: always()
    steps:
      - name: "Check all dependent jobs"
@@ -176,6 +177,30 @@ jobs:
 # ============================================================================
 # FRAMEWORK PIPELINES (Build → Test → Copy)
 # ============================================================================
+  # ============================================================================
+  # PLANNER PIPELINE
+  # ============================================================================
+  planner-pipeline:
+    name: planner
+    needs: [changed-files]
+    if: needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.planner == 'true'
+    uses: ./.github/workflows/build-test-distribute-flavor.yml
+    with:
+      framework: dynamo
+      builder_flavor: general
+      target: planner
+      platform: 'linux/amd64'
+      cpu_only: true
+      builder_name: ${{ needs.changed-files.outputs.builder_name }}
+      build_timeout_minutes: 45
+      run_cpu_only_tests: true
+      cpu_only_test_markers: 'pre_merge and planner and gpu_0'
+      cpu_only_test_timeout_minutes: 30
+      run_single_gpu_tests: false
+      run_multi_gpu_tests: false
+      copy_to_acr: false
+    secrets: inherit
  # ============================================================================
  # VLLM PIPELINE
  # ============================================================================
@@ -406,7 +431,7 @@ jobs:
    name: Clean K8s builder if exists
    runs-on: prod-default-small-v2
    if: always()
-    needs: [vllm-pipeline, sglang-pipeline, trtllm-pipeline, operator, changed-files]
+    needs: [planner-pipeline, vllm-pipeline, sglang-pipeline, trtllm-pipeline, operator, changed-files]
    steps:
    - name: Checkout repository
      uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0

--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -33,7 +33,6 @@ CODEOWNERS @ai-dynamo/Devops
 /components/src/dynamo/global_planner/ @ai-dynamo/python-codeowners @ai-dynamo/Devops
 /examples/global_planner/ @ai-dynamo/python-codeowners @ai-dynamo/Devops
 /components/src/dynamo/profiler/ @ai-dynamo/python-codeowners @ai-dynamo/Devops
-/tests/planner/ @ai-dynamo/python-codeowners @ai-dynamo/Devops
 # recipes

--- a/tests/global_planner/unit/test_scale_request_handler.py
+++ b/tests/global_planner/unit/test_scale_request_handler.py
--- a/components/src/dynamo/planner/README.md
+++ b/components/src/dynamo/planner/README.md
@@ -44,3 +44,4 @@ When both modes are enabled, throughput-based scaling provides a lower bound on
 - **User docs**: [Planner Guide](../../../../docs/components/planner/planner-guide.md) (deployment, configuration, examples)
 - **Design docs**: [Planner Design](../../../../docs/design-docs/planner-design.md) (architecture, algorithms)
+- **Manual workflows**: [tests/manual/README.md](tests/manual/README.md) (dry run helpers, perf configs, and manual scaling scripts)
--- a/components/src/dynamo/planner/core/throughput/pre_swept_results.py
+++ b/components/src/dynamo/planner/core/throughput/pre_swept_results.py
@@ -323,7 +323,7 @@ class PreSweptResultsHelper:
 if __name__ == "__main__":
    # demo of how to use merge_raw_data
    merge_raw_data(
-        "/home/jasonzho/repo/dynamo/tests/planner/profiling_results/H200_TP1P_TP1D/selected_prefill_interpolation/raw_data.npz",
+        "/home/jasonzho/repo/dynamo/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D/selected_prefill_interpolation/raw_data.npz",
        configs={
            "gpu_type": "h200_sxm",
            "model": "nvidia/Llama-3.1-8B-Instruct-FP8",
@@ -339,7 +339,7 @@ if __name__ == "__main__":
        mode="prefill",
    )
    merge_raw_data(
-        "/home/jasonzho/repo/dynamo/tests/planner/profiling_results/H200_TP1P_TP1D/selected_decode_interpolation/raw_data.npz",
+        "/home/jasonzho/repo/dynamo/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D/selected_decode_interpolation/raw_data.npz",
        configs={
            "gpu_type": "h200_sxm",
            "model": "nvidia/Llama-3.1-8B-Instruct-FP8",

--- a/tests/planner/conftest.py
+++ b/tests/planner/conftest.py
--- a/tests/planner/profiling_results/H200_TP1P_TP1D/disagg.yaml
+++ b/tests/planner/profiling_results/H200_TP1P_TP1D/disagg.yaml
--- a/tests/planner/profiling_results/H200_TP1P_TP1D/selected_decode_interpolation/raw_data.npz
+++ b/tests/planner/profiling_results/H200_TP1P_TP1D/selected_decode_interpolation/raw_data.npz
--- a/tests/planner/profiling_results/H200_TP1P_TP1D/selected_prefill_interpolation/raw_data.npz
+++ b/tests/planner/profiling_results/H200_TP1P_TP1D/selected_prefill_interpolation/raw_data.npz
--- a/tests/planner/unit/test_virtual_connector.py
+++ b/tests/planner/unit/test_virtual_connector.py
@@ -16,9 +16,9 @@ from dynamo.planner import SubComponentType, TargetReplica, VirtualConnector
 pytestmark = [
    pytest.mark.gpu_0,
    pytest.mark.pre_merge,
-    pytest.mark.unit,
+    pytest.mark.integration,
-    pytest.mark.planner,
    pytest.mark.sglang,
+    pytest.mark.planner,
 ]
 logger = logging.getLogger(__name__)

--- a/tests/planner/.gitignore
+++ b/tests/planner/.gitignore
@@ -8,4 +8,4 @@ e2e_scaling_results/
 # Python cache
 __pycache__/
 *.pyc
 *.pyo
\ No newline at end of file
--- a/tests/planner/README.md
+++ b/tests/planner/README.md
@@ -10,7 +10,7 @@ This directory contains comprehensive testing tools for validating the SLA plann
 The SLA planner monitors metrics every 60 seconds (default adjustment interval) and scales
 prefill/decode workers based on TTFT, ITL, and request patterns.
-To setup the environment, simply use the released docker images for any backends, or build your own docker image following the READMEs in `./examples/backends/<vllm/sglang/trtllm>/README.md`, or follow the `Developing Locally` section in [README.md](../../README.md) to setup the environment locally. If using the local environment, make sure to install dependencies by running `UV_GIT_LFS=1 uv pip install --no-cache -r container/deps/requirements.common.txt -r container/deps/requirements.planner.txt`
+To set up the environment, simply use the released docker images for any backend, or build your own docker image following the READMEs in `./examples/backends/<vllm/sglang/trtllm>/README.md`, or follow the `Developing Locally` section in [README.md](../../../../../../README.md) to set up the environment locally. If using the local environment, make sure to install dependencies by running `UV_GIT_LFS=1 uv pip install --no-cache -r container/deps/requirements.common.txt -r container/deps/requirements.planner.txt`
 ## Pre-Requisite: Pre-Deployment Profiling Data
@@ -19,18 +19,18 @@ You have two options to obtain the pre-deployment profiling data:
 ### Option A: Use Test Configuration (Quickstart)
 Use the pre-configured test deployment with sample profiling data, we provide the results and the deployment configuration for the following models x hardware configurations:
- `nvidia/Llama-3.1-8B-Instruct-FP8` on H200 with max context length 16384, TP1 Prefill, and TP1 Decode. At ISL/OSL 3000/150, it achieves 40k tokens/s/gpu prefill with 80ms TTFT and 10k tokens/s/gpu decode with 10ms ITL. See `profiling_results/H200_TP1P_TP1D/`.
+- `nvidia/Llama-3.1-8B-Instruct-FP8` on H200 with max context length 16384, TP1 Prefill, and TP1 Decode. At ISL/OSL 3000/150, it achieves 40k tokens/s/gpu prefill with 80ms TTFT and 10k tokens/s/gpu decode with 10ms ITL. See `../data/profiling_results/H200_TP1P_TP1D/`.
 ### Option B: Use Your Own Profiling Results
-1. Run pre-deployment profiling for your specific setup. See the [pre-deployment profiling documentation](../../docs/components/profiler/profiler-guide.md) for detailed instructions.
+1. Run pre-deployment profiling for your specific setup. See the [pre-deployment profiling documentation](../../../../../../docs/components/profiler/profiler-guide.md) for detailed instructions.
 ## Interpolator Testing
 SLA planner uses two interpolators to estimate the performance of prefill and decode. You can test the interpolators with the following command:
 ```bash
-python components/planner/src/dynamo/planner/utils/perf_interpolation.py \
+python components/src/dynamo/planner/core/throughput/interpolation.py \
  --profile_results_dir <path_to_profile_results> \
  --isl <ISL> \
  --osl <OSL> \
@@ -43,8 +43,8 @@ The script will perform the interpolation based on ISL, OSL, and TTFT and ITL SL
 For example, to test the interpolator for `nvidia/Llama-3.1-8B-Instruct-FP8` on H200 (target TTFT=200ms, ITL=10ms):
 ```bash
-python components/planner/src/dynamo/planner/utils/perf_interpolation.py \
+python components/src/dynamo/planner/core/throughput/interpolation.py \
-  --profile_results_dir tests/planner/profiling_results/H200_TP1P_TP1D/ \
+  --profile_results_dir components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D/ \
  --isl 3000 \
  --osl 300 \
  --ttft 200 \
@@ -53,7 +53,7 @@ python components/planner/src/dynamo/planner/utils/perf_interpolation.py \
 # output:
 ISL=3000, OSL=300
 TTFT=200ms, ITL=10ms
-Using profile results from tests/planner/profiling_results/H200_TP1P_TP1D/
+Using profile results from components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D/
 Interpolating prefill performance ...
        Estimated TTFT=60.00ms <= target TTFT=200.00ms. Requests can queue 140.00ms maximally while meeting TTFT SLA.
@@ -67,7 +67,7 @@ Interpolating decode performance ...
 ## Generating Load Dataset
-We provide a tool to generate load dataset with varying request rate. More details can be found in [sin_load_generator](../../benchmarks/sin_load_generator/README.md).
+We provide a tool to generate a load dataset with a varying request rate. More details can be found in [sin_load_generator](../../../../../../benchmarks/sin_load_generator/README.md).
 From previous interpolator testing, ISL 3000 and OSL 300 can handle ~15 request/s/gpu for both prefill and decode.
 To test the planner's performance for different request rates, we can generate a load dataset with the request rate varying between 12 and 36 request/s.
@@ -99,22 +99,8 @@ Before testing SLA planner on real deployments, we provide a dry run feature to
 To dry run SLA planner,
 ```bash
-python components/planner/test/planner_sla_dryrun.py \
+python components/src/dynamo/planner/tests/manual/unit/planner_sla_dryrun.py \
-    --<SLA planner arguments> \
+    --config '{"environment":"kubernetes","backend":"vllm","ttft":200,"itl":10,"profile_results_dir":"components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D","throughput_adjustment_interval":60,"no_correction":true}' \
-    --dry-run \
-    --start-num-p <num_prefill_workers_to_start_with> \
-    --start-num-d <num_decode_workers_to_start_with> \
-    --output-plot <path_to_output_plot>
-```
-For example, to dry run SLA planner for the previous FP8 8B on H200 using the generated `rr-5-45_i3000o300.jsonl` dataset,
-```bash
-python components/planner/test/planner_sla_dryrun.py \
-    --ttft 200 \
-    --itl 10 \
-    --adjustment-interval 60 \
-    --profile-results-dir tests/planner/profiling_results/H200_TP1P_TP1D/ \
    --dataset rr-5-45_i3000o300.jsonl \
    --start-num-p 1 \
    --start-num-d 1 \
@@ -139,7 +125,7 @@ This directory contains comprehensive tests for validating the SLA planner's sca
 ### Test Types
-1. **Unit Tests** (`test_replica_calculation.py`) - Test the mathematical formulas for calculating prefill and decode replicas in isolation
+1. **Unit Tests** (`components/src/dynamo/planner/tests/unit/test_replica_calculation.py`) - Test the mathematical formulas for calculating prefill and decode replicas in isolation
 2. **End-to-End Tests** (`scaling/run_scaling_test.sh`) - Test complete workflow including Kubernetes deployment, load generation, and pod scaling validation
 3. **End-to-End Perf Tests** (see instructions below) - Compare performance (goodput and goodput/GPU) on deployments with and without the SLA planner
@@ -149,12 +135,7 @@ This directory contains comprehensive tests for validating the SLA planner's sca
 Test the replica calculation logic without requiring Kubernetes:
 ```bash
-# Set PYTHONPATH to include planner components
+PYTHONPATH=components/src python -m pytest components/src/dynamo/planner/tests/unit/test_replica_calculation.py -v
-PYTHONPATH=components/src python -m pytest tests/planner/test_replica_calculation.py -v
-# Or from the tests/planner directory:
-cd tests/planner
-PYTHONPATH=../../components/src python -m pytest test_replica_calculation.py -v
 ```
 **Note**: The unit tests automatically mock external dependencies (prometheus_client, runtime modules) to ensure they can run in isolation without requiring the full Dynamo environment.
@@ -165,85 +146,12 @@ Test complete scaling behavior including Kubernetes deployment and load generati
 **Prerequisites:**
- **[kube-prometheus-stack](../../docs/kubernetes/observability/metrics.md) installed and running.** The SLA planner requires Prometheus to observe metrics and make scaling decisions.
+- **[kube-prometheus-stack](../../../../../../docs/kubernetes/observability/metrics.md) installed and running.** The SLA planner requires Prometheus to observe metrics and make scaling decisions.
- Ensure the Dynamo operator was installed with the Prometheus endpoint configured (see [SLA Planner Quickstart Guide](../../docs/components/planner/planner-guide.md#prerequisites) for details).
+- Ensure the Dynamo operator was installed with the Prometheus endpoint configured (see [SLA Planner Quickstart Guide](../../../../../../docs/components/planner/planner-guide.md#prerequisites) for details).
-**Prepare the test deployment manifest:**
-The test requires modifying `examples/backends/vllm/deploy/disagg_planner.yaml` with test-specific planner arguments:
-1. Copy the base deployment:
-```bash
-cp examples/backends/vllm/deploy/disagg_planner.yaml tests/planner/scaling/disagg_planner.yaml
-```
-2. Edit `tests/planner/scaling/disagg_planner.yaml`. Ensure all services use the correct image. Modify the Planner service args:
-```yaml
-spec:
-  services:
-    Planner:
-      extraPodSpec:
-        mainContainer:
-          args:
-            - --environment=kubernetes
-            - --backend=vllm
-            - --adjustment-interval=60
-            - --profile-results-dir=/workspace/tests/planner/profiling_results/H200_TP1P_TP1D
-            - --ttft=100
-            - --itl=10
-            - --load-predictor=constant
-            - --no-correction
-```
-Remove `volumes` and `volumeMounts`:
-```
-# Remove these lines or any similar lines
-          volumeMounts:
-            - name: planner-profile-data
-              mountPath: /workspace/profiling_results
-              readOnly: true
-        volumes:
-          - name: planner-profile-data
-            configMap:
-              # Must be pre-created before deployment by the profiler
-              # See docs/components/planner/planner-guide.md for more details
-              name: planner-profile-data
-```
-3. Update the model in VllmPrefillWorker and VllmDecodeWorker services:
-```yaml
-args:
-  - -m
-  - dynamo.vllm
-  - --model
-  - nvidia/Llama-3.1-8B-Instruct-FP8
-  - --migration-limit=3
-  - --max-model-len=8192
-```
-**Run the test:**
-```bash
-./scaling/run_scaling_test.sh --namespace <namespace>
-```
-To save results to `tests/planner/e2e_scaling_results` instead of `/tmp`:
-```bash
-./scaling/run_scaling_test.sh --namespace <namespace> --save-results
-```
-**E2E Test Deployment Management:**
- If no deployment exists: creates, tests, and cleans up deployment
- If deployment exists: uses existing deployment and preserves it
- Perfect for development workflows where you want to keep deployments running between tests
 **Test Scenario**
-The main test scenario validates prefill scaling for H200 with 1P1D → 2P1D configuration:
+The main test scenario validates prefill scaling for H200 with 1P1D -> 2P1D configuration:
 - **Phase 1**: 8 req/s for 90s (baseline - maintains 1P1D)
 - **Phase 2**: 18 req/s for 120s (scaling trigger - scales to 2P1D)
@@ -252,6 +160,18 @@ The main test scenario validates prefill scaling for H200 with 1P1D → 2P1D con
 - **Total test duration**: ~7 minutes + scaling observation
 - **Smart cleanup**: Only removes deployment if test created it (preserves existing deployments)
+Run the test with:
+```bash
+components/src/dynamo/planner/tests/manual/scaling/run_scaling_test.sh --namespace <namespace>
+```
+To save results to `components/src/dynamo/planner/tests/e2e_scaling_results` instead of `/tmp`:
+```bash
+components/src/dynamo/planner/tests/manual/scaling/run_scaling_test.sh --namespace <namespace> --save-results
+```
 ### Instructions for End-to-End Perf Tests
 In this test, we compare performance (goodput and goodput/GPU) across the following four deployments, using the aforementioned 8B FP8 model on H200 and the dataset used in the dry run:
@@ -282,9 +202,9 @@ aiperf profile \
  --model nvidia/Llama-3.1-8B-Instruct-FP8 \
  --tokenizer nvidia/Llama-3.1-8B-Instruct-FP8 \
  --endpoint-type chat \
-  --url localhost:8000 \ # or the port-forwarded port
+  --url localhost:8000 \
  --streaming \
-  --input-file /workspace/rr-5-45_i3000o300.jsonl \ # path to the generated load dataset \
+  --input-file /workspace/rr-5-45_i3000o300.jsonl \
  --custom-dataset-type mooncake_trace \
  --goodput "time_to_first_token:200 inter_token_latency:10"
 ```
@@ -302,5 +222,4 @@ The table below shows the performance improvement of SLA planner across differen
 |---------------|-----------------|-------------------------|
 | Inefficient P/D ratio | 725% | 600% |
 | Inefficient parallelization mapping | 311% | 249% |
-| Best static deployment | 52% | 29% |`
+| Best static deployment | 52% | 29% |
--- a/tests/planner/figures/dryrun_plot.png
+++ b/tests/planner/figures/dryrun_plot.png
--- a/tests/planner/figures/sla_planner_perf.png
+++ b/tests/planner/figures/sla_planner_perf.png
--- a/tests/planner/perf_test_configs/agg_8b.yaml
+++ b/tests/planner/perf_test_configs/agg_8b.yaml
@@ -37,7 +37,7 @@ spec:
          memory: "100Gi"
      extraPodSpec:
        mainContainer:
-          image: my-registry/vllm-runtime:my-tag
+          image: my-registry/dynamo-frontend:my-tag
          workingDir: /workspace/examples/backends/vllm
          command:
            - /bin/sh