refactor: move core logics of DPP -> AIC and support static profiling (#6285)

Signed-off-by: hongkuanz <hongkuanz@nvidia.com> Signed-off-by: Hannah Zhang <hannahz@nvidia.com> Co-authored-by: hhzhang16 <54051230+hhzhang16@users.noreply.github.com>

refactor: move core logics of DPP -> AIC and support static profiling (#6285)
Signed-off-by: hongkuanz <hongkuanz@nvidia.com> Signed-off-by: Hannah Zhang <hannahz@nvidia.com> Co-authored-by: hhzhang16 <54051230+hhzhang16@users.noreply.github.com>
4c648b11 · Hongkuan Zhou · GitHub · f6d4351f · 4c648b11 · 4c648b11
Unverified Commit 4c648b11 authored Feb 25, 2026 by Hongkuan Zhou Committed by GitHub Feb 26, 2026
20 changed files
--- a/docs/pages/components/planner/planner-guide.md
+++ b/docs/pages/components/planner/planner-guide.md
--- a/docs/pages/components/profiler/profiler-guide.md
+++ b/docs/pages/components/profiler/profiler-guide.md
--- a/docs/pages/kubernetes/api-reference.md
+++ b/docs/pages/kubernetes/api-reference.md
@@ -1327,7 +1327,7 @@ _Appears in:_
 | --- | --- | --- | --- |
 | `model` _string_ | Model specifies the model to deploy (e.g., "Qwen/Qwen3-0.6B", "meta-llama/Llama-3-70b").<br />Can be a HuggingFace ID or a private model name. |  | MinLength: 1 <br />Required: \{\} <br /> |
 | `backend` _[BackendType](#backendtype)_ | Backend specifies the inference backend to use for profiling and deployment. | auto | Enum: [auto sglang trtllm vllm] <br />Optional: \{\} <br /> |
-| `image` _string_ | Image is the container image reference for the profiling job (frontend image).<br />Example: "nvcr.io/nvidia/dynamo-runtime:latest"<br />backend type automatically; backend images can be overridden via overrides.dgd. |  | Optional: \{\} <br /> |
+| `image` _string_ | Image is the container image reference for the profiling job (frontend image).<br />Example: "nvcr.io/nvidia/ai-dynamo/dynamo-frontend:1.0.0". |  | Optional: \{\} <br /> |
 | `modelCache` _[ModelCacheSpec](#modelcachespec)_ | ModelCache provides optional PVC configuration for pre-downloaded model weights.<br />When provided, weights are loaded from the PVC instead of downloading from HuggingFace. |  | Optional: \{\} <br /> |
 | `hardware` _[HardwareSpec](#hardwarespec)_ | Hardware describes the hardware resources available for profiling and deployment.<br />Typically auto-filled by the operator from cluster discovery. |  | Optional: \{\} <br /> |
 | `workload` _[WorkloadSpec](#workloadspec)_ | Workload defines the expected workload characteristics for SLA-based profiling. |  | Optional: \{\} <br /> |

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -139,14 +139,14 @@ line_length = 88
 balanced_wrapping = true
 indent = "    "
 skip = ["build"]
-known_first_party = ["dynamo"]
+known_first_party = ["dynamo", "deploy"]
 # isort may confuse what is 1st or 3rd library. e.g.
 # when dynamo/vllm/omni/xx.py import vllm, local isort may treat this `vllm` as first
 # party heuristically. This causes local sort differs from GitHub sort and pre-commit
 # failure. To mitigate 1) one can install 3rd party lib so that isort is aware of it,
 # 2) hardcode 3rd party lib here, 3) add "# isort: skip_file" to problematic files
 # as the last resort.
-known_third_party = ["vllm", "tensorrt_llm", "sglang"]
+known_third_party = ["vllm", "tensorrt_llm", "sglang", "aiconfigurator"]

 [tool.pytest.ini_options]
 minversion = "8.0"
@@ -187,6 +187,7 @@ filterwarnings = [
    "ignore:.*unclosed event loop.*:ResourceWarning", # Ignore unclosed event loop warnings
    "ignore:.*Exception ignored in.*:pytest.PytestUnraisableExceptionWarning", # Ignore unraisable exception warnings
    "ignore:The pynvml package is deprecated.*:FutureWarning", # Ignore pynvml deprecation warning, temporary until upstream library updates to nvidia-ml-py
+    "ignore:The behavior of DataFrame concatenation with empty or all-NA entries is deprecated.*:FutureWarning", # Ignore pandas 2.x concat deprecation warning raised by the AIC SDK. TODO: fix in AIC
    # Pydantic V2 deprecation warnings from TRTLLM dependencies (raised at import time during collection)
    "ignore:Support for class-based `config`.*:pydantic.warnings.PydanticDeprecatedSince20",
    "ignore:Using extra keyword arguments on `Field`.*:pydantic.warnings.PydanticDeprecatedSince20",

--- a/tests/profiler/configs/10_thorough_override_security_context.yaml
+++ b/tests/profiler/configs/10_thorough_override_security_context.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Case 10: Thorough sweep with DGD overrides for imagePullSecrets.
+# Verifies that overrides can inject new spec-level fields (imagePullSecrets)
+# that do not exist in the base DGD template.
+model: "Qwen/Qwen3-32B"
+backend: trtllm
+image: "nvcr.io/nvidia/dynamo:latest"
+hardware:
+  gpuSku: h200_sxm
+  totalGpus: 8
+  numGpusPerNode: 8
+searchStrategy: thorough
+overrides:
+  dgd:
+    spec:
+      imagePullSecrets:
+        - name: my-registry-secret
+        - name: nvcr-pull-secret
--- a/tests/profiler/configs/11_auto_rapid_no_planner_no_load.yaml
+++ b/tests/profiler/configs/11_auto_rapid_no_planner_no_load.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Case 11: Auto backend, rapid, without planner, no input load
+model: "Qwen/Qwen3-32B"
+image: "hongkuanz196/trtllm-runtime:hzhou-0224"
+hardware:
+  gpuSku: h200_sxm
+  totalGpus: 8
+  numGpusPerNode: 8
--- a/tests/profiler/configs/1_rapid_supported_no_planner_no_load.yaml
+++ b/tests/profiler/configs/1_rapid_supported_no_planner_no_load.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Case 1: AIC supported model, rapid, without planner, no input load
+model: "Qwen/Qwen3-32B"
+backend: trtllm
+image: "nvcr.io/nvidia/ai-dynamo/dynamo-frontend:latest"
+hardware:
+  gpuSku: h200_sxm
+  totalGpus: 8
+  numGpusPerNode: 8
+sla:
+  itl: 50.0
--- a/tests/profiler/configs/2_rapid_supported_no_planner_with_load.yaml
+++ b/tests/profiler/configs/2_rapid_supported_no_planner_with_load.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Case 2: AIC supported model, rapid, without planner, input load (request rate)
+model: "Qwen/Qwen3-32B"
+backend: trtllm
+image: "nvcr.io/nvidia/ai-dynamo/dynamo-frontend:latest"
+hardware:
+  gpuSku: h200_sxm
+  totalGpus: 64
+  numGpusPerNode: 8
+workload:
+  requestRate: 5.0
+sla:
+  itl: 50.0
--- a/tests/profiler/configs/2b_rapid_supported_pvc_no_planner_with_load.yaml
+++ b/tests/profiler/configs/2b_rapid_supported_pvc_no_planner_with_load.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Case 2b: AIC supported model, rapid, without planner, input load, with PVC model cache
+model: "Qwen/Qwen3-32B"
+backend: trtllm
+image: "nvcr.io/nvidia/ai-dynamo/dynamo-frontend:latest"
+hardware:
+  gpuSku: h200_sxm
+  totalGpus: 64
+  numGpusPerNode: 8
+modelCache:
+  pvcName: model-cache
+  pvcModelPath: /model/Qwen3-32B
+workload:
+  requestRate: 5.0
+sla:
+  itl: 50.0
--- a/tests/profiler/configs/2c_rapid_supported_e2e_latency.yaml
+++ b/tests/profiler/configs/2c_rapid_supported_e2e_latency.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Case 2c: AIC supported model, rapid, without planner, e2eLatency instead of ttft/itl
+model: "Qwen/Qwen3-32B"
+backend: trtllm
+image: "nvcr.io/nvidia/ai-dynamo/dynamo-frontend:latest"
+hardware:
+  gpuSku: h200_sxm
+  totalGpus: 64
+  numGpusPerNode: 8
+workload:
+  requestRate: 5.0
+sla:
+  ttft: null
+  itl: null
+  e2eLatency: 35000.0
--- a/tests/profiler/configs/2d_rapid_both_concurrency_and_rate_error.yaml
+++ b/tests/profiler/configs/2d_rapid_both_concurrency_and_rate_error.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Case 2d: Both concurrency and requestRate specified — should fail validation
+model: "Qwen/Qwen3-32B"
+backend: trtllm
+image: "nvcr.io/nvidia/ai-dynamo/dynamo-frontend:latest"
+hardware:
+  gpuSku: h200_sxm
+  totalGpus: 64
+  numGpusPerNode: 8
+workload:
+  concurrency: 50
+  requestRate: 5.0
+sla:
+  itl: 50.0
--- a/tests/profiler/configs/3_rapid_supported_planner_rapid_sweep.yaml
+++ b/tests/profiler/configs/3_rapid_supported_planner_rapid_sweep.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Case 3: AIC supported model, rapid, with planner, rapid pre-deployment sweeping
+model: "Qwen/Qwen3-32B"
+backend: trtllm
+image: "nvcr.io/nvidia/ai-dynamo/dynamo-frontend:latest"
+hardware:
+  gpuSku: h200_sxm
+  totalGpus: 8
+  numGpusPerNode: 8
+sla:
+  itl: 50.0
+features:
+  planner:
+    pre_deployment_sweeping_mode: rapid
+    enable_throughput_scaling: true
+    enable_load_scaling: false
+    mode: disagg
+    backend: trtllm
--- a/tests/profiler/configs/3b_rapid_supported_planner_rapid_sweep_mocker.yaml
+++ b/tests/profiler/configs/3b_rapid_supported_planner_rapid_sweep_mocker.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Case 3b: AIC supported model, rapid, with planner, rapid pre-deployment sweeping, enable mocker
+model: "Qwen/Qwen3-32B"
+backend: trtllm
+image: "nvcr.io/nvidia/ai-dynamo/dynamo-frontend:latest"
+hardware:
+  gpuSku: h200_sxm
+  totalGpus: 8
+  numGpusPerNode: 8
+sla:
+  itl: 50.0
+features:
+  planner:
+    pre_deployment_sweeping_mode: rapid
+    enable_throughput_scaling: true
+    enable_load_scaling: false
+    mode: disagg
+    backend: trtllm
+  mocker:
+    enabled: true
--- a/tests/profiler/configs/4_rapid_unsupported_no_planner.yaml
+++ b/tests/profiler/configs/4_rapid_unsupported_no_planner.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Case 4: AIC unsupported model, rapid, without planner
+# The l40s GPU combined with the vllm backend has no disagg support in AIC, which makes this the "unsupported" case.
+model: "Qwen/Qwen3-32B"
+backend: vllm
+image: "nvcr.io/nvidia/ai-dynamo/dynamo-frontend:latest"
+hardware:
+  gpuSku: l40s
+  totalGpus: 4
+  numGpusPerNode: 4
+  vramMb: 48000
+sla:
+  itl: 50.0
--- a/tests/profiler/configs/5_rapid_unsupported_planner.yaml
+++ b/tests/profiler/configs/5_rapid_unsupported_planner.yaml
--- a/tests/profiler/configs/5b_rapid_unsupported_planner_throughput_error.yaml
+++ b/tests/profiler/configs/5b_rapid_unsupported_planner_throughput_error.yaml
--- a/tests/profiler/configs/6_thorough_no_planner_with_load.yaml
+++ b/tests/profiler/configs/6_thorough_no_planner_with_load.yaml
--- a/tests/profiler/configs/7_thorough_planner_rapid_sweep.yaml
+++ b/tests/profiler/configs/7_thorough_planner_rapid_sweep.yaml
--- a/tests/profiler/configs/7b_thorough_planner_thorough_sweep.yaml
+++ b/tests/profiler/configs/7b_thorough_planner_thorough_sweep.yaml
--- a/tests/profiler/configs/8_thorough_empty_candidates.yaml
+++ b/tests/profiler/configs/8_thorough_empty_candidates.yaml