refactor: move core logics of DPP -> AIC and support static profiling (#6285)

Signed-off-by: hongkuanz <hongkuanz@nvidia.com> Signed-off-by: Hannah Zhang <hannahz@nvidia.com> Co-authored-by: hhzhang16 <54051230+hhzhang16@users.noreply.github.com>

refactor: move core logics of DPP -> AIC and support static profiling (#6285)
Signed-off-by: hongkuanz <hongkuanz@nvidia.com> Signed-off-by: Hannah Zhang <hannahz@nvidia.com> Co-authored-by: hhzhang16 <54051230+hhzhang16@users.noreply.github.com>
4c648b11 · Hongkuan Zhou · GitHub · f6d4351f · 4c648b11 · 4c648b11
Unverified Commit 4c648b11 authored Feb 25, 2026 by Hongkuan Zhou Committed by GitHub Feb 26, 2026
20 changed files
--- a/benchmarks/pyproject.toml
+++ b/benchmarks/pyproject.toml
@@ -40,7 +40,7 @@ classifiers = [
 ]

 dependencies = [
-    "aiconfigurator[webapp] @ git+https://github.com/ai-dynamo/aiconfigurator.git@7a24afd98714af13f061cffe784d4808f5356d45",
+    "aiconfigurator[webapp] @ git+https://github.com/ai-dynamo/aiconfigurator.git@168a948d5bc32209728fe8639191a9e0d9083d18",
    "aiperf @ git+https://github.com/ai-dynamo/aiperf.git@c3fc969e9e30e9ddad35b2f613aa7c1d418f2de9",
    "matplotlib",
    "networkx",

--- a/components/src/dynamo/planner/defaults.py
+++ b/components/src/dynamo/planner/defaults.py
@@ -70,7 +70,7 @@ class SLAPlannerDefaults(BasePlannerDefaults):
    kalman_r = 10.0
    kalman_min_points = 5

-    no_correction = False  # disable correction factor, might be useful under some conditions like long cold start time
+    no_correction = True
    mode: Literal["disagg", "prefill", "decode", "agg"] = "disagg"

    throughput_metrics_source = "frontend"  # "frontend" | "router"

--- a/components/src/dynamo/planner/utils/planner_config.py
+++ b/components/src/dynamo/planner/utils/planner_config.py
@@ -133,6 +133,18 @@ class PlannerConfig(BaseModel):
                "(enable_throughput_scaling or enable_load_scaling)"
            )

+        if self.enable_throughput_scaling:
+            if (
+                self.pre_deployment_sweeping_mode is None
+                or self.pre_deployment_sweeping_mode
+                == PlannerPreDeploymentSweepMode.None_
+            ):
+                raise ValueError(
+                    "pre_deployment_sweeping_mode cannot be 'none' when "
+                    "enable_throughput_scaling is True. Throughput-based scaling "
+                    "requires pre-deployment sweeping to profile engine performance."
+                )
+
        if self.enable_load_scaling:
            # Router metrics URL is required outside kubernetes mode
            if not self.load_router_metrics_url and self.environment != "kubernetes":
@@ -212,6 +224,9 @@ class PlannerConfig(BaseModel):

        return cls.model_validate(data)

+    def scaling_enabled(self) -> bool:
+        """Return whether throughput-based or load-based scaling is enabled."""
+        return self.enable_throughput_scaling or self.enable_load_scaling
+

 if __name__ == "__main__":
    from pathlib import Path

--- a/components/src/dynamo/profiler/__main__.py
+++ b/components/src/dynamo/profiler/__main__.py
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Entry point for the Dynamo profiler.
+
+Usage::
+
+    python -m dynamo.profiler --config <json string or path to json/yaml>
+    python -m dynamo.profiler --config '{"model": "Qwen/Qwen3-32B", ...}'
+    python -m dynamo.profiler --config /path/to/dgdr_spec.yaml
+"""
+
+import argparse
+import asyncio
+import json
+import logging
+import os
+from pathlib import Path
+
+import yaml
+
+from dynamo.profiler.utils.dgdr_v1beta1_types import DynamoGraphDeploymentRequestSpec
+
+from .profile_sla import run_profile
+from .utils.profile_common import (
+    DEFAULT_DECODE_INTERPOLATION_GRANULARITY,
+    DEFAULT_DEPLOYMENT_TIMEOUT,
+    DEFAULT_DRY_RUN,
+    DEFAULT_OUTPUT_DIR,
+    DEFAULT_PREFILL_INTERPOLATION_GRANULARITY,
+    ProfilerOperationalConfig,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def _parse_dgdr_spec(config_arg: str) -> DynamoGraphDeploymentRequestSpec:
+    """Parse a DGDR spec from a CLI ``--config`` argument.
+
+    Accepts a file path (JSON/YAML) or an inline JSON string.
+    """
+    path = Path(config_arg)
+    if path.is_file():
+        text = path.read_text()
+        suffix = path.suffix.lower()
+        if suffix in (".yaml", ".yml"):
+            data = yaml.safe_load(text)
+        else:
+            try:
+                data = json.loads(text)
+            except json.JSONDecodeError:
+                data = yaml.safe_load(text)
+        return DynamoGraphDeploymentRequestSpec.model_validate(data)
+
+    try:
+        data = json.loads(config_arg)
+    except json.JSONDecodeError as e:
+        raise ValueError(
+            f"--config value is neither a valid file path nor valid JSON. "
+            f"File not found: '{config_arg}'. JSON parse error: {e}"
+        ) from e
+
+    return DynamoGraphDeploymentRequestSpec.model_validate(data)
+
+
+def _parse_args() -> tuple[DynamoGraphDeploymentRequestSpec, ProfilerOperationalConfig]:
+    parser = argparse.ArgumentParser(description="Dynamo Profiler")
+    parser.add_argument(
+        "--config",
+        required=True,
+        help="DynamoGraphDeploymentRequestSpec as JSON string or path to JSON/YAML file",
+    )
+    parser.add_argument(
+        "--output-dir",
+        type=str,
+        default=DEFAULT_OUTPUT_DIR,
+        help=f"Path to the output results directory (default: {DEFAULT_OUTPUT_DIR})",
+    )
+    parser.add_argument(
+        "--deployment-timeout",
+        type=int,
+        default=DEFAULT_DEPLOYMENT_TIMEOUT,
+        help=f"Max seconds to wait for deployment readiness (default: {DEFAULT_DEPLOYMENT_TIMEOUT})",
+    )
+    parser.add_argument(
+        "--prefill-interpolation-granularity",
+        type=int,
+        default=DEFAULT_PREFILL_INTERPOLATION_GRANULARITY,
+        help=f"Number of ISL samples for prefill interpolation (default: {DEFAULT_PREFILL_INTERPOLATION_GRANULARITY})",
+    )
+    parser.add_argument(
+        "--decode-interpolation-granularity",
+        type=int,
+        default=DEFAULT_DECODE_INTERPOLATION_GRANULARITY,
+        help=f"Number of samples for decode interpolation (default: {DEFAULT_DECODE_INTERPOLATION_GRANULARITY})",
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        default=DEFAULT_DRY_RUN,
+        help="Skip deployments and benchmarking (dev mode)",
+    )
+    args = parser.parse_args()
+
+    dgdr = _parse_dgdr_spec(args.config)
+    ops = ProfilerOperationalConfig(
+        output_dir=args.output_dir,
+        deployment_timeout=args.deployment_timeout,
+        prefill_interpolation_granularity=args.prefill_interpolation_granularity,
+        decode_interpolation_granularity=args.decode_interpolation_granularity,
+        dry_run=args.dry_run,
+    )
+
+    return dgdr, ops
+
+
+def main():
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+    )
+
+    try:
+        dgdr, ops = _parse_args()
+    except (ValueError, Exception) as e:
+        logger.error("Failed to parse profiler config: %s", e)
+        raise SystemExit(1) from e
+
+    os.makedirs(ops.output_dir, exist_ok=True)
+    log_file_handler = logging.FileHandler(f"{ops.output_dir}/profile_sla.log")
+    log_file_handler.setLevel(logging.INFO)
+    log_file_handler.setFormatter(
+        logging.Formatter(
+            "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+            "%Y-%m-%d %H:%M:%S",
+        )
+    )
+    logging.getLogger().addHandler(log_file_handler)
+
+    asyncio.run(run_profile(dgdr, ops))
+
+
+if __name__ == "__main__":
+    main()
--- a/components/src/dynamo/profiler/interpolation.py
+++ b/components/src/dynamo/profiler/interpolation.py
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Interpolation curve generation for planner pre-deployment sweeping."""
+
+import logging
+import os
+
+import yaml
+
+from deploy.utils.dynamo_deployment import DynamoDeploymentClient
+from dynamo.planner.defaults import SubComponentType
+from dynamo.planner.utils.planner_config import PlannerPreDeploymentSweepMode
+from dynamo.profiler.utils.config import Config, get_service_name_by_type
+from dynamo.profiler.utils.config_modifiers import CONFIG_MODIFIERS
+from dynamo.profiler.utils.config_modifiers.parallelization_mapping import (
+    PickedParallelConfig,
+)
+from dynamo.profiler.utils.defaults import EngineType
+from dynamo.profiler.utils.dgdr_v1beta1_types import DynamoGraphDeploymentRequestSpec
+from dynamo.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator
+from dynamo.profiler.utils.profile_common import ProfilerOperationalConfig
+from dynamo.profiler.utils.profile_decode import (
+    profile_decode,
+    profile_decode_aiconfigurator,
+)
+from dynamo.profiler.utils.profile_prefill import (
+    profile_prefill,
+    profile_prefill_aiconfigurator,
+)
+
+logger = logging.getLogger(__name__)
+
+
+async def run_interpolation(
+    dgdr: DynamoGraphDeploymentRequestSpec,
+    ops: ProfilerOperationalConfig,
+    disagg_config: dict,
+    best_prefill_config: PickedParallelConfig,
+    best_decode_config: PickedParallelConfig,
+    model: str,
+    system: str,
+    backend: str,
+    isl: int,
+    osl: int,
+    sweep_max_context_length: int,
+    deployment_clients: list,
+):
+    """Generate interpolation curves for the planner based on sweep mode.
+
+    Takes the output disagg DGD config and uses ``convert_config`` to strip
+    it down to standalone prefill / decode engines for profiling.
+
+    The sweep mode is read from ``dgdr.features.planner``:
+
+    * ``None_`` (or no planner config): nothing is done.
+    * ``Rapid``: curves come from ``AIConfiguratorPerfEstimator``.
+    * ``Thorough``: each engine is deployed via ``DynamoDeploymentClient``,
+      profiled, and deleted again.
+
+    Args:
+        dgdr: Request spec; only its planner feature config is read here.
+        ops: Operational settings (output dir, namespace, timeout,
+            interpolation granularities).
+        disagg_config: Disaggregated DGD config to convert per engine type.
+        best_prefill_config: Picked parallelization for the prefill engine.
+        best_decode_config: Picked parallelization for the decode engine.
+        model: Model identifier passed to the estimator as ``hf_id``.
+        system: System name; lower-cased before being passed to the estimator.
+        backend: Key into ``CONFIG_MODIFIERS`` and estimator backend.
+        isl: Input sequence length used for the max-KV-token estimate.
+        osl: Output sequence length used for the max-KV-token estimate.
+        sweep_max_context_length: Forwarded to the profiling helpers.
+        deployment_clients: Shared list; each created client is appended
+            while its deployment exists and removed after deletion.
+
+    Returns:
+        None. NOTE: a prefill deployment timeout returns early, so the
+        decode interpolation is skipped as well.
+    """
+    planner_cfg = (
+        dgdr.features.planner if (dgdr.features and dgdr.features.planner) else None
+    )
+    # Default to "no sweeping" unless the planner config names a mode.
+    sweep_mode = PlannerPreDeploymentSweepMode.None_
+    if planner_cfg and planner_cfg.pre_deployment_sweeping_mode:
+        sweep_mode = planner_cfg.pre_deployment_sweeping_mode
+
+    if sweep_mode == PlannerPreDeploymentSweepMode.None_:
+        logger.info(
+            "Planner pre-deployment sweeping is disabled — skipping interpolation."
+        )
+        return
+
+    config_modifier = CONFIG_MODIFIERS[backend]
+    model_name, model_path = config_modifier.get_model_name(disagg_config)
+
+    best_prefill_gpus = best_prefill_config.num_gpus
+    best_decode_gpus = best_decode_config.num_gpus
+
+    # --- Prefill interpolation ---
+    prefill_config = config_modifier.convert_config(disagg_config, EngineType.PREFILL)
+
+    # The converted config is written to disk in every mode; Thorough mode
+    # deploys from this file.
+    work_dir = f"{ops.output_dir}/selected_prefill_interpolation"
+    os.makedirs(work_dir, exist_ok=True)
+    prefill_config_fn = f"{work_dir}/config.yaml"
+    with open(prefill_config_fn, "w") as f:
+        yaml.dump(prefill_config, f)
+
+    if sweep_mode == PlannerPreDeploymentSweepMode.Rapid:
+        logger.info("Using AIC simulation for prefill interpolation.")
+        estimator = AIConfiguratorPerfEstimator(
+            hf_id=model,
+            system=system.lower(),
+            backend=backend,
+        )
+        profile_prefill_aiconfigurator(
+            work_dir,
+            best_prefill_gpus,
+            sweep_max_context_length,
+            ops.prefill_interpolation_granularity,
+            estimator,
+            tp_size=best_prefill_config.tp_size,
+        )
+    elif sweep_mode == PlannerPreDeploymentSweepMode.Thorough:
+        logger.info("Using real GPUs for prefill interpolation.")
+        frontend_port = config_modifier.get_port(prefill_config)
+        client = DynamoDeploymentClient(
+            namespace=ops.k8s_namespace,
+            base_log_dir=work_dir,
+            model_name=model_name,
+            frontend_port=frontend_port,
+            deployment_name=prefill_config["metadata"]["name"],
+        )
+        # Register the client before creating the deployment so it stays
+        # visible in the shared list for as long as the deployment exists.
+        deployment_clients.append(client)
+        await client.create_deployment(prefill_config_fn)
+        logger.info("Waiting for prefill interpolation deployment...")
+        try:
+            await client.wait_for_deployment_ready(timeout=ops.deployment_timeout)
+        except TimeoutError:
+            logger.error("Prefill interpolation deployment timed out, skipping.")
+            await client.delete_deployment()
+            deployment_clients.remove(client)
+            # Early exit: decode interpolation below is not attempted.
+            return
+
+        await client.get_deployment_logs()
+        base_url = client.get_service_url()
+
+        profile_prefill(
+            work_dir,
+            model_name,
+            model_path,
+            base_url,
+            best_prefill_gpus,
+            sweep_max_context_length,
+            ops.prefill_interpolation_granularity,
+            attention_dp_size=best_prefill_config.dp,
+        )
+
+        await client.delete_deployment()
+        deployment_clients.remove(client)
+
+    # --- Decode interpolation ---
+    decode_config = config_modifier.convert_config(disagg_config, EngineType.DECODE)
+
+    work_dir = f"{ops.output_dir}/selected_decode_interpolation"
+    os.makedirs(work_dir, exist_ok=True)
+    decode_config_fn = f"{work_dir}/config.yaml"
+    with open(decode_config_fn, "w") as f:
+        yaml.dump(decode_config, f)
+
+    if sweep_mode == PlannerPreDeploymentSweepMode.Rapid:
+        logger.info("Using AIC simulation for decode interpolation.")
+        estimator = AIConfiguratorPerfEstimator(
+            hf_id=model,
+            system=system.lower(),
+            backend=backend,
+        )
+        attention_dp_size = best_decode_config.dp
+        # In Rapid mode the KV-cache capacity comes from the estimator.
+        max_kv_tokens = estimator.get_max_kv_tokens(
+            isl,
+            osl,
+            tp_size=best_decode_config.tp_size,
+        )
+        profile_decode_aiconfigurator(
+            work_dir,
+            best_decode_gpus,
+            max_kv_tokens,
+            sweep_max_context_length,
+            ops.decode_interpolation_granularity,
+            estimator,
+            attention_dp_size,
+            tp_size=best_decode_config.tp_size,
+        )
+    elif sweep_mode == PlannerPreDeploymentSweepMode.Thorough:
+        logger.info("Using real GPUs for decode interpolation.")
+        frontend_port = config_modifier.get_port(decode_config)
+        client = DynamoDeploymentClient(
+            namespace=ops.k8s_namespace,
+            base_log_dir=work_dir,
+            model_name=model_name,
+            frontend_port=frontend_port,
+            deployment_name=decode_config["metadata"]["name"],
+        )
+        deployment_clients.append(client)
+        await client.create_deployment(decode_config_fn)
+        logger.info("Waiting for decode interpolation deployment...")
+        try:
+            await client.wait_for_deployment_ready(timeout=ops.deployment_timeout)
+        except TimeoutError:
+            logger.error("Decode interpolation deployment timed out, skipping.")
+            await client.delete_deployment()
+            deployment_clients.remove(client)
+            return
+
+        await client.get_deployment_logs()
+
+        attention_dp_size = best_decode_config.dp
+        decode_cfg = Config.model_validate(decode_config)
+        decode_service_name = get_service_name_by_type(
+            decode_cfg, backend, SubComponentType.DECODE
+        ).lower()
+        # In Thorough mode the KV-cache size is read from the decode
+        # service's log file under the deployment's log directory.
+        max_kv_tokens = config_modifier.get_kv_cache_size_from_dynamo_log(
+            f"{work_dir}/{client.deployment_name}/{decode_service_name}/0.log",
+            attention_dp_size=attention_dp_size,
+        )
+        base_url = client.get_service_url()
+
+        profile_decode(
+            work_dir,
+            model_name,
+            model_path,
+            base_url,
+            best_decode_gpus,
+            max_kv_tokens,
+            sweep_max_context_length,
+            ops.decode_interpolation_granularity,
+            attention_dp_size,
+        )
+
+        await client.delete_deployment()
+        deployment_clients.remove(client)
--- a/components/src/dynamo/profiler/profile_sla.py
+++ b/components/src/dynamo/profiler/profile_sla.py
--- a/components/src/dynamo/profiler/rapid.py
+++ b/components/src/dynamo/profiler/rapid.py
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""RAPID search strategy: AIC simulation + picking + DGD generation."""
+
+import logging
+
+import pandas as pd
+import yaml
+from aiconfigurator.cli.main import _execute_task_configs, build_default_task_configs
+from aiconfigurator.generator.api import (
+    generate_backend_artifacts,
+    generate_naive_config,
+)
+from aiconfigurator.generator.module_bridge import task_config_to_generator_config
+from aiconfigurator.sdk.task import TaskConfig, TaskRunner
+
+from dynamo.profiler.utils.config_modifiers import CONFIG_MODIFIERS
+from dynamo.profiler.utils.dgdr_v1beta1_types import DynamoGraphDeploymentRequestSpec
+from dynamo.profiler.utils.profile_common import derive_backend_image
+
+logger = logging.getLogger(__name__)
+
+
+def _generate_dgd_from_pick(
+    dgdr: DynamoGraphDeploymentRequestSpec,
+    best_config_df: pd.DataFrame,
+    chosen_exp: str,
+    task_configs: dict[str, TaskConfig],
+) -> dict | None:
+    """Generate a DGD config dict from the rank-1 picked result via AIC's generator."""
+    if best_config_df is None or best_config_df.empty:
+        return None
+
+    row = best_config_df.iloc[0]
+
+    tc = task_configs.get(chosen_exp)
+    # TODO: temporary workaround — when backend="auto", AIC's
+    # merge_experiment_results_by_mode collapses e.g. "agg_vllm" into "agg",
+    # but task_configs retains the original keys. Reconstruct the key from
+    # the winning row's backend column. Proper fix: AIC should return the
+    # original task config key alongside the merged chosen experiment name.
+    if tc is None and "backend" in row.index:
+        tc = task_configs.get(f"{chosen_exp}_{row['backend']}")
+    if tc is None:
+        return None
+
+    original_total_gpus = tc.total_gpus
+    if "total_gpus_needed" in row.index and row["total_gpus_needed"] > 0:
+        tc.total_gpus = int(row["total_gpus_needed"])
+
+    generator_overrides: dict = {}
+
+    k8s_overrides: dict = {}
+    k8s_overrides["k8s_image"] = derive_backend_image(dgdr.image, tc.backend_name)
+    if dgdr.modelCache:
+        if dgdr.modelCache.pvcName:
+            k8s_overrides["k8s_pvc_name"] = dgdr.modelCache.pvcName
+        if dgdr.modelCache.pvcMountPath:
+            k8s_overrides["k8s_pvc_mount_path"] = dgdr.modelCache.pvcMountPath
+        if dgdr.modelCache.pvcModelPath:
+            k8s_overrides["k8s_model_path_in_pvc"] = dgdr.modelCache.pvcModelPath
+    if k8s_overrides:
+        generator_overrides["K8sConfig"] = k8s_overrides
+
+    cfg = task_config_to_generator_config(
+        task_config=tc,
+        result_df=row,
+        generator_overrides=generator_overrides or None,
+    )
+    tc.total_gpus = original_total_gpus
+
+    artifacts = generate_backend_artifacts(
+        params=cfg,
+        backend=tc.backend_name,
+        backend_version=tc.backend_version,
+        use_dynamo_generator=True,
+    )
+    dgd_yaml = artifacts.get("k8s_deploy.yaml", "")
+    if dgd_yaml:
+        return yaml.safe_load(dgd_yaml)
+    return None
+
+
+# in naive mode, use vllm as the default backend
+_DEFAULT_NAIVE_BACKEND = "vllm"
+
+
+def _run_naive_fallback(
+    dgdr: DynamoGraphDeploymentRequestSpec,
+    model: str,
+    total_gpus: int,
+    system: str,
+    backend: str,
+) -> dict:
+    """Handle the AIC-unsupported path via naive config generation."""
+    if backend == "auto":
+        backend = _DEFAULT_NAIVE_BACKEND
+        logger.info(
+            "Auto backend resolved to '%s' for naive fallback.",
+            backend,
+        )
+    logger.info(
+        "AIC does not support this combo — falling back to naive config generation."
+    )
+    naive_result = generate_naive_config(model, total_gpus, system, backend)
+
+    dgd_yaml = naive_result.get("artifacts", {}).get("k8s_deploy.yaml", "")
+    dgd_config = yaml.safe_load(dgd_yaml) if dgd_yaml else None
+    if dgd_config:
+        config_modifier = CONFIG_MODIFIERS[backend]
+        dgd_config = config_modifier.update_image(
+            dgd_config, derive_backend_image(dgdr.image, backend)
+        )
+        if dgdr.modelCache and dgdr.modelCache.pvcName:
+            dgd_config = config_modifier.update_model_from_pvc(
+                dgd_config,
+                model_name=model,
+                pvc_name=dgdr.modelCache.pvcName,
+                pvc_mount_path=dgdr.modelCache.pvcMountPath,
+                pvc_path=dgdr.modelCache.pvcModelPath or "",
+            )
+
+    return {
+        "best_config_df": pd.DataFrame(),
+        "best_latencies": {"ttft": 0.0, "tpot": 0.0, "request_latency": 0.0},
+        "dgd_config": dgd_config,
+        "chosen_exp": None,
+    }
+
+
+def _run_autoscale_sim(
+    dgdr: DynamoGraphDeploymentRequestSpec,
+    model: str,
+    system: str,
+    backend: str,
+    total_gpus: int,
+    isl: int,
+    osl: int,
+    target_ttft: float,
+    target_tpot: float,
+    request_latency: float | None,
+) -> dict:
+    """Build a TaskConfig, run autoscale simulation, collect latencies, generate DGD."""
+    planner_cfg = dgdr.features.planner if dgdr.features else None
+    if planner_cfg and planner_cfg.enable_throughput_scaling:
+        logger.warning(
+            "Throughput-based scaling enabled — only disagg mode is supported."
+        )
+
+    task = TaskConfig(
+        serving_mode="disagg",
+        model_path=model,
+        system_name=system,
+        backend_name=backend,
+        total_gpus=total_gpus,
+        isl=isl,
+        osl=osl,
+        ttft=target_ttft,
+        tpot=target_tpot,
+        request_latency=request_latency,
+    )
+    runner = TaskRunner()
+    sim_result = runner.run(task, autoscale=True)
+    pareto_df = sim_result.get("pareto_df", pd.DataFrame())
+    best_latencies = {"ttft": 0.0, "tpot": 0.0, "request_latency": 0.0}
+    if pareto_df is not None and not pareto_df.empty:
+        row = pareto_df.iloc[0]
+        best_latencies["ttft"] = float(row.get("ttft", 0.0))
+        best_latencies["tpot"] = float(row.get("tpot", 0.0))
+        best_latencies["request_latency"] = float(row.get("request_latency", 0.0))
+
+    task_configs = {"disagg": task}
+    dgd_config = _generate_dgd_from_pick(dgdr, pareto_df, "disagg", task_configs)
+    return {
+        "best_config_df": pareto_df,
+        "best_latencies": best_latencies,
+        "dgd_config": dgd_config,
+        "chosen_exp": "disagg",
+        "task_configs": task_configs,
+    }
+
+
+def _run_default_sim(
+    dgdr: DynamoGraphDeploymentRequestSpec,
+    model: str,
+    system: str,
+    backend: str,
+    total_gpus: int,
+    isl: int,
+    osl: int,
+    target_ttft: float,
+    target_tpot: float,
+    request_latency: float | None,
+    picking_mode: str,
+) -> dict:
+    """Build default task_configs, apply load_match kwargs, run simulation, generate DGD."""
+    task_configs = build_default_task_configs(
+        model_path=model,
+        total_gpus=total_gpus,
+        system=system,
+        backend=backend,
+        isl=isl,
+        osl=osl,
+        ttft=target_ttft,
+        tpot=target_tpot,
+        request_latency=request_latency,
+    )
+
+    load_kwargs: dict = {}
+    if picking_mode == "load_match" and dgdr.workload is not None:
+        load_kwargs["target_request_rate"] = dgdr.workload.requestRate
+        load_kwargs["target_concurrency"] = dgdr.workload.concurrency
+        load_kwargs["max_total_gpus"] = total_gpus
+
+    chosen, best_configs, _, _, best_latencies_map = _execute_task_configs(
+        task_configs,
+        mode="default",
+        top_n=5,
+        **load_kwargs,
+    )
+
+    best_config_df = best_configs.get(chosen, pd.DataFrame())
+    best_latencies = best_latencies_map.get(
+        chosen, {"ttft": 0.0, "tpot": 0.0, "request_latency": 0.0}
+    )
+
+    dgd_config = _generate_dgd_from_pick(dgdr, best_config_df, chosen, task_configs)
+
+    return {
+        "best_config_df": best_config_df,
+        "best_latencies": best_latencies,
+        "dgd_config": dgd_config,
+        "chosen_exp": chosen,
+        "task_configs": task_configs,
+    }
+
+
+def run_rapid(
+    dgdr: DynamoGraphDeploymentRequestSpec,
+    picking_mode: str,
+    aic_supported: bool,
+    model: str,
+    system: str,
+    backend: str,
+    total_gpus: int,
+    isl: int,
+    osl: int,
+    target_ttft: float,
+    target_tpot: float,
+    request_latency: float | None,
+) -> dict:
+    """Run AIC simulation and picking.  Returns a result dict with
+    ``best_config_df``, ``best_latencies``, and ``dgd_config``.
+    """
+    if not aic_supported:
+        return _run_naive_fallback(dgdr, model, total_gpus, system, backend)
+    if picking_mode == "autoscale":
+        return _run_autoscale_sim(
+            dgdr,
+            model,
+            system,
+            backend,
+            total_gpus,
+            isl,
+            osl,
+            target_ttft,
+            target_tpot,
+            request_latency,
+        )
+    return _run_default_sim(
+        dgdr,
+        model,
+        system,
+        backend,
+        total_gpus,
+        isl,
+        osl,
+        target_ttft,
+        target_tpot,
+        request_latency,
+        picking_mode,
+    )
--- a/components/src/dynamo/profiler/thorough.py
+++ b/components/src/dynamo/profiler/thorough.py
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""THOROUGH search strategy: enumerate candidates, deploy, benchmark, pick."""
+
+import logging
+import os
+
+import pandas as pd
+import yaml
+from aiconfigurator.generator.enumerate import enumerate_profiling_configs
+from aiconfigurator.sdk.picking import pick_autoscale, pick_default, pick_load_match
+from aiconfigurator.sdk.task import TaskConfig
+
+from deploy.utils.dynamo_deployment import DynamoDeploymentClient
+from dynamo.planner.defaults import SubComponentType
+from dynamo.profiler.rapid import _generate_dgd_from_pick
+from dynamo.profiler.utils.aic_dataframe import (
+    build_decode_row,
+    build_disagg_df_from_static,
+    build_prefill_row,
+    make_parallel_label,
+)
+from dynamo.profiler.utils.aiperf import (
+    get_decode_itl_and_thpt_per_gpu,
+    get_prefill_ttft,
+)
+from dynamo.profiler.utils.config import Config, get_service_name_by_type
+from dynamo.profiler.utils.config_modifiers import CONFIG_MODIFIERS
+from dynamo.profiler.utils.config_modifiers.protocol import apply_dgd_overrides
+from dynamo.profiler.utils.dgdr_v1beta1_types import (
+    DynamoGraphDeploymentRequestSpec,
+    ModelCacheSpec,
+)
+from dynamo.profiler.utils.profile_common import (
+    ProfilerOperationalConfig,
+    derive_backend_image,
+)
+from dynamo.profiler.utils.profile_decode import get_num_request_range
+
+logger = logging.getLogger(__name__)
+
+
+async def _benchmark_prefill_candidates(
+    prefill_candidates,
+    ops: ProfilerOperationalConfig,
+    isl: int,
+    osl: int,
+    model: str,
+    system: str,
+    backend: str,
+    deployment_clients: list,
+    config_modifier,
+) -> pd.DataFrame:
+    """Deploy each prefill candidate, measure TTFT, return prefill_df.
+
+    Candidates are processed one at a time: write the candidate's DGD config
+    to its own work directory, deploy it, measure TTFT at ``isl``, then
+    delete the deployment.
+
+    Args:
+        prefill_candidates: Iterable of candidates exposing ``num_gpus``,
+            ``tp``, ``pp``, ``dp``, ``moe_tp``, ``moe_ep`` and ``dgd_config``.
+        ops: Operational settings (output dir, namespace, timeout).
+        isl: Input sequence length used for the TTFT measurement.
+        osl: Output sequence length recorded in each result row.
+        model: Model identifier recorded in each result row.
+        system: System name recorded in each result row.
+        backend: Backend name recorded in each result row.
+        deployment_clients: Shared list; each client is appended while its
+            deployment exists and removed after deletion.
+        config_modifier: Backend config helper used to read the model name
+            and frontend port from the candidate's DGD config.
+
+    Returns:
+        A DataFrame with one row per candidate that produced a TTFT; empty
+        when none did. Candidates whose deployment times out are skipped.
+    """
+    prefill_rows: list[dict] = []
+    for candidate in prefill_candidates:
+        num_gpus = candidate.num_gpus
+        label = make_parallel_label(
+            candidate.tp,
+            candidate.pp,
+            candidate.dp,
+            candidate.moe_tp,
+            candidate.moe_ep,
+        )
+        # Drop "=" and turn "/" into "_" so the label can be embedded in a
+        # directory name.
+        tag = label.replace("=", "").replace("/", "_")
+        work_dir = f"{ops.output_dir}/prefill_{num_gpus}gpus_{tag}"
+        os.makedirs(work_dir, exist_ok=True)
+
+        config_fn = f"{work_dir}/config.yaml"
+        with open(config_fn, "w") as f:
+            yaml.dump(candidate.dgd_config, f)
+
+        model_name, model_path = config_modifier.get_model_name(candidate.dgd_config)
+        frontend_port = config_modifier.get_port(candidate.dgd_config)
+
+        logger.info("Profiling prefill candidate %s with %d GPUs...", label, num_gpus)
+
+        client = DynamoDeploymentClient(
+            namespace=ops.k8s_namespace,
+            base_log_dir=work_dir,
+            model_name=model_name,
+            frontend_port=frontend_port,
+            deployment_name=candidate.dgd_config["metadata"]["name"],
+        )
+        # Register the client before creating the deployment so it stays
+        # visible in the shared list for as long as the deployment exists.
+        deployment_clients.append(client)
+        await client.create_deployment(config_fn)
+        logger.info("Waiting for prefill deployment to be ready...")
+        try:
+            await client.wait_for_deployment_ready(timeout=ops.deployment_timeout)
+        except TimeoutError:
+            # A timed-out candidate is torn down and skipped; the sweep
+            # continues with the next candidate.
+            logger.error("Prefill %s with %d GPUs timed out", label, num_gpus)
+            await client.delete_deployment()
+            deployment_clients.remove(client)
+            continue
+        logger.info("Prefill deployment ready")
+
+        await client.get_deployment_logs()
+
+        base_url = client.get_service_url()
+        ai_perf_dir = f"{work_dir}/aiperf_isl{isl}"
+        ttft = get_prefill_ttft(
+            isl,
+            ai_perf_dir,
+            model_name,
+            model_path,
+            base_url,
+            attention_dp_size=candidate.dp,
+        )
+
+        # Tear the deployment down before recording the result.
+        await client.delete_deployment()
+        deployment_clients.remove(client)
+
+        # Candidates without a TTFT measurement contribute no row.
+        if ttft is not None:
+            prefill_rows.append(
+                build_prefill_row(
+                    model=model,
+                    isl=isl,
+                    osl=osl,
+                    ttft=ttft,
+                    tp=candidate.tp,
+                    pp=candidate.pp,
+                    dp=candidate.dp,
+                    moe_tp=candidate.moe_tp,
+                    moe_ep=candidate.moe_ep,
+                    backend=backend,
+                    system=system,
+                )
+            )
+
+    return pd.DataFrame(prefill_rows) if prefill_rows else pd.DataFrame()
+
+
+async def _benchmark_decode_candidates(
+    decode_candidates,
+    ops: ProfilerOperationalConfig,
+    isl: int,
+    osl: int,
+    model: str,
+    system: str,
+    backend: str,
+    deployment_clients: list,
+    config_modifier,
+) -> pd.DataFrame:
+    """Deploy each decode candidate, sweep num_request, return decode_df.
+
+    Candidates are handled strictly one at a time: the candidate's DGD config
+    is written to disk, the deployment is created, readiness is awaited, every
+    ``num_request`` value of the sweep is benchmarked, and the deployment is
+    deleted before the next candidate starts.
+
+    Args:
+        decode_candidates: Iterable of candidates exposing ``tp``, ``pp``,
+            ``dp``, ``moe_tp``, ``moe_ep``, ``num_gpus`` and ``dgd_config``.
+        ops: Supplies ``output_dir``, ``k8s_namespace``,
+            ``deployment_timeout`` and ``decode_interpolation_granularity``.
+        isl: Input sequence length passed to the benchmark.
+        osl: Output sequence length passed to the benchmark.
+        model: Not read by this function (NOTE(review): presumably kept so the
+            signature mirrors the prefill helper -- confirm).
+        system: Recorded in every result row.
+        backend: Recorded in every result row and used to resolve the decode
+            service name.
+        deployment_clients: Shared list; each client is appended before its
+            deployment is created and removed right after it is deleted.
+        config_modifier: Backend config modifier providing ``get_model_name``,
+            ``get_port`` and ``get_kv_cache_size_from_dynamo_log``.
+
+    Returns:
+        A DataFrame with one ``build_decode_row`` row per successfully
+        benchmarked ``num_request`` value, or an empty DataFrame when no
+        measurement was collected.
+
+    Note:
+        Only ``TimeoutError`` from the readiness wait is handled.  Any other
+        exception propagates without the deployment being deleted here, and
+        the client then stays in ``deployment_clients``.
+    """
+    decode_rows: list[dict] = []
+    for candidate in decode_candidates:
+        num_gpus = candidate.num_gpus
+        label = make_parallel_label(
+            candidate.tp,
+            candidate.pp,
+            candidate.dp,
+            candidate.moe_tp,
+            candidate.moe_ep,
+        )
+        # Strip "=" and turn "/" into "_" so the label is usable in a path.
+        tag = label.replace("=", "").replace("/", "_")
+        work_dir = f"{ops.output_dir}/decode_{num_gpus}gpus_{tag}"
+        os.makedirs(work_dir, exist_ok=True)
+
+        # Persist the candidate's DGD config; the deployment is created from
+        # this file below.
+        config_fn = f"{work_dir}/config.yaml"
+        with open(config_fn, "w") as f:
+            yaml.dump(candidate.dgd_config, f)
+
+        model_name, model_path = config_modifier.get_model_name(candidate.dgd_config)
+        frontend_port = config_modifier.get_port(candidate.dgd_config)
+
+        logger.info("Profiling decode candidate %s with %d GPUs...", label, num_gpus)
+
+        client = DynamoDeploymentClient(
+            namespace=ops.k8s_namespace,
+            base_log_dir=work_dir,
+            model_name=model_name,
+            frontend_port=frontend_port,
+            deployment_name=candidate.dgd_config["metadata"]["name"],
+        )
+        # The client is tracked *before* the deployment exists.
+        # NOTE(review): presumably so the caller can tear it down if profiling
+        # is interrupted -- confirm against the owner of deployment_clients.
+        deployment_clients.append(client)
+        await client.create_deployment(config_fn)
+        logger.info("Waiting for decode deployment to be ready...")
+        try:
+            await client.wait_for_deployment_ready(timeout=ops.deployment_timeout)
+        except TimeoutError:
+            # A candidate that never becomes ready is torn down and skipped;
+            # it contributes no rows.
+            logger.error("Decode %s with %d GPUs timed out", label, num_gpus)
+            await client.delete_deployment()
+            deployment_clients.remove(client)
+            continue
+        logger.info("Decode deployment ready")
+
+        await client.get_deployment_logs()
+
+        decode_cfg = Config.model_validate(candidate.dgd_config)
+        decode_service_name = get_service_name_by_type(
+            decode_cfg, backend, SubComponentType.DECODE
+        ).lower()
+        # NOTE(review): assumes get_deployment_logs() wrote the decode worker
+        # log to <work_dir>/<deployment>/<service>/0.log and that the parsed
+        # value is a KV-cache capacity in tokens -- confirm.
+        max_kv_tokens = config_modifier.get_kv_cache_size_from_dynamo_log(
+            f"{work_dir}/{client.deployment_name}/{decode_service_name}/0.log",
+            attention_dp_size=candidate.dp,
+        )
+        # Whole number of (isl + osl)-token sequences that fit in max_kv_tokens.
+        max_concurrency = max_kv_tokens // (isl + osl)
+
+        sweep_num_request = get_num_request_range(
+            candidate.dp,
+            max_concurrency,
+            ops.decode_interpolation_granularity,
+        )
+        logger.info("Sweeping num_request: %s", sweep_num_request)
+
+        base_url = client.get_service_url()
+        for num_request in sweep_num_request:
+            ai_perf_dir = f"{work_dir}/aiperf_request{num_request}_isl{isl}_osl{osl}_n{num_request}"
+            itl, thpt_per_gpu = get_decode_itl_and_thpt_per_gpu(
+                isl,
+                osl,
+                num_request,
+                ai_perf_dir,
+                model_name,
+                model_path,
+                base_url=base_url,
+                num_gpus=num_gpus,
+                attention_dp_size=candidate.dp,
+            )
+            # A sweep point is recorded only when both metrics were produced.
+            if itl is not None and thpt_per_gpu is not None:
+                decode_rows.append(
+                    build_decode_row(
+                        tpot=itl,
+                        thpt_per_gpu=thpt_per_gpu,
+                        num_request=num_request,
+                        num_gpus=num_gpus,
+                        osl=osl,
+                        tp=candidate.tp,
+                        pp=candidate.pp,
+                        dp=candidate.dp,
+                        moe_tp=candidate.moe_tp,
+                        moe_ep=candidate.moe_ep,
+                        backend=backend,
+                        system=system,
+                    )
+                )
+
+        # Tear down before moving on to the next candidate.
+        await client.delete_deployment()
+        deployment_clients.remove(client)
+
+    return pd.DataFrame(decode_rows) if decode_rows else pd.DataFrame()
+
+
+def _pick_thorough_best_config(
+    prefill_df: pd.DataFrame,
+    decode_df: pd.DataFrame,
+    picking_mode: str,
+    target_ttft: float,
+    target_tpot: float,
+    request_latency: float | None,
+    total_gpus: int,
+    dgdr: DynamoGraphDeploymentRequestSpec,
+) -> dict:
+    """Run the picker selected by ``picking_mode`` and return its result dict.
+
+    - ``"autoscale"`` calls ``pick_autoscale`` directly on the prefill and
+      decode DataFrames; it is the only mode that receives ``target_ttft``.
+    - ``"load_match"`` calls ``pick_load_match`` on the prefill x decode
+      cross-product, adding the workload's request rate / concurrency and a
+      GPU cap when they are set.
+    - Any other value calls ``pick_default`` on the same cross-product with
+      ``total_gpus``.
+
+    For the two cross-product modes, ``request_latency`` (when not ``None``)
+    is used as the latency target instead of ``target_tpot``.
+    """
+    if picking_mode == "autoscale":
+        return pick_autoscale(prefill_df, decode_df, target_ttft, target_tpot)
+
+    # Arguments common to pick_load_match and pick_default.
+    picker_kwargs: dict = {
+        "pareto_df": build_disagg_df_from_static(prefill_df, decode_df),
+        "serving_mode": "disagg",
+        "top_n": 5,
+    }
+    if request_latency is None:
+        picker_kwargs["target_tpot"] = target_tpot
+    else:
+        picker_kwargs["target_request_latency"] = request_latency
+
+    if picking_mode != "load_match":
+        picker_kwargs["total_gpus"] = total_gpus
+        return pick_default(**picker_kwargs)
+
+    # Load-match only: forward whichever workload targets are present.
+    if dgdr.workload and dgdr.workload.requestRate is not None:
+        picker_kwargs["target_request_rate"] = dgdr.workload.requestRate
+    if dgdr.workload and dgdr.workload.concurrency is not None:
+        picker_kwargs["target_concurrency"] = dgdr.workload.concurrency
+    if total_gpus:
+        # A falsy total_gpus (0 / None) means "no cap" and is not forwarded.
+        picker_kwargs["max_total_gpus"] = total_gpus
+    return pick_load_match(**picker_kwargs)
+
+
+async def run_thorough(
+    dgdr: DynamoGraphDeploymentRequestSpec,
+    ops: ProfilerOperationalConfig,
+    picking_mode: str,
+    model: str,
+    system: str,
+    backend: str,
+    total_gpus: int,
+    isl: int,
+    osl: int,
+    target_ttft: float,
+    target_tpot: float,
+    request_latency: float | None,
+    deployment_clients: list,
+) -> dict:
+    """Run the THOROUGH pipeline: enumerate, benchmark, pick, generate a DGD.
+
+    Stages:
+        1. Enumerate prefill/decode candidates and apply ``overrides.dgd``
+           (when present) to every candidate's DGD config.
+        2. Benchmark all prefill candidates, then all decode candidates.
+        3. Pick the best configuration via ``_pick_thorough_best_config``.
+        4. Generate the final DGD config from the pick.
+
+    Returns:
+        A dict with keys ``best_config_df``, ``best_latencies``,
+        ``dgd_config`` and ``chosen_exp``.  When either benchmark stage
+        yields no rows, the dict holds an empty DataFrame, all-zero
+        latencies and ``None`` for ``dgd_config`` / ``chosen_exp``.
+    """
+    logger.warning("THOROUGH mode: only disagg configurations are supported.")
+
+    # --- Stage 1: Enumeration ---
+    cache_spec = dgdr.modelCache or ModelCacheSpec()
+    backend_image = derive_backend_image(dgdr.image, backend)
+    prefill_candidates, decode_candidates = enumerate_profiling_configs(
+        model_path=model,
+        system=system,
+        backend=backend,
+        image=backend_image,
+        isl=isl,
+        osl=osl,
+        num_gpus_per_node=dgdr.hardware.numGpusPerNode,
+        k8s_pvc_name=cache_spec.pvcName,
+        k8s_pvc_mount_path=cache_spec.pvcMountPath,
+        k8s_model_path_in_pvc=cache_spec.pvcModelPath,
+    )
+
+    logger.info(
+        "Enumerated %d prefill candidates, %d decode candidates",
+        len(prefill_candidates),
+        len(decode_candidates),
+    )
+
+    dgd_overrides = dgdr.overrides and dgdr.overrides.dgd
+    if dgd_overrides:
+        # Prefill candidates first, then decode candidates.
+        for candidate in (*prefill_candidates, *decode_candidates):
+            candidate.dgd_config = apply_dgd_overrides(
+                candidate.dgd_config, dgd_overrides
+            )
+        logger.info(
+            "Applied DGD overrides to %d prefill + %d decode candidates.",
+            len(prefill_candidates),
+            len(decode_candidates),
+        )
+
+    config_modifier = CONFIG_MODIFIERS[backend]
+
+    # --- Stage 2: Benchmarking ---
+    # Both helpers take the same trailing positional arguments.
+    shared_args = (
+        ops,
+        isl,
+        osl,
+        model,
+        system,
+        backend,
+        deployment_clients,
+        config_modifier,
+    )
+    prefill_df = await _benchmark_prefill_candidates(prefill_candidates, *shared_args)
+    decode_df = await _benchmark_decode_candidates(decode_candidates, *shared_args)
+
+    # --- Stage 3: Picking ---
+    # Without results from both phases there is nothing to pick from.
+    failure_msg = None
+    if prefill_df.empty:
+        failure_msg = "No prefill results produced in THOROUGH mode."
+    elif decode_df.empty:
+        failure_msg = "No decode results produced in THOROUGH mode."
+    if failure_msg is not None:
+        logger.error(failure_msg)
+        return {
+            "best_config_df": pd.DataFrame(),
+            "best_latencies": {"ttft": 0.0, "tpot": 0.0, "request_latency": 0.0},
+            "dgd_config": None,
+            "chosen_exp": None,
+        }
+
+    result = _pick_thorough_best_config(
+        prefill_df,
+        decode_df,
+        picking_mode,
+        target_ttft,
+        target_tpot,
+        request_latency,
+        total_gpus,
+        dgdr,
+    )
+    best_config_df = result.get("best_config_df", pd.DataFrame())
+
+    # --- Stage 4: DGD generation ---
+    disagg_task = TaskConfig(
+        serving_mode="disagg",
+        model_path=model,
+        system_name=system,
+        backend_name=backend,
+        total_gpus=total_gpus,
+        isl=isl,
+        osl=osl,
+        ttft=target_ttft,
+        tpot=target_tpot,
+        request_latency=request_latency,
+    )
+    dgd_config = _generate_dgd_from_pick(
+        dgdr, best_config_df, "disagg", {"disagg": disagg_task}
+    )
+
+    zero_latencies = {"ttft": 0.0, "tpot": 0.0, "request_latency": 0.0}
+    return {
+        "best_config_df": best_config_df,
+        "best_latencies": result.get("best_latencies", zero_latencies),
+        "dgd_config": dgd_config,
+        "chosen_exp": "disagg",
+    }
--- a/components/src/dynamo/profiler/utils/aic_dataframe.py
+++ b/components/src/dynamo/profiler/utils/aic_dataframe.py
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Helpers to build AIC-compatible DataFrames from real-GPU benchmark results.
+
+The picking functions in ``aiconfigurator.sdk.picking`` expect DataFrames
+whose columns match the ``ColumnsStatic`` schema.  Only a subset of columns
+are actually accessed; this module populates exactly those columns.
+"""
+
+from __future__ import annotations
+
+import pandas as pd
+from aiconfigurator.sdk import common
+from aiconfigurator.sdk.picking import _build_disagg_summary_dict
+
+
+def make_parallel_label(tp: int, pp: int, dp: int, moe_tp: int, moe_ep: int) -> str:
+    """Return the ``parallel`` label used for dedup in picking.
+
+    Precedence is ``dep<moe_ep>`` when ``moe_ep > 1``, then ``tep<moe_tp>``
+    when ``moe_tp > 1``, otherwise ``tp<tp>``.  ``pp`` and ``dp`` are accepted
+    but do not influence the label.
+    """
+    uses_expert_parallel = moe_ep > 1
+    if uses_expert_parallel:
+        return f"dep{moe_ep}"
+
+    uses_moe_tensor_parallel = moe_tp > 1
+    if uses_moe_tensor_parallel:
+        return f"tep{moe_tp}"
+
+    return f"tp{tp}"
+
+
+def build_prefill_row(
+    *,
+    model: str,
+    isl: int,
+    osl: int,
+    ttft: float,
+    tp: int,
+    pp: int,
+    dp: int,
+    moe_tp: int,
+    moe_ep: int,
+    backend: str = "",
+    system: str = "",
+) -> dict:
+    """Assemble one prefill result row with the columns AIC picking reads.
+
+    Derived values:
+        - GPU count is ``tp * pp * dp``.
+        - ``seq/s`` is ``1000.0 / ttft * dp`` (NOTE(review): implies ``ttft``
+          is in milliseconds -- confirm), or ``0.0`` when ``ttft <= 0``.
+        - ``seq/s/gpu`` divides that by the GPU count (``0.0`` when the count
+          is not positive).
+        - ``bs`` is fixed at 1 and ``global_bs`` is ``1 * dp``.
+
+    Breakdown/metadata columns that a real benchmark does not provide are
+    filled with empty strings or zeros.
+    """
+    gpu_count = tp * pp * dp
+
+    if ttft > 0:
+        seq_s = 1000.0 / ttft * dp
+    else:
+        seq_s = 0.0
+    if gpu_count > 0:
+        seq_s_per_gpu = seq_s / gpu_count
+    else:
+        seq_s_per_gpu = 0.0
+
+    # Insertion order is kept identical to the column order consumers see.
+    row: dict = {
+        "ttft": ttft,
+        "seq/s": seq_s,
+        "seq/s/gpu": seq_s_per_gpu,
+        "global_bs": 1 * dp,
+        "parallel": make_parallel_label(tp, pp, dp, moe_tp, moe_ep),
+        "tp": tp,
+        "pp": pp,
+        "dp": dp,
+        "osl": osl,
+        "model": model,
+        "isl": isl,
+        "bs": 1,
+        "moe_tp": moe_tp,
+        "moe_ep": moe_ep,
+        "prefix": 0,
+    }
+    # Per-operation breakdown columns are left blank.
+    row.update(
+        dict.fromkeys(("gemm", "kvcache", "fmha", "moe", "comm", "memory"), "")
+    )
+    row.update(backend=backend, version="", system=system, power_w=0.0)
+    return row
+
+
+def build_decode_row(
+    *,
+    tpot: float,
+    thpt_per_gpu: float,
+    num_request: int,
+    num_gpus: int,
+    osl: int,
+    tp: int,
+    pp: int,
+    dp: int,
+    moe_tp: int,
+    moe_ep: int,
+    backend: str = "",
+    system: str = "",
+) -> dict:
+    """Assemble one decode result row with the columns AIC picking reads.
+
+    Derived values:
+        - ``seq/s`` is ``thpt_per_gpu * num_gpus / osl`` and ``seq/s/gpu`` is
+          ``thpt_per_gpu / osl``; both are ``0.0`` when ``osl <= 0``.
+        - ``global_bs`` and ``concurrency`` both equal ``num_request``.
+        - ``bs`` is ``num_request // dp`` (or ``num_request`` when ``dp`` is
+          not positive).
+        - ``tokens/s/user`` is ``1000.0 / tpot`` (NOTE(review): implies
+          ``tpot`` is in milliseconds -- confirm), or ``0.0`` when
+          ``tpot <= 0``.
+
+    Breakdown/metadata columns that a real benchmark does not provide are
+    filled with empty strings or zeros.
+    """
+    if osl > 0:
+        seq_s = thpt_per_gpu * num_gpus / osl
+        seq_s_per_gpu = thpt_per_gpu / osl
+    else:
+        seq_s = 0.0
+        seq_s_per_gpu = 0.0
+
+    per_rank_bs = num_request // dp if dp > 0 else num_request
+    tokens_per_user = 1000.0 / tpot if tpot > 0 else 0.0
+
+    # Insertion order is kept identical to the column order consumers see.
+    row: dict = {
+        "tpot": tpot,
+        "seq/s": seq_s,
+        "seq/s/gpu": seq_s_per_gpu,
+        "global_bs": num_request,
+        "parallel": make_parallel_label(tp, pp, dp, moe_tp, moe_ep),
+        "tp": tp,
+        "pp": pp,
+        "dp": dp,
+        "concurrency": num_request,
+        "bs": per_rank_bs,
+        "tokens/s/user": tokens_per_user,
+        "moe_tp": moe_tp,
+        "moe_ep": moe_ep,
+    }
+    # Per-operation breakdown columns are left blank.
+    row.update(
+        dict.fromkeys(("gemm", "kvcache", "fmha", "moe", "comm", "memory"), "")
+    )
+    row.update(backend=backend, version="", system=system, power_w=0.0)
+    return row
+
+
+def build_disagg_df_from_static(
+    prefill_df: pd.DataFrame,
+    decode_df: pd.DataFrame,
+) -> pd.DataFrame:
+    """Pair every prefill row with every decode row into a disagg DataFrame.
+
+    Each pairing is summarised by ``_build_disagg_summary_dict`` with one
+    prefill worker and one decode worker.  The result always has the
+    ``common.ColumnsDisagg`` columns, and has no rows when either input is
+    empty.
+
+    Used when calling ``pick_default`` or ``pick_load_match`` from
+    THOROUGH-mode benchmark results.
+    """
+    paired_rows: list[dict] = [
+        _build_disagg_summary_dict(
+            prefill_summary_dict=prefill_row.to_dict(),
+            prefill_num_worker=1,
+            decode_summary_dict=decode_row.to_dict(),
+            decode_num_worker=1,
+        )
+        for _, prefill_row in prefill_df.iterrows()
+        for _, decode_row in decode_df.iterrows()
+    ]
+    if paired_rows:
+        return pd.DataFrame(paired_rows, columns=common.ColumnsDisagg)
+    return pd.DataFrame(columns=common.ColumnsDisagg)
--- a/components/src/dynamo/profiler/utils/config_modifiers/parallelization_mapping.py
+++ b/components/src/dynamo/profiler/utils/config_modifiers/parallelization_mapping.py
@@ -102,6 +102,51 @@ class ParallelizationMapping:
        )


+@dataclass(frozen=True)
+class PickedParallelConfig:
+    """Immutable (tp, pp, dp, moe_tp, moe_ep) description of a picked config.
+
+    This is the same five-value tuple that AIC's enumeration and picking
+    pipelines produce.  In contrast to :class:`ParallelizationMapping`, all
+    five dimensions are always stored explicitly instead of being expressed
+    through mutually exclusive optional fields.
+    """
+
+    # Every dimension defaults to 1, i.e. "no parallelism along this axis".
+    tp: int = 1
+    pp: int = 1
+    dp: int = 1
+    moe_tp: int = 1
+    moe_ep: int = 1
+
+    @property
+    def num_gpus(self) -> int:
+        """GPU count of the config: ``tp * pp * dp``."""
+        return self.tp * self.pp * self.dp
+
+    @property
+    def tp_size(self) -> int:
+        """Effective TP for KV-head splitting (TP or TEP; 1 for DEP)."""
+        if self.moe_ep > 1:
+            return 1
+        return self.moe_tp if self.moe_tp > 1 else self.tp
+
+    def label(self) -> str:
+        """Return ``dep<N>``, ``tep<N>`` or ``tp<N>``, in that precedence."""
+        if self.moe_ep > 1:
+            return f"dep{self.moe_ep}"
+        if self.moe_tp > 1:
+            return f"tep{self.moe_tp}"
+        return f"tp{self.tp}"
+
+    def to_parallelization_mapping(self) -> ParallelizationMapping:
+        """Convert to :class:`ParallelizationMapping`.
+
+        Exactly one of ``dep`` / ``tep`` / ``tp`` is set, chosen with the same
+        precedence as :meth:`label`.
+        """
+        if self.moe_ep > 1:
+            mapping_kwargs = {"dep": self.moe_ep}
+        elif self.moe_tp > 1:
+            mapping_kwargs = {"tep": self.moe_tp}
+        else:
+            mapping_kwargs = {"tp": self.tp}
+        return ParallelizationMapping(**mapping_kwargs)
+
+
 def _check_divisibility(
    value: int | None,
    divisor: int,

--- a/components/src/dynamo/profiler/utils/config_modifiers/protocol.py
+++ b/components/src/dynamo/profiler/utils/config_modifiers/protocol.py
@@ -15,6 +15,7 @@

 from __future__ import annotations

+import copy
 import logging
 from typing import Any, Protocol, Tuple

@@ -401,18 +402,9 @@ class BaseConfigModifier:

        cls._ensure_spec_pvc(cfg, pvc_name)

-        # Mount to Frontend + prefill + decode services if present.
-        if "Frontend" in cfg.spec.services:
-            cls._ensure_service_volume_mount(
-                cfg.spec.services["Frontend"], pvc_name, pvc_mount_path
-            )
-
-        for sct in (SubComponentType.PREFILL, SubComponentType.DECODE):
-            svc_name = get_service_name_by_type(cfg, cls.BACKEND, sct)
-            if svc_name in cfg.spec.services:
-                cls._ensure_service_volume_mount(
-                    cfg.spec.services[svc_name], pvc_name, pvc_mount_path
-                )
+        # Mount PVC to all services (Frontend + workers)
+        for svc_name, svc in cfg.spec.services.items():
+            cls._ensure_service_volume_mount(svc, pvc_name, pvc_mount_path)

        # Patch workers + frontend with PVC model path.
        cls._apply_model_update_to_cfg(
@@ -515,12 +507,16 @@ class BaseConfigModifier:
        # Update model (handles worker args + frontend patching)
        effective_model_path = model_path or model_name
        if pvc_name and pvc_mount_path:
+            # Derive pvc_path from effective_model_path by stripping the mount prefix
+            pvc_path = ""
+            if effective_model_path and effective_model_path.startswith(pvc_mount_path):
+                pvc_path = effective_model_path[len(pvc_mount_path) :].strip("/")
            result = cls.update_model_from_pvc(
                cfg.model_dump(),
                model_name=model_name,
                pvc_name=pvc_name,
                pvc_mount_path=pvc_mount_path,
-                pvc_path="",
+                pvc_path=pvc_path,
            )
        else:
            result = cls.update_model(
@@ -629,3 +625,96 @@ class BaseConfigModifier:
        cls._apply_worker_config(
            cfg.spec.services[svc_name], agg_cli_args, agg_replicas, agg_gpus
        )
+
+
+# ---------------------------------------------------------------------------
+# DGD override merging (module-level, backend-agnostic)
+# ---------------------------------------------------------------------------
+
+# Services whose CLI args are fully replaced by overrides.
+# For engine-worker services (everything else), the main container args
+# are *appended* because they contain profiler-generated sweep results.
+# Membership is tested against the service-name path component by
+# ``_is_worker_main_container_args``.
+_OVERRIDE_NON_WORKER_SERVICES = frozenset({"Frontend", "Planner"})
+
+# The exact path suffix where profiler-generated CLI args live inside a
+# service dict.  Only this specific location gets append semantics.
+# It is compared against the path components that follow
+# ``spec.services.<name>``.
+_WORKER_ARGS_SUFFIX = ("extraPodSpec", "mainContainer", "args")
+
+
+def _is_worker_main_container_args(path: list[str]) -> bool:
+    """True when *path* is ``spec.services.<worker>.extraPodSpec.mainContainer.args``.
+
+    ``<worker>`` is any service name that is not listed in
+    ``_OVERRIDE_NON_WORKER_SERVICES``.
+    """
+    if len(path) != 6:
+        return False
+
+    root, section, service, *suffix = path
+    if not (root == "spec" and section == "services"):
+        return False
+    if service in _OVERRIDE_NON_WORKER_SERVICES:
+        return False
+    return tuple(suffix) == _WORKER_ARGS_SUFFIX
+
+
+def _deep_merge_overrides(
+    target: dict,
+    overrides: dict,
+    path: list[str],
+) -> None:
+    """Recursively merge *overrides* into *target* (mutates *target* in-place).
+
+    Args:
+        target: The dict being updated at the current nesting level.
+        overrides: The partial dict whose values are merged in.
+        path: Keys leading from the DGD root to *target* (``[]`` at the root).
+
+    Rules:
+    - Dicts are merged recursively; missing intermediate keys are created
+      by deep-copying the override subtree.
+    - ``spec.services.<name>`` that does not exist in *target* is skipped
+      with a warning (all nested overrides under that service are dropped).
+      This guard only runs once recursion reaches an existing
+      ``spec.services`` dict in *target*.
+    - Only ``spec.services.<worker>.extraPodSpec.mainContainer.args`` is
+      *appended* to the existing list (preserving profiler-generated CLI
+      args).  ``args`` at any other path is replaced normally.
+    - All other leaf values replace the target value; dict and list values
+      are deep-copied so *target* never aliases *overrides*.
+    """
+    # When ``path`` is exactly spec.services, *target* is the services dict
+    # itself and every override key is a service name.  The check does not
+    # depend on the key, so it is evaluated once per call.
+    at_services_level = path == ["spec", "services"]
+
+    for key, value in overrides.items():
+        current_path = path + [key]
+
+        # Guard: skip overrides for services that don't exist in the DGD
+        if at_services_level and key not in target:
+            logger.warning(
+                "Service '%s' does not exist in the generated DGD config; "
+                "overrides for this service will not be applied.",
+                key,
+            )
+            continue
+
+        if isinstance(value, dict) and isinstance(target.get(key), dict):
+            _deep_merge_overrides(target[key], value, current_path)
+        elif isinstance(value, dict) and key not in target:
+            target[key] = copy.deepcopy(value)
+        elif (
+            key == "args"
+            and isinstance(value, list)
+            and _is_worker_main_container_args(current_path)
+        ):
+            # Append rather than replace so the generated args survive.
+            existing = target.get(key) or []
+            target[key] = list(existing) + list(value)
+        else:
+            target[key] = (
+                copy.deepcopy(value) if isinstance(value, (dict, list)) else value
+            )
+
+
+def apply_dgd_overrides(dgd_config: dict, overrides: dict) -> dict:
+    """Return a copy of *dgd_config* with an ``overrides.dgd`` dict merged in.
+
+    The merge follows the rules of ``_deep_merge_overrides``, starting at the
+    DGD root (empty path).
+
+    Args:
+        dgd_config: The generated DynamoGraphDeployment config dict.
+        overrides: A partial DGD dict with the same structure.  Leaf values
+            overwrite the corresponding keys in *dgd_config*.
+
+    Returns:
+        A new dict with the overrides applied (the original is not mutated).
+    """
+    # Work on a deep copy so the caller's config is left untouched.
+    merged = copy.deepcopy(dgd_config)
+    _deep_merge_overrides(merged, overrides, path=[])
+    return merged
--- a/components/src/dynamo/profiler/utils/dgd_generation.py
+++ b/components/src/dynamo/profiler/utils/dgd_generation.py
--- a/components/src/dynamo/profiler/utils/dgdr_v1beta1_types.py
+++ b/components/src/dynamo/profiler/utils/dgdr_v1beta1_types.py
@@ -223,7 +223,7 @@ class DynamoGraphDeploymentRequestSpec(BaseModel):
    )
    image: Optional[str] = Field(
        default=None,
-        description='Image is the container image reference for the profiling job (frontend image). Example: "nvcr.io/nvidia/dynamo-runtime:latest" TODO: In a future MR, the operator will derive the backend inference image from the backend type automatically; backend images can be overridden via overrides.dgd.',
+        description='Image is the container image reference for the profiling job (frontend image). Example: "nvcr.io/nvidia/ai-dynamo/dynamo-frontend:1.0.0".',
    )
    modelCache: Optional[ModelCacheSpec] = Field(
        default=None,

--- a/components/src/dynamo/profiler/utils/dgdr_validate.py
+++ b/components/src/dynamo/profiler/utils/dgdr_validate.py
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Profiler-side validation for DynamoGraphDeploymentRequestSpec.
+
+The auto-generated Pydantic types in ``dgdr_v1beta1_types.py`` mirror the
+Go API and mark most fields as ``Optional``.  The profiler requires a
+stricter contract.  This module validates those requirements and normalises
+fields (e.g. populating defaults, resolving SLA modes) so that downstream
+code can access them without ``None`` checks.
+"""
+
+from __future__ import annotations
+
+import logging
+
+from dynamo.planner.utils.planner_config import PlannerPreDeploymentSweepMode
+from dynamo.profiler.utils.defaults import SearchStrategy
+from dynamo.profiler.utils.dgdr_v1beta1_types import (
+    DynamoGraphDeploymentRequestSpec,
+    SLASpec,
+    WorkloadSpec,
+)
+from dynamo.profiler.utils.profile_common import is_planner_enabled
+
+logger = logging.getLogger(__name__)
+
+
+def validate_dgdr_for_profiler(
+    dgdr: DynamoGraphDeploymentRequestSpec,
+) -> DynamoGraphDeploymentRequestSpec:
+    """Validate and normalise a DGDR spec for the profiler.
+
+    Checks run in this order: required fields, workload, SLA, features.
+    After a successful return the caller can access, without ``None`` guards:
+
+    - ``dgdr.image`` (non-empty)
+    - ``dgdr.hardware.gpuSku`` (non-empty)
+    - ``dgdr.hardware.numGpusPerNode`` (> 0)
+    - ``dgdr.workload`` and ``dgdr.sla`` (default instances are created when
+      they were ``None``)
+    - ``dgdr.sla.ttft`` and ``dgdr.sla.itl`` **or** ``dgdr.sla.e2eLatency``
+
+    NOTE(review): ``dgdr.workload.isl`` / ``dgdr.workload.osl`` are not
+    checked by these validators; they are presumably guaranteed by
+    ``WorkloadSpec`` defaults -- confirm before relying on them.
+
+    The function mutates ``dgdr`` in-place (e.g. populating defaults) and
+    returns it for convenience.
+
+    Raises:
+        ValueError: If a required field is missing or invalid.
+    """
+    # Must run first: it fills in dgdr.workload / dgdr.sla when they are None.
+    _validate_required_fields(dgdr)
+
+    workload, sla = dgdr.workload, dgdr.sla
+    _validate_workload(workload)
+    _validate_sla(sla)
+    _validate_features(dgdr)
+    return dgdr
+
+
+# ---------------------------------------------------------------------------
+# Internal validators
+# ---------------------------------------------------------------------------
+
+
+def _validate_required_fields(dgdr: DynamoGraphDeploymentRequestSpec) -> None:
+    """Reject specs missing profiler-required fields, then fill in defaults.
+
+    Required: a non-empty ``image``, a ``hardware`` section with a non-empty
+    ``gpuSku`` and a positive ``numGpusPerNode``.  A missing ``workload`` or
+    ``sla`` is replaced by a default-constructed instance.
+
+    Raises:
+        ValueError: On the first missing or invalid required field.
+    """
+    if not dgdr.image:
+        raise ValueError("'image' is required in the DGDR spec.")
+
+    hardware = dgdr.hardware
+    if not hardware:
+        raise ValueError("'hardware' is required in the DGDR spec.")
+    if not hardware.gpuSku:
+        raise ValueError("'hardware.gpuSku' is required in the DGDR spec.")
+
+    gpus_per_node = hardware.numGpusPerNode
+    if not gpus_per_node or gpus_per_node <= 0:
+        raise ValueError("'hardware.numGpusPerNode' must be a positive integer.")
+
+    # Populate defaults for optional sub-objects so callers don't need None checks
+    if dgdr.workload is None:
+        dgdr.workload = WorkloadSpec()
+    if dgdr.sla is None:
+        dgdr.sla = SLASpec()
+
+
+def _validate_workload(workload: WorkloadSpec) -> None:
+    """Reject a workload that sets both ``concurrency`` and ``requestRate``.
+
+    Raises:
+        ValueError: If both fields are not ``None``.
+    """
+    if workload.concurrency is None or workload.requestRate is None:
+        return
+    raise ValueError(
+        "Only one of 'concurrency' or 'requestRate' can be provided, not both."
+    )
+
+
+def _validate_sla(sla: SLASpec) -> None:
+    """Check SLA targets and normalise the spec to a single SLA mode.
+
+    Every target that is set must be strictly positive.  When ``e2eLatency``
+    is set, ``ttft`` and ``itl`` are reset to ``None`` on *sla*; otherwise
+    both ``ttft`` and ``itl`` must be set.
+
+    Raises:
+        ValueError: If a target is non-positive, or if neither a complete
+            ``ttft``/``itl`` pair nor ``e2eLatency`` is provided.
+    """
+    targets = {"ttft": sla.ttft, "itl": sla.itl, "e2eLatency": sla.e2eLatency}
+    for name, val in targets.items():
+        if val is not None and val <= 0:
+            raise ValueError(f"SLA '{name}' must be positive (got {val}).")
+
+    # When e2eLatency is provided it takes precedence — null out the per-token defaults
+    if sla.e2eLatency is not None:
+        sla.ttft = None
+        sla.itl = None
+        return
+
+    if sla.ttft is None or sla.itl is None:
+        raise ValueError(
+            "Either both 'ttft' and 'itl', or 'e2eLatency', must be provided in the SLA spec."
+        )
+
+
+def run_gate_checks(
+    dgdr: DynamoGraphDeploymentRequestSpec,
+    aic_supported: bool,
+    search_strategy: SearchStrategy,
+    backend: str,
+) -> None:
+    """Reject or downgrade feature combinations the profiler cannot serve.
+
+    Must be called after ``validate_dgdr_for_profiler``.
+
+    - Planner enabled without AIC support: throughput scaling is an error;
+      a ``rapid`` pre-deployment sweeping mode is downgraded in-place to
+      ``none`` with a warning.
+    - THOROUGH search with the ``"auto"`` backend is an error.
+
+    Raises:
+        ValueError: For either unsupported combination above.
+    """
+    planner_without_aic = is_planner_enabled(dgdr) and not aic_supported
+    if planner_without_aic:
+        model = dgdr.model
+        system = dgdr.hardware.gpuSku.lower()
+        planner_cfg = dgdr.features.planner
+
+        if planner_cfg.enable_throughput_scaling:
+            raise ValueError(
+                "Throughput-based planner scaling requires AIC support, but "
+                f"{model} on {system}/{backend} is not supported by AIC. "
+                "Use a supported model/hardware/backend combination or disable throughput scaling."
+            )
+
+        wants_rapid_sweep = (
+            planner_cfg.pre_deployment_sweeping_mode
+            == PlannerPreDeploymentSweepMode.Rapid
+        )
+        if wants_rapid_sweep:
+            logger.warning(
+                "Planner pre-deployment sweeping mode is 'rapid' but AIC does not support "
+                "%s on %s/%s. Falling back to 'none' (no pre-deployment sweeping).",
+                model,
+                system,
+                backend,
+            )
+            planner_cfg.pre_deployment_sweeping_mode = (
+                PlannerPreDeploymentSweepMode.None_
+            )
+
+    thorough_on_auto = search_strategy == SearchStrategy.THOROUGH and backend == "auto"
+    if thorough_on_auto:
+        raise ValueError(
+            "THOROUGH search strategy does not support 'auto' backend. "
+            "Please specify a concrete backend (trtllm, vllm, sglang)."
+        )
+
+
+def _validate_features(dgdr: DynamoGraphDeploymentRequestSpec) -> None:
+    """Cross-field validation for ``dgdr.features``.
+
+    The only rule enforced: when the mocker is enabled and a planner config
+    is present, the planner's ``pre_deployment_sweeping_mode`` must be set to
+    something other than ``none``.
+
+    Raises:
+        ValueError: If the mocker is enabled while sweeping is unset or
+            ``none``.
+    """
+    features = dgdr.features
+    if not features:
+        return
+
+    # Mocker requires pre-deployment sweeping
+    mocker_with_planner = (
+        features.mocker and features.mocker.enabled and features.planner
+    )
+    if not mocker_with_planner:
+        return
+
+    sweep_mode = features.planner.pre_deployment_sweeping_mode
+    if sweep_mode is None or sweep_mode == PlannerPreDeploymentSweepMode.None_:
+        raise ValueError(
+            "pre_deployment_sweeping_mode cannot be 'none' when mocker is enabled. "
+            "Mocker backend requires pre-deployment sweeping to generate simulated "
+            "performance profiles."
+        )
--- a/components/src/dynamo/profiler/utils/profile_common.py
+++ b/components/src/dynamo/profiler/utils/profile_common.py
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Shared helpers and configuration for the profiler pipeline."""
+
+import logging
+import os
+from dataclasses import dataclass
+
+import pandas as pd
+
+from dynamo.profiler.utils.config_modifiers.parallelization_mapping import (
+    PickedParallelConfig,
+)
+from dynamo.profiler.utils.dgdr_v1beta1_types import DynamoGraphDeploymentRequestSpec
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Published container image naming conventions
+# ---------------------------------------------------------------------------
+
+# Mapping from backend name to the image-name component of the published
+# backend runtime image.
+# e.g. vllm → nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
+BACKEND_IMAGE_NAMES: dict[str, str] = {
+    "vllm": "vllm-runtime",
+    "sglang": "sglang-runtime",
+    "trtllm": "tensorrtllm-runtime",
+}
+
+
+def derive_backend_image(profiler_image: str, backend: str) -> str:
+    """Derive the backend worker image from the profiler image.
+
+    Replaces the image name (the last ``/``-delimited component, before any
+    ``:tag``) with the backend-specific runtime image name, preserving the
+    registry path and tag unchanged. A trailing ``@digest`` is dropped: it
+    identifies the content of the profiler image and therefore cannot be
+    carried over to a different image.
+
+    Examples::
+
+        derive_backend_image(
+            "nvcr.io/nvidia/ai-dynamo/dynamo-frontend:1.0.0", "vllm"
+        )
+        # → "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0"
+
+        derive_backend_image("myregistry.io/sglang-runtime:1.0.0", "sglang")
+        # → "myregistry.io/sglang-runtime:1.0.0"
+
+        derive_backend_image(
+            "myregistry.io/dynamo-frontend:1.0.0@sha256:0123abcd", "trtllm"
+        )
+        # → "myregistry.io/tensorrtllm-runtime:1.0.0"
+
+    Args:
+        profiler_image: Any Docker image reference of the form
+            ``[REGISTRY/]NAME[:TAG][@DIGEST]``.
+        backend: The resolved backend type (``'vllm'``, ``'sglang'``, or
+            ``'trtllm'``).
+
+    Returns:
+        The backend container image string.
+
+    Raises:
+        ValueError: If *backend* is not a recognised backend.
+    """
+    backend_image_name = BACKEND_IMAGE_NAMES.get(backend)
+    if backend_image_name is None:
+        raise ValueError(
+            f"Cannot derive backend image for unknown backend '{backend}'. "
+            f"Supported backends: {list(BACKEND_IMAGE_NAMES.keys())}"
+        )
+
+    # Split on the LAST "/" so a registry port ("host:5000/name") is never
+    # mistaken for a tag separator. With no "/" both prefix parts are empty.
+    registry_path, slash, name_part = profiler_image.rpartition("/")
+
+    # Strip the digest first: its "sha256:<hex>" colon would otherwise be
+    # parsed as a tag separator and yield a bogus ":<hex>" tag.
+    name_part, _, _ = name_part.partition("@")
+
+    # Everything from the first ":" onwards is the tag (empty when untagged).
+    _, colon, tag = name_part.partition(":")
+
+    return f"{registry_path}{slash}{backend_image_name}{colon}{tag}"
+
+
+# ---------------------------------------------------------------------------
+# Operational defaults not part of DynamoGraphDeploymentRequestSpec
+# ---------------------------------------------------------------------------
+
+# Default value for ProfilerOperationalConfig.output_dir.
+DEFAULT_OUTPUT_DIR = "profiling_results"
+# Resolved once at import time: the DGDR_NAMESPACE environment variable wins,
+# otherwise the literal fallback below is used.
+DEFAULT_NAMESPACE = os.environ.get("DGDR_NAMESPACE", "dynamo-sla-profiler")
+# NOTE(review): presumably seconds, as in the CLI option this replaces — confirm.
+DEFAULT_DEPLOYMENT_TIMEOUT = 3600
+# NOTE(review): presumably the number of sample points benchmarked when
+# interpolating prefill (TTFT) and decode (ITL) behaviour — confirm with the
+# consumers of these values.
+DEFAULT_PREFILL_INTERPOLATION_GRANULARITY = 16
+DEFAULT_DECODE_INTERPOLATION_GRANULARITY = 6
+DEFAULT_DRY_RUN = False
+
+
+@dataclass
+class ProfilerOperationalConfig:
+    """Operational knobs that are not part of the DGDR spec.
+
+    Every field defaults to the matching module-level ``DEFAULT_*`` constant,
+    so ``ProfilerOperationalConfig()`` yields the stock configuration.
+    """
+
+    # Defaults to DEFAULT_OUTPUT_DIR.
+    output_dir: str = DEFAULT_OUTPUT_DIR
+    # Defaults to DEFAULT_NAMESPACE, which is read from the environment at import.
+    k8s_namespace: str = DEFAULT_NAMESPACE
+    # Defaults to DEFAULT_DEPLOYMENT_TIMEOUT (unit presumably seconds — confirm).
+    deployment_timeout: int = DEFAULT_DEPLOYMENT_TIMEOUT
+    # Default to the matching DEFAULT_*_INTERPOLATION_GRANULARITY constants.
+    prefill_interpolation_granularity: int = DEFAULT_PREFILL_INTERPOLATION_GRANULARITY
+    decode_interpolation_granularity: int = DEFAULT_DECODE_INTERPOLATION_GRANULARITY
+    # Defaults to DEFAULT_DRY_RUN (False).
+    dry_run: bool = DEFAULT_DRY_RUN
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def picked_config_from_row(prefix: str, row: pd.Series) -> PickedParallelConfig:
+    """Build a PickedParallelConfig from one picked ColumnsDisagg DataFrame row.
+
+    Each parallelism field is looked up in *row* under ``f"{prefix}<field>"``
+    and coerced to ``int``; a missing column falls back to ``1``.
+    """
+    field_names = ("tp", "pp", "dp", "moe_tp", "moe_ep")
+    sizes = {name: int(row.get(f"{prefix}{name}", 1)) for name in field_names}
+    return PickedParallelConfig(**sizes)
+
+
+def resolve_model_path(dgdr: DynamoGraphDeploymentRequestSpec) -> str:
+    """Return where the model should be loaded from.
+
+    When the spec's model cache names a PVC, a mount path and a model
+    sub-path, and the joined directory exists on the local filesystem, that
+    directory is returned. In every other case ``dgdr.model`` is returned
+    unchanged.
+    """
+    cache = dgdr.modelCache
+    pvc_fully_specified = (
+        cache and cache.pvcName and cache.pvcMountPath and cache.pvcModelPath
+    )
+    if not pvc_fully_specified:
+        return dgdr.model
+
+    # Normalise the slashes at the seam so the two parts join with exactly one.
+    mount_root = cache.pvcMountPath.rstrip("/")
+    model_subdir = cache.pvcModelPath.strip("/")
+    candidate = "/".join((mount_root, model_subdir))
+    return candidate if os.path.isdir(candidate) else dgdr.model
+
+
+def is_planner_enabled(dgdr: DynamoGraphDeploymentRequestSpec) -> bool:
+    """Report whether the spec carries a planner config with scaling turned on.
+
+    Returns ``False`` when ``features`` or ``features.planner`` is absent;
+    otherwise returns whatever the planner's ``scaling_enabled()`` reports.
+    """
+    features = dgdr.features
+    if features is None:
+        return False
+    planner = features.planner
+    if planner is None:
+        return False
+    return planner.scaling_enabled()
+
+
+def determine_picking_mode(dgdr: DynamoGraphDeploymentRequestSpec) -> str:
+    """Choose the picking mode for a DGDR spec.
+
+    Returns:
+        ``"autoscale"`` when the planner is enabled (this takes precedence),
+        ``"load_match"`` when the workload specifies a request rate or a
+        concurrency, and ``"default"`` otherwise.
+    """
+    workload = dgdr.workload
+    has_load_target = workload is not None and (
+        workload.requestRate is not None or workload.concurrency is not None
+    )
+    if is_planner_enabled(dgdr):
+        return "autoscale"
+    return "load_match" if has_load_target else "default"
+
+
+def warn_and_update_sla(
+    best_latencies: dict,
+    target_ttft: float,
+    target_tpot: float,
+) -> tuple[float, float]:
+    """Relax SLA targets that the best achievable latencies cannot meet.
+
+    Reads ``"ttft"`` and ``"tpot"`` from *best_latencies* (each defaulting to
+    ``0.0`` when absent). For every target that is strictly below the achieved
+    value, a warning is logged and the target is raised to the achieved value.
+
+    Returns:
+        ``(ttft, tpot)`` — the targets, each possibly replaced by the best
+        achievable value.
+    """
+    best_ttft = best_latencies.get("ttft", 0.0)
+    best_tpot = best_latencies.get("tpot", 0.0)
+
+    ttft_missed = best_ttft > target_ttft
+    if ttft_missed:
+        logger.warning(
+            "TTFT SLA %.1fms is unachievable. Best achievable: %.1fms. Updating SLA.",
+            target_ttft,
+            best_ttft,
+        )
+
+    tpot_missed = best_tpot > target_tpot
+    if tpot_missed:
+        logger.warning(
+            "ITL SLA %.1fms is unachievable. Best achievable: %.1fms. Updating SLA.",
+            target_tpot,
+            best_tpot,
+        )
+
+    return (
+        best_ttft if ttft_missed else target_ttft,
+        best_tpot if tpot_missed else target_tpot,
+    )
+
+
+def warn_gpu_shortage(
+    picking_mode: str,
+    best_latencies: dict,
+    total_gpus: int,
+) -> None:
+    """Log a warning when a load-matched pick needs more GPUs than exist.
+
+    Only the ``"load_match"`` picking mode is checked; any other mode returns
+    immediately. The requirement is read from
+    ``best_latencies["total_gpus_needed"]`` and ignored when absent or ``None``.
+    """
+    required = (
+        best_latencies.get("total_gpus_needed")
+        if picking_mode == "load_match"
+        else None
+    )
+    if required is not None and required > total_gpus:
+        logger.warning(
+            "Load target requires %d GPUs but only %d available. "
+            "Consider adding more GPUs or reducing the load target.",
+            required,
+            total_gpus,
+        )
--- a/components/src/dynamo/profiler/utils/profiler_argparse.py
+++ b/components/src/dynamo/profiler/utils/profiler_argparse.py
-# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-
-import argparse
-import ast
-import os
-from typing import Any, Dict
-
-import yaml
-
-from dynamo.profiler.utils.defaults import SearchStrategy
-from dynamo.profiler.utils.planner_utils import add_planner_arguments_to_parser
-from dynamo.profiler.utils.search_space_autogen import auto_generate_search_space
-
-
-def _get(cfg: Dict[str, Any], camel: str, snake: str, default: Any = None) -> Any:
-    """Get config value with camelCase preferred, snake_case fallback."""
-    if camel in cfg:
-        return cfg[camel]
-    return cfg.get(snake, default)
-
-
-def _camel_to_snake(name: str) -> str:
-    """Convert camelCase to snake_case."""
-    import re
-
-    # Insert underscore before uppercase letters and lowercase
-    s1 = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name)
-    return re.sub("([a-z0-9])([A-Z])", r"\1_\2", s1).lower()
-
-
-def parse_config_string(config_str: str) -> Dict[str, Any]:
-    """Parse configuration string as Python dict literal, YAML, or JSON.
-
-    Supports multiple input formats:
-    1. Python dict literal: "{'engine': {'backend': 'vllm'}, 'sla': {'isl': 3000}}"
-    2. YAML string: "engine:\n  backend: vllm\nsla:\n  isl: 3000"
-    3. JSON string: '{"engine": {"backend": "vllm"}, "sla": {"isl": 3000}}'
-
-    Args:
-        config_str: Configuration string in one of the supported formats
-
-    Returns:
-        Dictionary containing the configuration
-
-    Raises:
-        ValueError: If config cannot be parsed or is not a dictionary
-    """
-    config = None
-
-    # Try 1: Parse as Python dict literal (most direct for CLI)
-    try:
-        config = ast.literal_eval(config_str)
-        if isinstance(config, dict):
-            return config
-    except (ValueError, SyntaxError):
-        pass
-
-    # Try 2: Parse as YAML/JSON (for K8s ConfigMaps and files)
-    try:
-        config = yaml.safe_load(config_str)
-        if config is not None and isinstance(config, dict):
-            return config
-    except yaml.YAMLError:
-        pass
-
-    # If we got here, parsing failed
-    raise ValueError(
-        "Failed to parse config string. Expected Python dict literal, YAML, or JSON format. "
-        f"Examples:\n"
-        f"  Python dict: \"{'engine': {'backend': 'vllm'}}\"\n"
-        f'  YAML: "engine:\\n  backend: vllm"\n'
-        f'  JSON: \'{{"engine": {{"backend": "vllm"}}}}\''
-    )
-
-
-def create_profiler_parser() -> argparse.Namespace:
-    """
-    Create argument parser with support for YAML config string.
-
-    Config structure (camelCase preferred, snake_case supported for backwards compat):
-        outputDir: String (path to the output results directory, default: profiling_results)
-        deployment:
-            namespace: String (kubernetes namespace, default: dynamo-sla-profiler)
-            serviceName: String (service name, default: "")
-            model: String (served model name)
-            dgdImage: String (container image to use for DGD components (frontend, planner, workers), overrides images in config file)
-            deploymentTimeout: Int (maximum time to wait for deployment to become ready in seconds, default: 1800)
-            modelCache:
-                pvcName: String (name of the PVC to mount the model cache,
-                    if not provided, model must be HF name and will download from HF, default: "")
-                pvcPath: String (path to the model cache in the PVC, default: "")
-                mountPath: String (path to the model cache in the container,
-                    note that the PVC must be mounted to the same path for the profiling job,
-                    default: "/opt/model-cache")
-        engine:
-            backend: String (backend type, currently support [vllm, sglang, trtllm], default: vllm)
-            config: String (path to the DynamoGraphDeployment config file, default: "")
-            maxContextLength: Int (maximum context length supported by the served model, default: 0)
-            isMoeModel: Boolean (enable MoE (Mixture of Experts) model support, use TEP for prefill and DEP for decode, default: False)
-        hardware:
-            minNumGpusPerEngine: Int (minimum number of GPUs per engine, default: 0)
-            maxNumGpusPerEngine: Int (maximum number of GPUs per engine, default: 0)
-            numGpusPerNode: Int (number of GPUs per node, default: 0)
-            gpuModel: String (GPU model, used for auto-calculating search space, default: "")
-            gpuVramMib: Int (GPU VRAM in MiB, used for auto-calculating search space, default: 0)
-            system: String (target hardware system, e.g. h100_sxm, h200_sxm, default: None)
-        searchStrategy: String (search strategy for profiling: 'rapid' uses AI Configurator for quick estimation, 'thorough' runs actual deployments for comprehensive results, enum: [rapid, thorough], default: rapid)
-        sweep:
-            prefillInterpolationGranularity: Int (how many samples to benchmark to interpolate TTFT under different ISL, default: 16)
-            decodeInterpolationGranularity: Int (how many samples to benchmark to interpolate ITL under different active kv cache size and decode context length, default: 6)
-            dryRun: Boolean (dry run the profile job, default: False)
-            pickWithWebui: Boolean (pick the best parallelization mapping using webUI, default: False)
-            webuiPort: Int (webUI port, default: $PROFILER_WEBUI_PORT or 8000)
-        sla:
-            isl: Int (target input sequence length, default: 3000)
-            osl: Int (target output sequence length, default: 500)
-            ttft: Float (target Time To First Token in milliseconds, default: 50)
-            itl: Float (target Inter Token Latency in milliseconds, default: 10)
-        planner: (planner arguments)
-            e.g., plannerMinEndpoint: 2
-    """
-    # Step 1: Pre-parse to check if --profile-config is provided
-    pre_parser = argparse.ArgumentParser(add_help=False)
-    pre_parser.add_argument("--profile-config", type=str)
-    pre_args, _ = pre_parser.parse_known_args()
-
-    # Step 2: Parse config if provided
-    config = {}
-    if pre_args.profile_config:
-        config = parse_config_string(pre_args.profile_config)
-
-    # Step 3: Create main parser with config-aware defaults
-    parser = argparse.ArgumentParser(
-        description="Profile the TTFT and ITL of the Prefill and Decode engine with different parallelization mapping. When profiling prefill we mock/fix decode,when profiling decode we mock/fix prefill."
-    )
-
-    parser.add_argument(
-        "--profile-config",
-        type=str,
-        help="Configuration as Python dict literal, YAML, or JSON string. CLI args override config values. "
-        "Example: \"{'engine': {'backend': 'vllm', 'config': '/path'}, 'sla': {'isl': 3000}}\"",
-    )
-
-    # CLI arguments with config-aware defaults (using nested .get() for cleaner code)
-    parser.add_argument(
-        "--model",
-        type=str,
-        default=config.get("deployment", {}).get("model", ""),
-        help="Served model name",
-    )
-    model_cache_config = config.get("deployment", {}).get("modelCache", {})
-    parser.add_argument(
-        "--model-cache-pvc-name",
-        type=str,
-        default=model_cache_config.get("pvcName", ""),
-        help="Name of the PVC that contains the model weights. If not provided, args.model must be a HF model name and will download from HF",
-    )
-    parser.add_argument(
-        "--model-cache-pvc-path",
-        type=str,
-        default=model_cache_config.get("pvcPath", ""),
-        help="Path to the model cache in the PVC",
-    )
-    parser.add_argument(
-        "--model-cache-pvc-mount-path",
-        type=str,
-        default=model_cache_config.get("mountPath", "/opt/model-cache"),
-        help="Path to the model cache in the container, note that the PVC must be mounted to the same path for the profiling job",
-    )
-    deployment_cfg = config.get("deployment", {})
-    parser.add_argument(
-        "--dgd-image",
-        type=str,
-        default=_get(deployment_cfg, "dgdImage", "dgd_image", ""),
-        help="Container image to use for DGD components (frontend, planner, workers). Overrides images in config file.",
-    )
-    parser.add_argument(
-        "--deployment-timeout",
-        type=int,
-        default=_get(deployment_cfg, "deploymentTimeout", "deployment_timeout", 1800),
-        help="Maximum time to wait for deployment to become ready in seconds (default: 1800)",
-    )
-
-    parser.add_argument(
-        "--namespace",
-        type=str,
-        default=deployment_cfg.get("namespace", "dynamo-sla-profiler"),
-        help="Kubernetes namespace to deploy the DynamoGraphDeployment",
-    )
-    parser.add_argument(
-        "--backend",
-        type=str,
-        default=config.get("engine", {}).get("backend", "vllm"),
-        choices=["vllm", "sglang", "trtllm"],
-        help="backend type, currently support [vllm, sglang, trtllm]",
-    )
-    parser.add_argument(
-        "--config",
-        type=str,
-        default=config.get("engine", {}).get("config", ""),
-        required=False,
-        help="Path to the DynamoGraphDeployment config file (required, can be provided via CLI or config)",
-    )
-    parser.add_argument(
-        "--output-dir",
-        type=str,
-        default=_get(config, "outputDir", "output_dir", "profiling_results"),
-        help="Path to the output results directory",
-    )
-    hardware_cfg = config.get("hardware", {})
-    parser.add_argument(
-        "--min-num-gpus-per-engine",
-        type=int,
-        default=_get(hardware_cfg, "minNumGpusPerEngine", "min_num_gpus_per_engine", 0),
-        help="minimum number of GPUs per engine",
-    )
-    parser.add_argument(
-        "--max-num-gpus-per-engine",
-        type=int,
-        default=_get(hardware_cfg, "maxNumGpusPerEngine", "max_num_gpus_per_engine", 0),
-        help="maximum number of GPUs per engine",
-    )
-    parser.add_argument(
-        "--num-gpus-per-node",
-        type=int,
-        default=_get(hardware_cfg, "numGpusPerNode", "num_gpus_per_node", 0),
-        help="Number of GPUs per node",
-    )
-    parser.add_argument(
-        "--gpu-model",
-        type=str,
-        default=_get(hardware_cfg, "gpuModel", "gpu_model", ""),
-        help="GPU model name (used for auto-calculating search space)",
-    )
-    parser.add_argument(
-        "--gpu-vram-mib",
-        type=int,
-        default=_get(hardware_cfg, "gpuVramMib", "gpu_vram_mib", 0),
-        help="GPU VRAM in MiB (used for auto-calculating search space)",
-    )
-    parser.add_argument(
-        "--system",
-        type=str,
-        default=_get(hardware_cfg, "system", "system", None),
-        help="Target hardware system, e.g. h100_sxm, h200_sxm",
-    )
-    parser.add_argument(
-        "--isl",
-        type=int,
-        default=config.get("sla", {}).get("isl", 3000),
-        help="target input sequence length",
-    )
-    parser.add_argument(
-        "--osl",
-        type=int,
-        default=config.get("sla", {}).get("osl", 500),
-        help="target output sequence length",
-    )
-    parser.add_argument(
-        "--ttft",
-        type=float,
-        default=config.get("sla", {}).get("ttft", 50.0),
-        help="target Time To First Token (float, in milliseconds)",
-    )
-    parser.add_argument(
-        "--itl",
-        type=float,
-        default=config.get("sla", {}).get("itl", 10.0),
-        help="target Inter Token Latency (float, in milliseconds)",
-    )
-
-    # High-level profiling strategy argument
-    parser.add_argument(
-        "--search-strategy",
-        type=SearchStrategy,
-        default=SearchStrategy(
-            _get(config, "searchStrategy", "search_strategy", "rapid")
-        ),
-        choices=list(SearchStrategy),
-        help="Search strategy for profiling: 'rapid' uses AI Configurator for quick estimation, 'thorough' runs actual deployments for comprehensive results",
-    )
-
-    # arguments used for interpolating TTFT and ITL under different ISL/OSL
-    engine_cfg = config.get("engine", {})
-    parser.add_argument(
-        "--max-context-length",
-        type=int,
-        default=_get(engine_cfg, "maxContextLength", "max_context_length", 0),
-        help="maximum context length supported by the served model",
-    )
-    sweep_cfg = config.get("sweep", {})
-    parser.add_argument(
-        "--prefill-interpolation-granularity",
-        type=int,
-        default=_get(
-            sweep_cfg,
-            "prefillInterpolationGranularity",
-            "prefill_interpolation_granularity",
-            16,
-        ),
-        help="how many samples to benchmark to interpolate TTFT under different ISL",
-    )
-    parser.add_argument(
-        "--decode-interpolation-granularity",
-        type=int,
-        default=_get(
-            sweep_cfg,
-            "decodeInterpolationGranularity",
-            "decode_interpolation_granularity",
-            6,
-        ),
-        help="how many samples to benchmark to interpolate ITL under different active kv cache size and decode context length",
-    )
-    parser.add_argument(
-        "--service-name",
-        type=str,
-        default=_get(deployment_cfg, "serviceName", "service_name", ""),
-        help="Service name for port forwarding (default: {deployment_name}-frontend)",
-    )
-    parser.add_argument(
-        "--dry-run",
-        action="store_true",
-        default=_get(sweep_cfg, "dryRun", "dry_run", False),
-        help="Dry run the profile job",
-    )
-    parser.add_argument(
-        "--pick-with-webui",
-        action="store_true",
-        default=_get(sweep_cfg, "pickWithWebui", "pick_with_webui", False),
-        help="Pick the best parallelization mapping using webUI",
-    )
-
-    default_webui_port = 8000
-    webui_port_env = os.environ.get("PROFILER_WEBUI_PORT")
-    if webui_port_env:
-        default_webui_port = int(webui_port_env)
-    parser.add_argument(
-        "--webui-port",
-        type=int,
-        default=_get(sweep_cfg, "webuiPort", "webui_port", default_webui_port),
-        help="WebUI port",
-    )
-
-    # Dynamically add all planner arguments from planner_argparse.py
-    add_planner_arguments_to_parser(parser, prefix="planner-")
-    # Set defaults for any planner arguments found in config.planner
-    # Normalize keys: camelCase -> snake_case, hyphens -> underscores
-    planner_config = config.get("planner", {})
-    if planner_config:
-        normalized_planner_config = {
-            _camel_to_snake(key).replace("-", "_"): value
-            for key, value in planner_config.items()
-        }
-        parser.set_defaults(**normalized_planner_config)
-
-    # Parse arguments
-    args = parser.parse_args()
-
-    # remove --profile-config from args
-    if hasattr(args, "profile_config"):
-        delattr(args, "profile_config")
-
-    # Validate required arguments
-    # Either --model or --config (or both) must be provided
-    if not args.model and not args.config:
-        parser.error("--model or --config is required (provide at least one)")
-
-    auto_generate_search_space(args)
-    return args
--- a/container/deps/requirements.txt
+++ b/container/deps/requirements.txt
@@ -12,7 +12,7 @@

 # For Multimodal EPD (required for device_map="auto" in vision model loading)
 accelerate
-aiconfigurator[webapp] @ git+https://github.com/ai-dynamo/aiconfigurator.git@7a24afd98714af13f061cffe784d4808f5356d45
+aiconfigurator[webapp] @ git+https://github.com/ai-dynamo/aiconfigurator.git@168a948d5bc32209728fe8639191a9e0d9083d18
 aiofiles
 aiperf @ git+https://github.com/ai-dynamo/aiperf.git@54cd6dc820bff8bfebc875da104e59d745e14f75
 av==15.0.0

--- a/deploy/helm/charts/platform/components/operator/crds/nvidia.com_dynamographdeploymentrequests.yaml
+++ b/deploy/helm/charts/platform/components/operator/crds/nvidia.com_dynamographdeploymentrequests.yaml
@@ -594,8 +594,7 @@ spec:
                image:
                  description: |-
                    Image is the container image reference for the profiling job (frontend image).
-                    Example: "nvcr.io/nvidia/dynamo-runtime:latest"
-                    backend type automatically; backend images can be overridden via overrides.dgd.
+                    Example: "nvcr.io/nvidia/ai-dynamo/dynamo-frontend:1.0.0".
                  type: string
                model:
                  description: |-

--- a/deploy/operator/api/v1beta1/dynamographdeploymentrequest_types.go
+++ b/deploy/operator/api/v1beta1/dynamographdeploymentrequest_types.go
@@ -357,9 +357,7 @@ type DynamoGraphDeploymentRequestSpec struct {
 	Backend BackendType `json:"backend,omitempty"`

 	// Image is the container image reference for the profiling job (frontend image).
-	// Example: "nvcr.io/nvidia/dynamo-runtime:latest"
-	// TODO: In a future MR, the operator will derive the backend inference image from the
-	// backend type automatically; backend images can be overridden via overrides.dgd.
+	// Example: "nvcr.io/nvidia/ai-dynamo/dynamo-frontend:1.0.0".
 	// +optional
 	Image string `json:"image,omitempty"`


--- a/deploy/operator/config/crd/bases/nvidia.com_dynamographdeploymentrequests.yaml
+++ b/deploy/operator/config/crd/bases/nvidia.com_dynamographdeploymentrequests.yaml
@@ -594,8 +594,7 @@ spec:
                image:
                  description: |-
                    Image is the container image reference for the profiling job (frontend image).
-                    Example: "nvcr.io/nvidia/dynamo-runtime:latest"
-                    backend type automatically; backend images can be overridden via overrides.dgd.
+                    Example: "nvcr.io/nvidia/ai-dynamo/dynamo-frontend:1.0.0".
                  type: string
                model:
                  description: |-