feat: support AIC DGD gen call (WILL BREAK DGDR) (#6216)

Signed-off-by: hongkuanz <hongkuanz@nvidia.com>

feat: support AIC DGD gen call (WILL BREAK DGDR) (#6216)
Signed-off-by: hongkuanz <hongkuanz@nvidia.com>
a04b5631 · Hongkuan Zhou · GitHub · 7b16480a · a04b5631 · a04b5631
Unverified Commit a04b5631 authored Feb 12, 2026 by Hongkuan Zhou Committed by GitHub Feb 12, 2026
20 changed files
--- a/.github/filters.yaml
+++ b/.github/filters.yaml
@@ -110,6 +110,7 @@ deploy:
 planner:
  - 'components/src/dynamo/planner/**'
  - 'tests/planner/**'
+  - 'components/src/dynamo/profiler/**'
  - 'components/src/dynamo/global_router/**'
 vllm:

--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -28,7 +28,7 @@ CODEOWNERS @ai-dynamo/Devops
 /components/src/dynamo/planner/ @ai-dynamo/python-codeowners @ai-dynamo/Devops
 /components/src/dynamo/global_router/ @ai-dynamo/python-codeowners @ai-dynamo/Devops
 /examples/hierarchical_planner/ @ai-dynamo/python-codeowners @ai-dynamo/Devops
-/benchmarks/profiler/ @ai-dynamo/python-codeowners @ai-dynamo/Devops
+/components/src/dynamo/profiler/ @ai-dynamo/python-codeowners @ai-dynamo/Devops
 /tests/planner/ @ai-dynamo/python-codeowners @ai-dynamo/Devops
 # recipes

--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -142,7 +142,7 @@ Understanding Dynamo's architecture helps you find where to make changes. For th
 | **Planner** | Real-time performance tuning and worker scaling | `components/src/dynamo/planner/` |
 | **Workers** | Backend engine integrations (vLLM, SGLang, TensorRT-LLM) | `components/src/dynamo/{vllm,sglang,trtllm}/` |
 | **KV Cache Manager** | Multi-tier cache offloading (GPU → CPU → SSD → Object Storage) | `lib/bindings/kvbm/` |
-| **SLA Profiler** | Benchmarking and SLA-driven configuration | `benchmarks/profiler/` |
+| **SLA Profiler** | Benchmarking and SLA-driven configuration | `components/src/dynamo/profiler/` |
 ### Communication Planes

--- a/components/src/dynamo/mocker/README.md
+++ b/components/src/dynamo/mocker/README.md
@@ -67,7 +67,7 @@ The profile results directory should contain `selected_prefill_interpolation/` a
 To generate profiling data for your own model/hardware configuration, run the profiler (see [SLA-driven profiling documentation](../../../../docs/pages/components/profiler/profiler-guide.md) for details):
 ```bash
-python benchmarks/profiler/profile_sla.py \
+python components/src/dynamo/profiler/profile_sla.py \
  --profile-config your_profile_config.yaml
 ```

--- a/components/src/dynamo/planner/kubernetes_connector.py
+++ b/components/src/dynamo/planner/kubernetes_connector.py
@@ -174,7 +174,7 @@ class KubernetesConnector(PlannerConnector):
                    self.graph_deployment_name
                )
-            # TODO: benchmarks/profiler/utils/config.py already contains DGD config parsing
+            # TODO: dynamo/profiler/utils/config.py already contains DGD config parsing
            # and model name logic, should consolidate
            prefill_model_name = None
            decode_model_name = None

--- a/benchmarks/profiler/README.md
+++ b/benchmarks/profiler/README.md
@@ -6,8 +6,8 @@ SPDX-License-Identifier: Apache-2.0
 # Profiler
-Documentation for the Dynamo Profiler has moved to [docs/pages/components/profiler/](../../docs/pages/components/profiler/README.md).
+Documentation for the Dynamo Profiler has moved to [docs/pages/components/profiler/](../../../../docs/pages/components/profiler/README.md).
- [Profiler Overview](../../docs/pages/components/profiler/README.md)
+- [Profiler Overview](../../../../docs/pages/components/profiler/README.md)
- [Profiler Guide](../../docs/pages/components/profiler/profiler-guide.md)
+- [Profiler Guide](../../../../docs/pages/components/profiler/profiler-guide.md)
- [Profiler Examples](../../docs/pages/components/profiler/profiler-examples.md)
+- [Profiler Examples](../../../../docs/pages/components/profiler/profiler-examples.md)
--- a/benchmarks/profiler/__init__.py
+++ b/benchmarks/profiler/__init__.py
--- a/benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml
+++ b/benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml
--- a/benchmarks/profiler/deploy/profile_sla_dgdr.yaml
+++ b/benchmarks/profiler/deploy/profile_sla_dgdr.yaml
--- a/benchmarks/profiler/deploy/profile_sla_moe_dgdr.yaml
+++ b/benchmarks/profiler/deploy/profile_sla_moe_dgdr.yaml
--- a/benchmarks/profiler/profile_endpoint.py
+++ b/benchmarks/profiler/profile_endpoint.py
@@ -5,9 +5,9 @@ import argparse
 import logging
 import os
-from benchmarks.profiler.utils.defaults import EngineType
+from dynamo.profiler.utils.defaults import EngineType
-from benchmarks.profiler.utils.profile_decode import profile_decode
+from dynamo.profiler.utils.profile_decode import profile_decode
-from benchmarks.profiler.utils.profile_prefill import profile_prefill
+from dynamo.profiler.utils.profile_prefill import profile_prefill
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)

--- a/benchmarks/profiler/profile_sla.py
+++ b/benchmarks/profiler/profile_sla.py
@@ -22,49 +22,46 @@ from dataclasses import dataclass, field
 import numpy as np
 import yaml
-from benchmarks.profiler.utils.aiperf import (
+from deploy.utils.dynamo_deployment import (
+    DynamoDeploymentClient,
+    cleanup_remaining_deployments,
+)
+from dynamo.planner.defaults import SubComponentType
+from dynamo.profiler.utils.aiperf import (
    get_decode_itl_and_thpt_per_gpu,
    get_prefill_ttft,
 )
-from benchmarks.profiler.utils.config import Config, get_service_name_by_type
+from dynamo.profiler.utils.config import Config, get_service_name_by_type
-from benchmarks.profiler.utils.config_modifiers import CONFIG_MODIFIERS
+from dynamo.profiler.utils.config_modifiers import CONFIG_MODIFIERS
-from benchmarks.profiler.utils.config_modifiers.parallelization_mapping import (
+from dynamo.profiler.utils.config_modifiers.parallelization_mapping import (
    ParallelizationMapping,
    apply_parallel_mapping_to_config,
    get_candidate_parallel_mappings,
 )
-from benchmarks.profiler.utils.defaults import EngineType
+from dynamo.profiler.utils.defaults import EngineType
-from benchmarks.profiler.utils.dgd_generation import generate_dgd_config_with_planner
+from dynamo.profiler.utils.dgd_generation import generate_dgd_config_with_planner
-from benchmarks.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator
+from dynamo.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator
-from benchmarks.profiler.utils.plot import (
+from dynamo.profiler.utils.plot import (
    plot_decode_performance,
    plot_pd_joint_results,
    plot_prefill_performance,
 )
-from benchmarks.profiler.utils.profile_decode import (
+from dynamo.profiler.utils.profile_decode import (
    get_num_request_range,
    profile_decode,
    profile_decode_aiconfigurator,
 )
-from benchmarks.profiler.utils.profile_prefill import (
+from dynamo.profiler.utils.profile_prefill import (
    profile_prefill,
    profile_prefill_aiconfigurator,
 )
-from benchmarks.profiler.utils.profiler_argparse import create_profiler_parser
+from dynamo.profiler.utils.profiler_argparse import create_profiler_parser
-from benchmarks.profiler.utils.profiler_status import (
+from dynamo.profiler.utils.profiler_status import ProfilerStatus, write_profiler_status
-    ProfilerStatus,
+from dynamo.profiler.webui.select_config import (
-    write_profiler_status,
-)
-from benchmarks.profiler.webui.select_config import (
    add_profiling_error,
    clear_profiling_errors,
    pick_config_with_webui,
 )
-from deploy.utils.dynamo_deployment import (
-    DynamoDeploymentClient,
-    cleanup_remaining_deployments,
-)
-from dynamo.planner.defaults import SubComponentType
 @dataclass

--- a/benchmarks/profiler/utils/__init__.py
+++ b/benchmarks/profiler/utils/__init__.py
--- a/benchmarks/profiler/utils/aiperf.py
+++ b/benchmarks/profiler/utils/aiperf.py
@@ -20,7 +20,7 @@ import random
 import subprocess
 from typing import Optional, Tuple
-from benchmarks.profiler.utils.defaults import (
+from dynamo.profiler.utils.defaults import (
    AIPERF_PREFILL_ATTN_DP_NUM_REQ_RATIO,
    AIPERF_PREFILL_BENCHMARK_OSL,
    AIPERF_WARMUP_REQUEST_PER_DP_RANK,

--- a/benchmarks/profiler/utils/config.py
+++ b/benchmarks/profiler/utils/config.py
--- a/benchmarks/profiler/utils/config_modifiers/__init__.py
+++ b/benchmarks/profiler/utils/config_modifiers/__init__.py
@@ -16,13 +16,13 @@
 from typing import TYPE_CHECKING
 if TYPE_CHECKING:
-    from benchmarks.profiler.utils.config_modifiers.protocol import (
+    from dynamo.profiler.utils.config_modifiers.protocol import (
        ConfigModifierProtocol,
    )
-from benchmarks.profiler.utils.config_modifiers.sglang import SGLangConfigModifier
+from dynamo.profiler.utils.config_modifiers.sglang import SGLangConfigModifier
-from benchmarks.profiler.utils.config_modifiers.trtllm import TrtllmConfigModifier
+from dynamo.profiler.utils.config_modifiers.trtllm import TrtllmConfigModifier
-from benchmarks.profiler.utils.config_modifiers.vllm import VllmV1ConfigModifier
+from dynamo.profiler.utils.config_modifiers.vllm import VllmV1ConfigModifier
 CONFIG_MODIFIERS: dict[str, type["ConfigModifierProtocol"]] = {
    "vllm": VllmV1ConfigModifier,

--- a/benchmarks/profiler/utils/config_modifiers/parallelization_mapping.py
+++ b/benchmarks/profiler/utils/config_modifiers/parallelization_mapping.py
@@ -6,12 +6,9 @@ import logging
 from dataclasses import dataclass
 from enum import Enum
-from benchmarks.profiler.utils.defaults import PREFILL_MAX_NUM_TOKENS
-from benchmarks.profiler.utils.model_info import (
-    MOE_ADDITIONAL_TP_ARCHITECTURES,
-    ModelInfo,
-)
 from dynamo.planner.defaults import SubComponentType
+from dynamo.profiler.utils.defaults import PREFILL_MAX_NUM_TOKENS
+from dynamo.profiler.utils.model_info import MOE_ADDITIONAL_TP_ARCHITECTURES, ModelInfo
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)

--- a/benchmarks/profiler/utils/config_modifiers/protocol.py
+++ b/benchmarks/profiler/utils/config_modifiers/protocol.py
@@ -15,18 +15,23 @@
 from __future__ import annotations
+import logging
 from typing import Any, Protocol, Tuple
-from benchmarks.profiler.utils.config import (
+from dynamo.planner.defaults import SubComponentType
+from dynamo.profiler.utils.config import (
    Config,
    Container,
    PodSpec,
+    ServiceResources,
    break_arguments,
    get_service_name_by_type,
    set_argument_value,
+    update_image,
 )
-from benchmarks.profiler.utils.defaults import EngineType
+from dynamo.profiler.utils.defaults import EngineType
-from dynamo.planner.defaults import SubComponentType
+logger = logging.getLogger(__name__)
 class ConfigModifierProtocol(Protocol):
@@ -93,7 +98,7 @@ class ConfigModifierProtocol(Protocol):
        ...
    @classmethod
-    def load_default_config(cls) -> dict:
+    def load_default_config(cls, mode: str = "disagg") -> dict:
        ...
    @classmethod
@@ -407,3 +412,197 @@ class BaseConfigModifier:
        )
        return cfg.model_dump()
+    @classmethod
+    def build_dgd_config(
+        cls,
+        mode: str,
+        model_name: str,
+        image: str,
+        # Disagg workers (used when mode=="disagg")
+        prefill_cli_args: list[str] | None = None,
+        prefill_replicas: int = 1,
+        prefill_gpus: int = 1,
+        decode_cli_args: list[str] | None = None,
+        decode_replicas: int = 1,
+        decode_gpus: int = 1,
+        # Agg worker (used when mode=="agg")
+        agg_cli_args: list[str] | None = None,
+        agg_replicas: int = 1,
+        agg_gpus: int = 1,
+        # Optional
+        namespace: str | None = None,
+        model_path: str | None = None,
+        pvc_name: str | None = None,
+        pvc_mount_path: str | None = None,
+    ) -> dict:
+        """
+        Build a complete DynamoGraphDeployment config by loading a base YAML
+        and injecting pre-computed CLI args, model, image, replicas, and GPU resources.
+        This is intended for use by external tools (e.g. AIConfigurator) that
+        have already computed the per-worker CLI arguments and just need them
+        placed into a valid DGD config structure.
+        Args:
+            mode: "agg" or "disagg"
+            model_name: Model name / HuggingFace ID (e.g. "Qwen/Qwen3-32B")
+            image: Container image for all services
+            prefill_cli_args: Pre-computed CLI args list for prefill worker
+            prefill_replicas: Number of prefill worker replicas
+            prefill_gpus: GPUs per prefill worker
+            decode_cli_args: Pre-computed CLI args list for decode worker
+            decode_replicas: Number of decode worker replicas
+            decode_gpus: GPUs per decode worker
+            agg_cli_args: Pre-computed CLI args list for agg worker
+            agg_replicas: Number of agg worker replicas
+            agg_gpus: GPUs per agg worker
+            namespace: K8s namespace (optional)
+            model_path: Model path if different from model_name (e.g. PVC path)
+            pvc_name: PVC claim name for model cache (optional)
+            pvc_mount_path: PVC mount path (optional)
+        Returns:
+            Complete DGD config dict ready for YAML serialization
+        Raises:
+            ValueError: If mode is not "agg" or "disagg"
+        """
+        if mode not in ("agg", "disagg"):
+            raise ValueError(f"Invalid mode '{mode}': must be 'agg' or 'disagg'")
+        config = cls.load_default_config(mode=mode)
+        cfg = Config.model_validate(config)
+        # Set metadata
+        cfg.metadata.name = f"{cls.BACKEND}-{mode}"
+        if namespace and hasattr(cfg.metadata, "namespace"):
+            cfg.metadata.namespace = namespace
+        # Update image for all services
+        config = update_image(cfg.model_dump(), image)
+        cfg = Config.model_validate(config)
+        if mode == "disagg":
+            cls._apply_disagg_workers(
+                cfg,
+                prefill_cli_args=prefill_cli_args or [],
+                prefill_replicas=prefill_replicas,
+                prefill_gpus=prefill_gpus,
+                decode_cli_args=decode_cli_args or [],
+                decode_replicas=decode_replicas,
+                decode_gpus=decode_gpus,
+            )
+        else:
+            cls._apply_agg_worker(
+                cfg,
+                agg_cli_args=agg_cli_args or [],
+                agg_replicas=agg_replicas,
+                agg_gpus=agg_gpus,
+            )
+        # Update model (handles worker args + frontend patching)
+        effective_model_path = model_path or model_name
+        if pvc_name and pvc_mount_path:
+            result = cls.update_model_from_pvc(
+                cfg.model_dump(),
+                model_name=model_name,
+                pvc_name=pvc_name,
+                pvc_mount_path=pvc_mount_path,
+                pvc_path="",
+            )
+        else:
+            result = cls.update_model(
+                cfg.model_dump(),
+                model_name=model_name,
+                model_path=effective_model_path,
+            )
+        return result
+    _NON_WORKER_SERVICES = {"Frontend", "Planner"}
+    @classmethod
+    def _resolve_service_name(
+        cls,
+        cfg: Config,
+        component_type: SubComponentType,
+    ) -> str | None:
+        """Resolve the service name for a given component type, with fallback."""
+        try:
+            return get_service_name_by_type(cfg, cls.BACKEND, component_type)
+        except Exception:
+            # Fallback: find the first worker service (skip Frontend, Planner)
+            for name in cfg.spec.services:
+                if name not in cls._NON_WORKER_SERVICES:
+                    return name
+            return None
+    @staticmethod
+    def _apply_worker_config(
+        service: Any,
+        cli_args: list[str],
+        replicas: int,
+        gpus: int,
+    ) -> None:
+        """Apply CLI args, replicas, and GPU resources to a single worker service."""
+        service.replicas = replicas
+        if service.resources is None:
+            service.resources = ServiceResources()
+        if service.resources.limits is None:
+            service.resources.limits = {}
+        service.resources.limits["gpu"] = str(gpus)
+        if service.extraPodSpec and service.extraPodSpec.mainContainer:
+            service.extraPodSpec.mainContainer.args = list(cli_args)
+    @classmethod
+    def _apply_disagg_workers(
+        cls,
+        cfg: Config,
+        prefill_cli_args: list[str],
+        prefill_replicas: int,
+        prefill_gpus: int,
+        decode_cli_args: list[str],
+        decode_replicas: int,
+        decode_gpus: int,
+    ) -> None:
+        """Apply CLI args, replicas, and GPU resources to disagg worker services."""
+        for sct, cli_args, replicas, gpus in [
+            (
+                SubComponentType.PREFILL,
+                prefill_cli_args,
+                prefill_replicas,
+                prefill_gpus,
+            ),
+            (SubComponentType.DECODE, decode_cli_args, decode_replicas, decode_gpus),
+        ]:
+            svc_name = cls._resolve_service_name(cfg, sct)
+            if svc_name is None or svc_name not in cfg.spec.services:
+                logger.warning(
+                    "Could not find %s service for backend %s, skipping",
+                    sct.value,
+                    cls.BACKEND,
+                )
+                continue
+            cls._apply_worker_config(
+                cfg.spec.services[svc_name], cli_args, replicas, gpus
+            )
+    @classmethod
+    def _apply_agg_worker(
+        cls,
+        cfg: Config,
+        agg_cli_args: list[str],
+        agg_replicas: int,
+        agg_gpus: int,
+    ) -> None:
+        """Apply CLI args, replicas, and GPU resources to the agg worker service."""
+        svc_name = cls._resolve_service_name(cfg, SubComponentType.DECODE)
+        if svc_name is None or svc_name not in cfg.spec.services:
+            logger.warning("Could not find worker service for agg mode")
+            return
+        cls._apply_worker_config(
+            cfg.spec.services[svc_name], agg_cli_args, agg_replicas, agg_gpus
+        )
--- a/benchmarks/profiler/utils/config_modifiers/sglang.py
+++ b/benchmarks/profiler/utils/config_modifiers/sglang.py
@@ -7,7 +7,8 @@ from typing import Tuple
 import yaml
-from benchmarks.profiler.utils.config import (
+from dynamo.planner.defaults import SubComponentType
+from dynamo.profiler.utils.config import (
    Config,
    append_argument,
    break_arguments,
@@ -19,9 +20,12 @@ from benchmarks.profiler.utils.config import (
    update_image,
    validate_and_get_worker_args,
 )
-from benchmarks.profiler.utils.config_modifiers.protocol import BaseConfigModifier
+from dynamo.profiler.utils.config_modifiers.protocol import BaseConfigModifier
-from benchmarks.profiler.utils.defaults import DYNAMO_RUN_DEFAULT_PORT, EngineType
+from dynamo.profiler.utils.defaults import (
-from dynamo.planner.defaults import SubComponentType
+    DYNAMO_RUN_DEFAULT_PORT,
+    EngineType,
+    resolve_deploy_path,
+)
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
@@ -33,16 +37,25 @@ formatter = logging.Formatter(
 console_handler.setFormatter(formatter)
 logger.addHandler(console_handler)
+DEFAULT_SGLANG_DISAGG_CONFIG_PATH = resolve_deploy_path(
-DEFAULT_SGLANG_CONFIG_PATH = "examples/backends/sglang/deploy/disagg.yaml"
+    "examples/backends/sglang/deploy/disagg.yaml"
+)
+DEFAULT_SGLANG_AGG_CONFIG_PATH = resolve_deploy_path(
+    "examples/backends/sglang/deploy/agg.yaml"
+)
 class SGLangConfigModifier(BaseConfigModifier):
    BACKEND = "sglang"
    @classmethod
-    def load_default_config(cls) -> dict:
+    def load_default_config(cls, mode: str = "disagg") -> dict:
-        with open(DEFAULT_SGLANG_CONFIG_PATH, "r") as f:
+        path = (
+            DEFAULT_SGLANG_AGG_CONFIG_PATH
+            if mode == "agg"
+            else DEFAULT_SGLANG_DISAGG_CONFIG_PATH
+        )
+        with open(path, "r") as f:
            return yaml.safe_load(f)
    @classmethod

--- a/benchmarks/profiler/utils/config_modifiers/trtllm.py
+++ b/benchmarks/profiler/utils/config_modifiers/trtllm.py
@@ -8,7 +8,8 @@ from typing import Tuple
 import yaml
-from benchmarks.profiler.utils.config import (
+from dynamo.planner.defaults import SubComponentType
+from dynamo.profiler.utils.config import (
    Config,
    append_argument,
    break_arguments,
@@ -20,9 +21,12 @@ from benchmarks.profiler.utils.config import (
    update_image,
    validate_and_get_worker_args,
 )
-from benchmarks.profiler.utils.config_modifiers.protocol import BaseConfigModifier
+from dynamo.profiler.utils.config_modifiers.protocol import BaseConfigModifier
-from benchmarks.profiler.utils.defaults import DYNAMO_RUN_DEFAULT_PORT, EngineType
+from dynamo.profiler.utils.defaults import (
-from dynamo.planner.defaults import SubComponentType
+    DYNAMO_RUN_DEFAULT_PORT,
+    EngineType,
+    resolve_deploy_path,
+)
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
@@ -34,16 +38,25 @@ formatter = logging.Formatter(
 console_handler.setFormatter(formatter)
 logger.addHandler(console_handler)
+DEFAULT_TRTLLM_DISAGG_CONFIG_PATH = resolve_deploy_path(
-DEFAULT_TRTLLM_CONFIG_PATH = "examples/backends/trtllm/deploy/disagg.yaml"
+    "examples/backends/trtllm/deploy/disagg.yaml"
+)
+DEFAULT_TRTLLM_AGG_CONFIG_PATH = resolve_deploy_path(
+    "examples/backends/trtllm/deploy/agg.yaml"
+)
 class TrtllmConfigModifier(BaseConfigModifier):
    BACKEND = "trtllm"
    @classmethod
-    def load_default_config(cls) -> dict:
+    def load_default_config(cls, mode: str = "disagg") -> dict:
-        with open(DEFAULT_TRTLLM_CONFIG_PATH, "r") as f:
+        path = (
+            DEFAULT_TRTLLM_AGG_CONFIG_PATH
+            if mode == "agg"
+            else DEFAULT_TRTLLM_DISAGG_CONFIG_PATH
+        )
+        with open(path, "r") as f:
            return yaml.safe_load(f)
    @classmethod