feat: support AIC DGD gen call (WILL BREAK DGDR) (#6216)

Signed-off-by: hongkuanz <hongkuanz@nvidia.com>

feat: support AIC DGD gen call (WILL BREAK DGDR) (#6216)
Signed-off-by: hongkuanz <hongkuanz@nvidia.com>
a04b5631 · Hongkuan Zhou · GitHub · 7b16480a · a04b5631 · a04b5631
Unverified commit a04b5631, authored Feb 12, 2026 by Hongkuan Zhou; committed by GitHub on Feb 12, 2026.
20 changed files
--- a/benchmarks/profiler/utils/config_modifiers/vllm.py
+++ b/benchmarks/profiler/utils/config_modifiers/vllm.py
@@ -6,7 +6,8 @@ from typing import Tuple

 import yaml

-from benchmarks.profiler.utils.config import (
+from dynamo.planner.defaults import SubComponentType
+from dynamo.profiler.utils.config import (
    Config,
    append_argument,
    break_arguments,
@@ -18,9 +19,12 @@ from benchmarks.profiler.utils.config import (
    update_image,
    validate_and_get_worker_args,
 )
-from benchmarks.profiler.utils.config_modifiers.protocol import BaseConfigModifier
-from benchmarks.profiler.utils.defaults import DYNAMO_RUN_DEFAULT_PORT, EngineType
-from dynamo.planner.defaults import SubComponentType
+from dynamo.profiler.utils.config_modifiers.protocol import BaseConfigModifier
+from dynamo.profiler.utils.defaults import (
+    DYNAMO_RUN_DEFAULT_PORT,
+    EngineType,
+    resolve_deploy_path,
+)

 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
@@ -32,8 +36,12 @@ formatter = logging.Formatter(
 console_handler.setFormatter(formatter)
 logger.addHandler(console_handler)

-
-DEFAULT_VLLM_CONFIG_PATH = "examples/backends/vllm/deploy/disagg.yaml"
+DEFAULT_VLLM_DISAGG_CONFIG_PATH = resolve_deploy_path(
+    "examples/backends/vllm/deploy/disagg.yaml"
+)
+DEFAULT_VLLM_AGG_CONFIG_PATH = resolve_deploy_path(
+    "examples/backends/vllm/deploy/agg.yaml"
+)


 class VllmV1ConfigModifier(BaseConfigModifier):
@@ -42,8 +50,13 @@ class VllmV1ConfigModifier(BaseConfigModifier):
    WORKER_MODEL_PATH_ARG = "--model"

    @classmethod
-    def load_default_config(cls) -> dict:
-        with open(DEFAULT_VLLM_CONFIG_PATH, "r") as f:
+    def load_default_config(cls, mode: str = "disagg") -> dict:
+        path = (
+            DEFAULT_VLLM_AGG_CONFIG_PATH
+            if mode == "agg"
+            else DEFAULT_VLLM_DISAGG_CONFIG_PATH
+        )
+        with open(path, "r") as f:
            return yaml.safe_load(f)

    @classmethod

--- a/benchmarks/profiler/utils/defaults.py
+++ b/benchmarks/profiler/utils/defaults.py
@@ -13,8 +13,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import os
 from enum import Enum

+
+def resolve_deploy_path(rel_path: str) -> str:
+    """Resolve a deploy YAML path relative to the dynamo workspace root.
+
+    Uses get_workspace_dir() which handles repo root, container, and env var cases.
+    """
+    from dynamo.common.utils.paths import get_workspace_dir
+
+    return os.path.join(get_workspace_dir(), rel_path)
+
+
 DYNAMO_RUN_DEFAULT_PORT = 8000

 # set a decode maximum concurrency due to limits of profiling tools

--- a/benchmarks/profiler/utils/dgd_generation.py
+++ b/benchmarks/profiler/utils/dgd_generation.py
@@ -21,19 +21,19 @@ from typing import Any, Optional
 import numpy as np
 import yaml

-from benchmarks.profiler.utils.config import (
+from dynamo.common.utils.paths import get_workspace_dir
+from dynamo.planner.defaults import MockerComponentName, SubComponentType
+from dynamo.profiler.utils.config import (
    Config,
    DgdPlannerServiceConfig,
    set_argument_value,
 )
-from benchmarks.profiler.utils.config_modifiers import CONFIG_MODIFIERS
-from benchmarks.profiler.utils.config_modifiers.parallelization_mapping import (
+from dynamo.profiler.utils.config_modifiers import CONFIG_MODIFIERS
+from dynamo.profiler.utils.config_modifiers.parallelization_mapping import (
    ParallelizationMapping,
    apply_parallel_mapping_to_config,
 )
-from benchmarks.profiler.utils.planner_utils import build_planner_args_from_namespace
-from dynamo.common.utils.paths import get_workspace_dir
-from dynamo.planner.defaults import MockerComponentName, SubComponentType
+from dynamo.profiler.utils.planner_utils import build_planner_args_from_namespace

 # Path to mocker disagg config relative to workspace
 MOCKER_DISAGG_CONFIG_PATH = "examples/backends/mocker/deploy/disagg.yaml"

--- a/benchmarks/profiler/utils/estimate_perf.py
+++ b/benchmarks/profiler/utils/estimate_perf.py
--- a/benchmarks/profiler/utils/model_info.py
+++ b/benchmarks/profiler/utils/model_info.py
--- a/benchmarks/profiler/utils/pareto.py
+++ b/benchmarks/profiler/utils/pareto.py
--- a/benchmarks/profiler/utils/planner_utils.py
+++ b/benchmarks/profiler/utils/planner_utils.py
--- a/benchmarks/profiler/utils/plot.py
+++ b/benchmarks/profiler/utils/plot.py
@@ -21,8 +21,8 @@ import numpy as np
 from matplotlib import cm
 from scipy.interpolate import griddata

-from benchmarks.profiler.utils.defaults import DEFAULT_GPU_COST_PER_HOUR
-from benchmarks.profiler.utils.pareto import compute_pareto
+from dynamo.profiler.utils.defaults import DEFAULT_GPU_COST_PER_HOUR
+from dynamo.profiler.utils.pareto import compute_pareto

 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)

--- a/benchmarks/profiler/utils/profile_decode.py
+++ b/benchmarks/profiler/utils/profile_decode.py
@@ -6,10 +6,10 @@ from typing import Callable, Optional, Tuple

 import numpy as np

-from benchmarks.profiler.utils.aiperf import get_decode_itl_and_thpt_per_gpu
-from benchmarks.profiler.utils.defaults import DECODE_MAX_CONCURRENCY
-from benchmarks.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator
-from benchmarks.profiler.utils.plot import plot_decode_3d_surface
+from dynamo.profiler.utils.aiperf import get_decode_itl_and_thpt_per_gpu
+from dynamo.profiler.utils.defaults import DECODE_MAX_CONCURRENCY
+from dynamo.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator
+from dynamo.profiler.utils.plot import plot_decode_3d_surface

 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)

--- a/benchmarks/profiler/utils/profile_prefill.py
+++ b/benchmarks/profiler/utils/profile_prefill.py
@@ -6,9 +6,9 @@ from typing import Callable, Optional

 import numpy as np

-from benchmarks.profiler.utils.aiperf import get_prefill_ttft
-from benchmarks.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator
-from benchmarks.profiler.utils.plot import plot_prefill_interpolation
+from dynamo.profiler.utils.aiperf import get_prefill_ttft
+from dynamo.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator
+from dynamo.profiler.utils.plot import plot_prefill_interpolation

 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)

--- a/benchmarks/profiler/utils/profiler_argparse.py
+++ b/benchmarks/profiler/utils/profiler_argparse.py
@@ -8,8 +8,8 @@ from typing import Any, Dict

 import yaml

-from benchmarks.profiler.utils.planner_utils import add_planner_arguments_to_parser
-from benchmarks.profiler.utils.search_space_autogen import auto_generate_search_space
+from dynamo.profiler.utils.planner_utils import add_planner_arguments_to_parser
+from dynamo.profiler.utils.search_space_autogen import auto_generate_search_space


 def _get(cfg: Dict[str, Any], camel: str, snake: str, default: Any = None) -> Any:

--- a/benchmarks/profiler/utils/profiler_status.py
+++ b/benchmarks/profiler/utils/profiler_status.py
--- a/benchmarks/profiler/utils/search_space_autogen.py
+++ b/benchmarks/profiler/utils/search_space_autogen.py
@@ -8,9 +8,9 @@ import os

 import yaml

-from benchmarks.profiler.utils.config_modifiers import CONFIG_MODIFIERS
-from benchmarks.profiler.utils.model_info import ModelInfo, get_model_info
 from deploy.utils.gpu_inventory import get_gpu_summary
+from dynamo.profiler.utils.config_modifiers import CONFIG_MODIFIERS
+from dynamo.profiler.utils.model_info import ModelInfo, get_model_info

 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)

--- a/benchmarks/profiler/webui/data_template.json
+++ b/benchmarks/profiler/webui/data_template.json
--- a/benchmarks/profiler/webui/select_config.py
+++ b/benchmarks/profiler/webui/select_config.py
@@ -5,7 +5,7 @@ import json
 import logging
 import queue

-from benchmarks.profiler.webui.utils import (
+from dynamo.profiler.webui.utils import (
    add_profiling_error,
    clear_profiling_errors,
    create_gradio_interface,

--- a/benchmarks/profiler/webui/utils.py
+++ b/benchmarks/profiler/webui/utils.py
@@ -20,12 +20,12 @@ from aiconfigurator.webapp.components.profiling import (
    load_profiling_javascript,
 )

-from benchmarks.profiler.utils.dgd_generation import (
+from dynamo.profiler.utils.dgd_generation import (
    generate_decode_service_config_preview,
    generate_prefill_decode_services_config_preview,
    generate_prefill_service_config_preview,
 )
-from benchmarks.profiler.utils.pareto import compute_pareto
+from dynamo.profiler.utils.pareto import compute_pareto

 logger = logging.getLogger(__name__)


--- a/deploy/operator/api/v1alpha1/dynamographdeploymentrequest_types.go
+++ b/deploy/operator/api/v1alpha1/dynamographdeploymentrequest_types.go
@@ -49,7 +49,7 @@ type ConfigMapKeySelector struct {

 // ProfilingConfigSpec defines configuration for the profiling process.
 // This structure maps directly to the profile_sla.py config format.
-// See benchmarks/profiler/utils/profiler_argparse.py for the complete schema.
+// See dynamo/profiler/utils/profiler_argparse.py for the complete schema.
 type ProfilingConfigSpec struct {
 	// Config is the profiling configuration as arbitrary JSON/YAML. This will be passed directly to the profiler.
 	// The profiler will validate the configuration and report any errors.

--- a/deploy/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml
+++ b/deploy/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml
@@ -28,7 +28,7 @@ spec:
  profilerImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.6.1"

  # ProfilingConfig maps directly to the profile_sla.py config format
-  # See benchmarks/profiler/utils/profiler_argparse.py for complete schema
+  # See dynamo/profiler/utils/profiler_argparse.py for complete schema
  # Note: deployment.model and engine.backend are automatically set from model and backend above
  profilingConfig:
    config:

--- a/deploy/operator/internal/controller/dynamographdeploymentrequest_controller.go
+++ b/deploy/operator/internal/controller/dynamographdeploymentrequest_controller.go
@@ -1082,7 +1082,7 @@ func (r *DynamoGraphDeploymentRequestReconciler) createProfilingJob(ctx context.
 		profilerContainer := corev1.Container{
 			Name:         ContainerNameProfiler,
 			Image:        imageName,
-			Command:      []string{"python", "-m", "benchmarks.profiler.profile_sla"},
+			Command:      []string{"python", "-m", "dynamo.profiler.profile_sla"},
 			Args:         profilerArgs,
 			Env:          profilerEnv,
 			VolumeMounts: volumeMounts,

--- a/deploy/utils/dynamo_deployment.py
+++ b/deploy/utils/dynamo_deployment.py
@@ -594,6 +594,6 @@ async def main():


 # run with:
-# uv run benchmarks/profiler/utils/dynamo_deployment.py -n mo-dyn -f ./examples/vllm/deploy/agg.yaml -l ./client_logs
+# uv run components/src/dynamo/profiler/utils/dynamo_deployment.py -n mo-dyn -f ./examples/vllm/deploy/agg.yaml -l ./client_logs
 if __name__ == "__main__":
    asyncio.run(main())