feat: turn profiling k8s jobs into sample DGDR requests (#3864)

Signed-off-by: Hannah Zhang <hannahz@nvidia.com> Signed-off-by: hongkuanz <hongkuanz@nvidia.com> Signed-off-by: Hongkuan Zhou <tedzhouhk@gmail.com> Co-authored-by: hongkuanz <hongkuanz@nvidia.com> Co-authored-by: Hongkuan Zhou <tedzhouhk@gmail.com>

feat: turn profiling k8s jobs into sample DGDR requests (#3864)
Signed-off-by: Hannah Zhang <hannahz@nvidia.com> Signed-off-by: hongkuanz <hongkuanz@nvidia.com> Signed-off-by: Hongkuan Zhou <tedzhouhk@gmail.com> Co-authored-by: hongkuanz <hongkuanz@nvidia.com> Co-authored-by: Hongkuan Zhou <tedzhouhk@gmail.com>
6a84ffd3 · hhzhang16 · GitHub · 0d07e2c3 · 6a84ffd3 · 6a84ffd3
Unverified Commit 6a84ffd3 authored Oct 27, 2025 by hhzhang16 Committed by GitHub Oct 27, 2025
20 changed files
--- a/README.md
+++ b/README.md
@@ -179,7 +179,7 @@ Rerun with `curl -N` and change `stream` in the request to `true` to get the res
 Dynamo provides comprehensive benchmarking tools to evaluate and optimize your deployments:

 - **[Benchmarking Guide](docs/benchmarks/benchmarking.md)** – Compare deployment topologies (aggregated vs. disaggregated vs. vanilla vLLM) using AIPerf
- **[Pre-Deployment Profiling](docs/benchmarks/pre_deployment_profiling.md)** – Optimize configurations before deployment to meet SLA requirements
+- **[SLA-Driven Dynamo Deployments](docs/planner/sla_planner_quickstart.md)** – Optimize your deployment to meet SLA requirements

 # Engines


--- a/benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml
+++ b/benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# DynamoGraphDeploymentRequest for AI Configurator-based profiling
+apiVersion: nvidia.com/v1alpha1
+kind: DynamoGraphDeploymentRequest
+metadata:
+  name: sla-aic
+spec:
+  model: Qwen/Qwen3-32B
+  backend: trtllm
+
+  # ProfilingConfig maps directly to the profile_sla.py config format
+  profilingConfig:
+    profilerImage: "nvcr.io/nvidian/dynamo-dev/vllm-runtime:dep-540.5"
+    config:
+      # Sweep/profiling configuration
+      sweep:
+        # AI Configurator mode (fast simulation-based profiling)
+        use_ai_configurator: true
+        aic_system: h200_sxm
+        aic_model_name: QWEN3_32B
+        aic_backend_version: "0.20.0"
+
+      # SLA targets for profiling
+      sla:
+        isl: 3000   # Input sequence length
+        osl: 150    # Output sequence length
+        ttft: 500.0 # Time To First Token target (milliseconds)
+        itl: 30.0   # Inter-Token Latency target (milliseconds)
+
+  # Deployment overrides for the auto-created DGD
+  deploymentOverrides:
+    workersImage: "nvcr.io/nvidian/dynamo-dev/trtllm-runtime:dep-540.5"
+
+  # Automatically create DynamoGraphDeployment after profiling
+  autoApply: true
+
--- a/benchmarks/profiler/deploy/profile_sla_aic_job.yaml
+++ b/benchmarks/profiler/deploy/profile_sla_aic_job.yaml
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-# TODO: update to dgdr spec for AIC
-apiVersion: batch/v1
-kind: Job
-metadata:
-  name: profile-sla
-  namespace: ${NAMESPACE}
-spec:
-  template:
-    spec:
-      serviceAccountName: dynamo-sa
-      containers:
-      - name: profile-sla
-        image: ${DOCKER_IMAGE}
-        resources:
-          requests:
-            cpu: "16"
-            memory: "10Gi"
-        env:
-          - name: HUGGING_FACE_HUB_TOKEN
-            valueFrom:
-              secretKeyRef:
-                name: hf-token-secret
-                key: HF_TOKEN
-          - name: NATS_SERVER
-            value: nats://${NAMESPACE}-nats:4222
-          - name: ETCD_ENDPOINTS
-            value: ${NAMESPACE}-etcd:2379
-        command: ["python", "-m", "benchmarks.profiler.profile_sla"]
-        args:
-          - --config
-          - ${DGD_CONFIG_FILE}
-          - --output-dir
-          - /data/profiling_results
-          - --namespace
-          - ${NAMESPACE}
-          - --backend
-          - vllm
-          - --min-num-gpus-per-engine
-          - "1"
-          - --max-num-gpus-per-engine
-          - "8"
-          - --isl
-          - "3000"
-          - --osl
-          - "150"
-          - --ttft
-          - "500"
-          - --itl
-          - "30"
-          - --use-ai-configurator
-          - --aic-system
-          - h200_sxm
-          - --aic-model-name
-          - QWEN3_32B
-          - --aic-backend-version
-          - 0.20.0
-        volumeMounts:
-          - name: output-volume
-            mountPath: /data
-      restartPolicy: Never
-      volumes:
-        - name: output-volume
-          persistentVolumeClaim:
-            claimName: dynamo-pvc
-  backoffLimit: 0
--- a/benchmarks/profiler/deploy/profile_sla_dgdr.yaml
+++ b/benchmarks/profiler/deploy/profile_sla_dgdr.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# DynamoGraphDeploymentRequest for standard online profiling
+# Converted from profile_sla_job.yaml
+apiVersion: nvidia.com/v1alpha1
+kind: DynamoGraphDeploymentRequest
+metadata:
+  name: sla-online
+spec:
+  model: Qwen/Qwen3-0.6B
+  backend: vllm
+
+  # ProfilingConfig maps directly to the profile_sla.py config format
+  profilingConfig:
+    profilerImage: "nvcr.io/nvidian/dynamo-dev/vllm-runtime:dep-540.5"
+    config:
+      # Sweep/profiling configuration
+      sweep:
+        skip_existing_results: true
+        # Standard online profiling (not using AI Configurator)
+        use_ai_configurator: false
+
+      # SLA targets for profiling
+      sla:
+        isl: 3000   # Input sequence length
+        osl: 150    # Output sequence length
+        ttft: 200.0 # Time To First Token target (milliseconds)
+        itl: 20.0   # Inter-Token Latency target (milliseconds)
+
+  # Deployment overrides for the auto-created DGD
+  deploymentOverrides:
+    workersImage: "nvcr.io/nvidian/dynamo-dev/vllm-runtime:dep-540.5"
+
+  # Automatically create DynamoGraphDeployment after profiling
+  autoApply: true
+
--- a/benchmarks/profiler/deploy/profile_sla_job.yaml
+++ b/benchmarks/profiler/deploy/profile_sla_job.yaml
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-# TODO: update to dgdr spec for online mode
-apiVersion: batch/v1
-kind: Job
-metadata:
-  name: profile-sla
-  namespace: ${NAMESPACE}
-spec:
-  template:
-    spec:
-      serviceAccountName: dynamo-sa
-      containers:
-      - name: profile-sla
-        image: ${DOCKER_IMAGE}
-        resources:
-          requests:
-            cpu: "16"
-            memory: "10Gi"
-        env:
-          - name: HUGGING_FACE_HUB_TOKEN
-            valueFrom:
-              secretKeyRef:
-                name: hf-token-secret
-                key: HF_TOKEN
-          - name: NATS_SERVER
-            value: nats://${NAMESPACE}-nats:4222
-          - name: ETCD_ENDPOINTS
-            value: ${NAMESPACE}-etcd:2379
-        command: ["python", "-m", "benchmarks.profiler.profile_sla"]
-        args:
-          - --config
-          - ${DGD_CONFIG_FILE}
-          - --output-dir
-          - /data/profiling_results
-          - --namespace
-          - ${NAMESPACE}
-          - --backend
-          - vllm
-          - --min-num-gpus-per-engine
-          - "1"
-          - --max-num-gpus-per-engine
-          - "8"
-          - --isl
-          - "3000"
-          - --osl
-          - "150"
-          - --ttft
-          - "200"
-          - --itl
-          - "20"
-        volumeMounts:
-          - name: output-volume
-            mountPath: /data
-      restartPolicy: Never
-      volumes:
-        - name: output-volume
-          persistentVolumeClaim:
-            claimName: dynamo-pvc
-  backoffLimit: 0
--- a/benchmarks/profiler/deploy/profile_sla_moe_dgdr.yaml
+++ b/benchmarks/profiler/deploy/profile_sla_moe_dgdr.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# DynamoGraphDeploymentRequest for MoE model profiling
+# Converted from profile_sla_moe_job.yaml
+apiVersion: nvidia.com/v1alpha1
+kind: DynamoGraphDeploymentRequest
+metadata:
+  name: sla-moe
+spec:
+  model: deepseek-ai/DeepSeek-R1
+  backend: sglang
+
+  # ProfilingConfig maps directly to the profile_sla.py config format
+  profilingConfig:
+    profilerImage: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.6.1"
+    config:
+      # Engine configuration
+      engine:
+        is_moe_model: true  # Enable MoE model support (uses TEP/DEP instead of TP)
+
+      # Sweep/profiling configuration
+      sweep:
+        # Standard online profiling (not using AI Configurator)
+        use_ai_configurator: false
+
+      # SLA targets for profiling
+      sla:
+        isl: 3000   # Input sequence length
+        osl: 150    # Output sequence length
+        ttft: 200.0 # Time To First Token target (milliseconds)
+        itl: 20.0   # Inter-Token Latency target (milliseconds)
+
+    # Reference to ConfigMap containing the DGD base config
+    # For MoE models, this should point to the appropriate disagg config
+    # Original path (from profile_sla_moe_job.yaml): /sgl-workspace/dynamo/recipes/deepseek-r1/sglang/disagg-16gpu/deploy.yaml
+    configMapRef:
+      name: deepseek-r1-config
+      key: tep16p-dep16d-disagg.yaml
+
+  # Deployment overrides for the auto-created DGD
+  deploymentOverrides:
+    workersImage: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.6.1"
+
+  # Automatically create DynamoGraphDeployment after profiling
+  autoApply: true
+
--- a/benchmarks/profiler/deploy/profile_sla_moe_job.yaml
+++ b/benchmarks/profiler/deploy/profile_sla_moe_job.yaml
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-# TODO: update to dgdr spec for MoE model
-apiVersion: batch/v1
-kind: Job
-metadata:
-  name: profile-sla
-  namespace: ${NAMESPACE}
-spec:
-  template:
-    spec:
-      serviceAccountName: dynamo-sa
-      containers:
-      - name: profile-sla
-        image: ${DOCKER_IMAGE}
-        resources:
-          requests:
-            cpu: "32"
-            memory: "50Gi"
-        env:
-          - name: HUGGING_FACE_HUB_TOKEN
-            valueFrom:
-              secretKeyRef:
-                name: hf-token-secret
-                key: HF_TOKEN
-          - name: NATS_SERVER
-            value: nats://${NAMESPACE}-nats:4222
-          - name: ETCD_ENDPOINTS
-            value: ${NAMESPACE}-etcd:2379
-        workingDir: /sgl-workspace/dynamo
-        command: ["python", "-m", "benchmarks.profiler.profile_sla"]
-        args:
-          - --config
-          - /sgl-workspace/dynamo/recipes/deepseek-r1/sglang/disagg-16gpu/deploy.yaml
-          - --output-dir
-          - /data/profiling_results
-          - --namespace
-          - ${NAMESPACE}
-          - --backend
-          - sglang
-          - --is-moe-model
-          - --min-num-gpus-per-engine
-          - "8"
-          - --max-num-gpus-per-engine
-          - "16"
-          - --isl
-          - "3000"
-          - --osl
-          - "150"
-          - --ttft
-          - "200"
-          - --itl
-          - "20"
-        volumeMounts:
-          - name: output-volume
-            mountPath: /data
-      restartPolicy: Never
-      volumes:
-        - name: output-volume
-          persistentVolumeClaim:
-            claimName: dynamo-pvc
-  backoffLimit: 0
--- a/benchmarks/profiler/profile_sla.py
+++ b/benchmarks/profiler/profile_sla.py
@@ -92,6 +92,11 @@ async def run_profile(args):
        with open(args.config, "r") as f:
            config = yaml.safe_load(f)

+        config = config_modifier.update_model(config, args.model)
+        if args.dgd_image:
+            config = config_modifier.update_image(config, args.dgd_image)
+            logger.info(f"Using DGD image: {args.dgd_image}")
+
        if args.is_moe_model:
            # For MoE models, use range with stride of num_gpus_per_node
            profile_num_gpus = list(

--- a/benchmarks/profiler/utils/config.py
+++ b/benchmarks/profiler/utils/config.py
@@ -358,6 +358,29 @@ def set_argument_value(args: list, arg_name: str, value: str):
    return args


+def update_image(config: dict, image: str) -> dict:
+    """Update container image for all DGD services (frontend, planner, workers).
+
+    This is a shared utility function used by all backend config modifiers.
+
+    Args:
+        config: Configuration dictionary
+        image: Container image to set for all services
+
+    Returns:
+        Updated configuration dictionary
+    """
+    cfg = Config.model_validate(config)
+
+    # Update image for all services
+    for service_name, service_config in cfg.spec.services.items():
+        if service_config.extraPodSpec and service_config.extraPodSpec.mainContainer:
+            service_config.extraPodSpec.mainContainer.image = image
+            logger.debug(f"Updated image for {service_name} to {image}")
+
+    return cfg.model_dump()
+
+
 class ConfigModifierProtocol(Protocol):
    @classmethod
    def convert_config(
@@ -419,6 +442,10 @@ class ConfigModifierProtocol(Protocol):
    def update_model(cls, config: dict, model_name: str) -> dict:
        ...

+    @classmethod
+    def update_image(cls, config: dict, image: str) -> dict:
+        ...
+

 def generate_dgd_config_with_planner(
    config_path: str,
@@ -450,6 +477,15 @@ def generate_dgd_config_with_planner(
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)

+    # Update model name in config from profiling args
+    # This ensures the final DGD uses the model specified in the DGDR, not the default in the config file
+    config = config_modifier.update_model(config, args.model)
+
+    # Update container image if provided
+    # This overrides the default image in the config file for all DGD components
+    if args.dgd_image:
+        config = config_modifier.update_image(config, args.dgd_image)
+
    if not is_moe_model:
        # dense model, use TP for both prefill and decode
        config = config_modifier.set_config_tp_size(

--- a/benchmarks/profiler/utils/config_modifiers/sglang.py
+++ b/benchmarks/profiler/utils/config_modifiers/sglang.py
@@ -16,6 +16,7 @@ from benchmarks.profiler.utils.config import (
    remove_valued_arguments,
    set_argument_value,
    setup_worker_service_resources,
+    update_image,
    validate_and_get_worker_args,
 )
 from benchmarks.profiler.utils.defaults import (
@@ -72,6 +73,11 @@ class SGLangConfigModifier:

        return cfg.model_dump()

+    @classmethod
+    def update_image(cls, config, image: str) -> dict:
+        """Update container image for all DGD services (frontend, planner, workers)."""
+        return update_image(config, image)
+
    @classmethod
    def convert_config(
        cls,

--- a/benchmarks/profiler/utils/config_modifiers/trtllm.py
+++ b/benchmarks/profiler/utils/config_modifiers/trtllm.py
@@ -18,6 +18,7 @@ from benchmarks.profiler.utils.config import (
    remove_valued_arguments,
    set_argument_value,
    setup_worker_service_resources,
+    update_image,
    validate_and_get_worker_args,
 )
 from benchmarks.profiler.utils.defaults import (
@@ -74,6 +75,11 @@ class TrtllmConfigModifier:

        return cfg.model_dump()

+    @classmethod
+    def update_image(cls, config, image: str) -> dict:
+        """Update container image for all DGD services (frontend, planner, workers)."""
+        return update_image(config, image)
+
    @classmethod
    def convert_config(
        cls,

--- a/benchmarks/profiler/utils/config_modifiers/vllm.py
+++ b/benchmarks/profiler/utils/config_modifiers/vllm.py
@@ -14,6 +14,7 @@ from benchmarks.profiler.utils.config import (
    get_worker_service_from_config,
    set_argument_value,
    setup_worker_service_resources,
+    update_image,
    validate_and_get_worker_args,
 )
 from benchmarks.profiler.utils.defaults import (
@@ -69,6 +70,11 @@ class VllmV1ConfigModifier:

        return cfg.model_dump()

+    @classmethod
+    def update_image(cls, config, image: str) -> dict:
+        """Update container image for all DGD services (frontend, planner, workers)."""
+        return update_image(config, image)
+
    @classmethod
    def convert_config(
        cls,

--- a/benchmarks/profiler/utils/profiler_argparse.py
+++ b/benchmarks/profiler/utils/profiler_argparse.py
@@ -121,6 +121,12 @@ def create_profiler_parser() -> argparse.Namespace:
        default=config.get("deployment", {}).get("model", ""),
        help="Model to serve, can be HF model name or local model path",
    )
+    parser.add_argument(
+        "--dgd-image",
+        type=str,
+        default=config.get("deployment", {}).get("dgd_image", ""),
+        help="Container image to use for DGD components (frontend, planner, workers). Overrides images in config file.",
+    )

    # CLI arguments with config-aware defaults (using nested .get() for cleaner code)
    parser.add_argument(
@@ -295,10 +301,9 @@ def create_profiler_parser() -> argparse.Namespace:
        delattr(args, "profile_config")

    # Validate required arguments
-    if not args.config:
-        parser.error("--config is required (either via CLI or profile-config)")
+    # Either --model or --config (or both) must be provided
    if not args.model and not args.config:
-        parser.error("--model or --config is required")
+        parser.error("--model or --config is required (provide at least one)")

    auto_generate_search_space(args)


--- a/components/backends/trtllm/deploy/README.md
+++ b/components/backends/trtllm/deploy/README.md
@@ -53,7 +53,7 @@ Advanced disaggregated deployment with SLA-based automatic scaling.
 - `TRTLLMPrefillWorker`: Specialized prefill-only worker

 > [!NOTE]
-> This deployment requires pre-deployment profiling to be completed first. See [Pre-Deployment Profiling](../../../../docs/benchmarks/pre_deployment_profiling.md) for detailed instructions.
+> This deployment requires pre-deployment profiling to be completed first. See [Pre-Deployment Profiling](../../../../docs/benchmarks/sla_driven_profiling.md) for detailed instructions.

 ## CRD Structure


--- a/components/backends/vllm/deploy/README.md
+++ b/components/backends/vllm/deploy/README.md
@@ -99,7 +99,7 @@ We have public images available on [NGC Catalog](https://catalog.ngc.nvidia.com/

 ### Pre-Deployment Profiling (SLA Planner Only)

-If using the SLA Planner deployment (`disagg_planner.yaml`), follow the [pre-deployment profiling guide](../../../../docs/benchmarks/pre_deployment_profiling.md) to run pre-deployment profiling. The results will be saved to the `dynamo-pvc` PVC and queried by the SLA Planner.
+If using the SLA Planner deployment (`disagg_planner.yaml`), follow the [pre-deployment profiling guide](../../../../docs/benchmarks/sla_driven_profiling.md) to run pre-deployment profiling. The results will be saved to the `dynamo-pvc` PVC and queried by the SLA Planner.

 ## Usage


--- a/components/src/dynamo/planner/utils/perf_interpolation.py
+++ b/components/src/dynamo/planner/utils/perf_interpolation.py
@@ -28,7 +28,7 @@ logger = logging.getLogger(__name__)

 MISSING_PROFILING_DATA_ERROR_MESSAGE = (
    "SLA-Planner requires pre-deployment profiling results to run.\n"
-    "Please follow /docs/benchmarks/pre_deployment_profiling.md to run the profiling first,\n"
+    "Please follow /docs/benchmarks/sla_driven_profiling.md to run the profiling first,\n"
    "and make sure the profiling results are present in --profile-results-dir."
 )


--- a/deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeploymentrequests.yaml
+++ b/deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeploymentrequests.yaml
@@ -33,7 +33,7 @@ spec:
  scope: Namespaced
  versions:
    - additionalPrinterColumns:
-        - jsonPath: .spec.modelName
+        - jsonPath: .spec.model
          name: Model
          type: string
        - jsonPath: .status.backend
@@ -94,6 +94,15 @@ spec:
                    after profiling completes. If false, only the spec is generated and stored in status.
                    Users can then manually create a DGD using the generated spec.
                  type: boolean
+                backend:
+                  description: |-
+                    Backend specifies the inference backend to use.
+                    The controller automatically sets this value in profilingConfig.config.engine.backend.
+                  enum:
+                    - vllm
+                    - sglang
+                    - trtllm
+                  type: string
                deploymentOverrides:
                  description: |-
                    DeploymentOverrides allows customizing metadata for the auto-created DGD.
@@ -121,18 +130,27 @@ spec:
                        Namespace is the desired namespace for the created DynamoGraphDeployment.
                        If not specified, defaults to the DGDR namespace.
                      type: string
+                    workersImage:
+                      description: |-
+                        WorkersImage specifies the container image to use for DynamoGraphDeployment worker components.
+                        This image is used for both temporary DGDs created during online profiling and the final DGD.
+                        If omitted, the image from the base config file (e.g., disagg.yaml) is used.
+                        Example: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.1"
+                      type: string
                  type: object
-                modelName:
+                model:
                  description: |-
-                    ModelName specifies the model to deploy (e.g., "Qwen/Qwen3-0.6B", "meta-llama/Llama-3-70b").
+                    Model specifies the model to deploy (e.g., "Qwen/Qwen3-0.6B", "meta-llama/Llama-3-70b").
                    This is a high-level identifier for easy reference in kubectl output and logs.
+                    The controller automatically sets this value in profilingConfig.config.deployment.model.
                  type: string
                profilingConfig:
                  description: |-
                    ProfilingConfig provides the complete configuration for the profiling job.
                    This configuration is passed directly to the profiler.
                    The structure matches the profile_sla config format exactly (see ProfilingConfigSpec for schema).
-                    The profiler will validate the configuration and report any errors.
+                    Note: deployment.model and engine.backend are automatically set from the high-level
+                    model and backend fields and should not be specified in this config.
                  properties:
                    config:
                      description: |-
@@ -156,9 +174,18 @@ spec:
                      required:
                        - name
                      type: object
+                    profilerImage:
+                      description: |-
+                        ProfilerImage specifies the container image to use for profiling jobs.
+                        This image contains the profiler code and dependencies needed for SLA-based profiling.
+                        Example: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.1"
+                      type: string
+                  required:
+                    - profilerImage
                  type: object
              required:
-                - modelName
+                - backend
+                - model
                - profilingConfig
              type: object
            status:

--- a/deploy/cloud/helm/platform/components/operator/templates/deployment.yaml
+++ b/deploy/cloud/helm/platform/components/operator/templates/deployment.yaml
@@ -124,10 +124,9 @@ spec:
          - --mpi-run-ssh-secret-name={{ .Values.dynamo.mpiRun.secretName }}
          - --mpi-run-ssh-secret-namespace={{ .Release.Namespace }}
        {{- end }}
-        {{- if .Values.dynamo.dgdr.profilerImage }}
-          - --profiler-image={{ .Values.dynamo.dgdr.profilerImage }}
-        {{- end }}
-        {{- if not .Values.namespaceRestriction.enabled }}
+        {{- if .Values.namespaceRestriction.enabled }}
+          - --dgdr-profiling-cluster-role-name={{ include "dynamo-operator.fullname" . }}-{{ .Release.Namespace }}-dgdr-profiling-nodes
+        {{- else }}
          - --dgdr-profiling-cluster-role-name={{ include "dynamo-operator.fullname" . }}-dgdr-profiling
          - --planner-cluster-role-name={{ include "dynamo-operator.fullname" . }}-planner
        {{- end }}

--- a/deploy/cloud/helm/platform/components/operator/templates/profiling-job-rbac.yaml
+++ b/deploy/cloud/helm/platform/components/operator/templates/profiling-job-rbac.yaml
@@ -73,12 +73,11 @@ subjects:
 - kind: ServiceAccount
  name: dgdr-profiling-job
  namespace: {{ .Release.Namespace }}
-
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
 metadata:
-  name: {{ include "dynamo-operator.fullname" . }}-dgdr-profiling-nodes
+  name: {{ include "dynamo-operator.fullname" . }}-{{ .Release.Namespace }}-dgdr-profiling-nodes
  labels:
    {{- include "dynamo-operator.labels" . | nindent 4 }}
    app.kubernetes.io/component: dgdr-profiling
@@ -87,7 +86,22 @@ rules:
 - apiGroups: [""]
  resources: ["nodes"]
  verbs: ["get", "list", "watch"]
-
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: {{ include "dynamo-operator.fullname" . }}-{{ .Release.Namespace }}-dgdr-profiling-nodes
+  labels:
+    {{- include "dynamo-operator.labels" . | nindent 4 }}
+    app.kubernetes.io/component: dgdr-profiling
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: {{ include "dynamo-operator.fullname" . }}-{{ .Release.Namespace }}-dgdr-profiling-nodes
+subjects:
+- kind: ServiceAccount
+  name: dgdr-profiling-job
+  namespace: {{ .Release.Namespace }}
 {{- else }}
 # Cluster-wide mode: ClusterRole for DGDR profiling jobs
 ---
@@ -122,21 +136,20 @@ rules:
 - apiGroups: [""]
  resources: ["nodes"]
  verbs: ["get", "list", "watch"]
-{{- end }}
-
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRoleBinding
 metadata:
-  name: {{ include "dynamo-operator.fullname" . }}-dgdr-profiling-nodes
+  name: {{ include "dynamo-operator.fullname" . }}-dgdr-profiling
  labels:
    {{- include "dynamo-operator.labels" . | nindent 4 }}
    app.kubernetes.io/component: dgdr-profiling
 roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
-  name: {{ include "dynamo-operator.fullname" . }}-dgdr-profiling-nodes
+  name: {{ include "dynamo-operator.fullname" . }}-dgdr-profiling
 subjects:
 - kind: ServiceAccount
  name: dgdr-profiling-job
  namespace: {{ .Release.Namespace }}
+{{- end }}
--- a/deploy/cloud/helm/platform/components/operator/values.yaml
+++ b/deploy/cloud/helm/platform/components/operator/values.yaml
@@ -117,15 +117,6 @@ dynamo:
    sshKeygen:
      enabled: true

-  # DynamoGraphDeploymentRequest (DGDR) configuration
-  dgdr:
-    # Container image to use for profiling jobs (both online and offline/AIC)
-    # REQUIRED: Must be set to create DynamoGraphDeploymentRequests
-    # For development: Build and push the profiler image from the ai-dynamo repository
-    # Public image will be available in release 0.6.1
-    # Example: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.1"
-    profilerImage: ""
-

 #imagePullSecrets: []
 kubernetesClusterDomain: cluster.local