feat: add DGDR custom resource (#3489)

Signed-off-by: Julien Mancuso <jmancuso@nvidia.com> Signed-off-by: hhzhang16 <54051230+hhzhang16@users.noreply.github.com> Signed-off-by: Hannah Zhang <hannahz@nvidia.com> Co-authored-by: Hannah Zhang <hannahz@nvidia.com> Co-authored-by: hhzhang16 <54051230+hhzhang16@users.noreply.github.com> Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>

feat: add DGDR custom resource (#3489)
Signed-off-by: Julien Mancuso <jmancuso@nvidia.com> Signed-off-by: hhzhang16 <54051230+hhzhang16@users.noreply.github.com> Signed-off-by: Hannah Zhang <hannahz@nvidia.com> Co-authored-by: Hannah Zhang <hannahz@nvidia.com> Co-authored-by: hhzhang16 <54051230+hhzhang16@users.noreply.github.com> Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
57cdb9a1 · Julien Mancuso · GitHub · 66fd6f84 · 57cdb9a1 · 57cdb9a1
Unverified Commit 57cdb9a1 authored Oct 17, 2025 by Julien Mancuso Committed by GitHub Oct 17, 2025
20 changed files
--- a/benchmarks/profiler/profile_sla.py
+++ b/benchmarks/profiler/profile_sla.py
@@ -596,10 +596,11 @@ async def run_profile(args):
            try:
                await client.wait_for_deployment_ready()
                logger.info("Deployment is ready")
+
                skip_profile = False
            except TimeoutError:
                logger.error(
-                    "Deployment failed to become ready within timeout, skipping profiling"
+                    "Deployment or model failed to become ready within timeout, skipping profiling"
                )
                skip_profile = True


--- a/deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeploymentrequests.yaml
+++ b/deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeploymentrequests.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.16.4
+    helm.sh/resource-policy: keep
+  name: dynamographdeploymentrequests.nvidia.com
+spec:
+  group: nvidia.com
+  names:
+    kind: DynamoGraphDeploymentRequest
+    listKind: DynamoGraphDeploymentRequestList
+    plural: dynamographdeploymentrequests
+    shortNames:
+      - dgdr
+    singular: dynamographdeploymentrequest
+  scope: Namespaced
+  versions:
+    - additionalPrinterColumns:
+        - jsonPath: .spec.modelName
+          name: Model
+          type: string
+        - jsonPath: .spec.backend
+          name: Backend
+          type: string
+        - jsonPath: .status.state
+          name: State
+          type: string
+        - jsonPath: .status.deployment.state
+          name: DGD-State
+          type: string
+        - jsonPath: .metadata.creationTimestamp
+          name: Age
+          type: date
+      name: v1alpha1
+      schema:
+        openAPIV3Schema:
+          description: |-
+            DynamoGraphDeploymentRequest is the Schema for the dynamographdeploymentrequests API.
+            It serves as the primary interface for users to request model deployments with
+            specific performance and resource constraints, enabling SLA-driven deployments.
+
+            Lifecycle:
+             1. Initial → Pending: Validates spec and prepares for profiling
+             2. Pending → Profiling: Creates and runs profiling job (online or AIC)
+             3. Profiling → Ready/Deploying: Generates DGD spec after profiling completes (Deploying when autoApply=true, otherwise Ready)
+             4. Deploying → Ready: When autoApply=true, monitors DGD until Ready
+             5. Ready: Terminal state when DGD is operational or spec is available
+             6. DeploymentDeleted: Terminal state when auto-created DGD is manually deleted
+
+            The spec becomes immutable once profiling starts. Users must delete and recreate
+            the DGDR to modify configuration after this point.
+          properties:
+            apiVersion:
+              description: |-
+                APIVersion defines the versioned schema of this representation of an object.
+                Servers should convert recognized schemas to the latest internal value, and
+                may reject unrecognized values.
+                More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+              type: string
+            kind:
+              description: |-
+                Kind is a string value representing the REST resource this object represents.
+                Servers may infer this from the endpoint the client submits requests to.
+                Cannot be updated.
+                In CamelCase.
+                More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+              type: string
+            metadata:
+              type: object
+            spec:
+              description: Spec defines the desired state for this deployment request.
+              properties:
+                autoApply:
+                  default: false
+                  description: |-
+                    AutoApply indicates whether to automatically create a DynamoGraphDeployment
+                    after profiling completes. If false, only the spec is generated and stored in status.
+                    Users can then manually create a DGD using the generated spec.
+                  type: boolean
+                backend:
+                  default: trtllm
+                  description: |-
+                    Backend specifies the inference backend framework to use.
+                    Supported values are: "vllm", "sglang", "trtllm".
+                  enum:
+                    - vllm
+                    - sglang
+                    - trtllm
+                  type: string
+                deploymentOverrides:
+                  description: |-
+                    DeploymentOverrides allows customizing metadata for the auto-created DGD.
+                    Only applicable when AutoApply is true.
+                  properties:
+                    annotations:
+                      additionalProperties:
+                        type: string
+                      description: Annotations are additional annotations to add to the DynamoGraphDeployment metadata.
+                      type: object
+                    labels:
+                      additionalProperties:
+                        type: string
+                      description: |-
+                        Labels are additional labels to add to the DynamoGraphDeployment metadata.
+                        These are merged with auto-generated labels from the profiling process.
+                      type: object
+                    name:
+                      description: |-
+                        Name is the desired name for the created DynamoGraphDeployment.
+                        If not specified, defaults to the DGDR name.
+                      type: string
+                    namespace:
+                      description: |-
+                        Namespace is the desired namespace for the created DynamoGraphDeployment.
+                        If not specified, defaults to the DGDR namespace.
+                      type: string
+                  type: object
+                gpu:
+                  description: |-
+                    GPU defines optional GPU type and resource specifications.
+                    These constraints guide the profiler to find configurations within specified bounds.
+                  properties:
+                    maxNumGPUsPerEngine:
+                      default: 8
+                      description: |-
+                        MaxNumGPUsPerEngine specifies the maximum number of GPUs per engine for profiling.
+                        The profiler will not consider configurations with more GPUs than this value.
+                      minimum: 1
+                      type: integer
+                    minNumGPUsPerEngine:
+                      default: 1
+                      description: |-
+                        MinNumGPUsPerEngine specifies the minimum number of GPUs per engine for profiling.
+                        The profiler will not consider configurations with fewer GPUs than this value.
+                      minimum: 1
+                      type: integer
+                    type:
+                      description: |-
+                        Type specifies the GPU type to target (e.g., "h200", "h100", "a100").
+                        If specified, profiling will focus on configurations optimized for this GPU type.
+                      type: string
+                  type: object
+                modelName:
+                  description: |-
+                    ModelName specifies the model to deploy (e.g., "meta/llama3-70b").
+                    This should be a valid model identifier that the profiler can resolve.
+                  type: string
+                online:
+                  default: false
+                  description: |-
+                    Online indicates whether to use the online profiler (true) or AI Configurator (false).
+                    Online profiling uses real deployments for accurate measurements (2-4 hours).
+                    Offline profiling uses AI Configurator for fast simulation-based profiling (20-30 seconds).
+                  type: boolean
+                profilingConfig:
+                  description: |-
+                    ProfilingConfig provides custom configuration for the profiling job.
+                    Applicable to both online and offline (AIC) profiling modes.
+                  properties:
+                    configMapRef:
+                      description: |-
+                        ConfigMapRef is a reference to a ConfigMap containing profiling configuration.
+                        The ConfigMap should contain a key (default: "disagg.yaml") with the configuration file.
+                        This configuration is used by both online and offline (AIC) profiling modes.
+                      properties:
+                        key:
+                          default: disagg.yaml
+                          description: Key in the ConfigMap to select. If not specified, defaults to "disagg.yaml".
+                          type: string
+                        name:
+                          description: Name of the ConfigMap containing the desired data.
+                          type: string
+                      required:
+                        - name
+                      type: object
+                  type: object
+                sla:
+                  description: |-
+                    SLA defines the Service Level Agreement profiling targets.
+                    The profiler uses these targets to find an optimal deployment configuration.
+                  properties:
+                    isl:
+                      default: 3000
+                      description: |-
+                        ISL is the Input Sequence Length for profiling.
+                        Defines the length of input sequences to use during profiling tests.
+                      minimum: 1
+                      type: integer
+                    itl:
+                      default: 10
+                      description: |-
+                        ITL is the target Inter-Token Latency in milliseconds.
+                        This represents the maximum time allowed between consecutive tokens in the output.
+                      type: integer
+                    osl:
+                      default: 500
+                      description: |-
+                        OSL is the Output Sequence Length for profiling.
+                        Defines the expected length of output sequences to generate during profiling tests.
+                      minimum: 1
+                      type: integer
+                    ttft:
+                      default: 50
+                      description: |-
+                        TTFT is the target Time To First Token in milliseconds.
+                        This represents the maximum time allowed from request submission to receiving the first token.
+                      type: integer
+                  type: object
+              required:
+                - modelName
+                - sla
+              type: object
+            status:
+              description: Status reflects the current observed state of this deployment request.
+              properties:
+                conditions:
+                  description: |-
+                    Conditions contains the latest observed conditions of the deployment request.
+                    Standard condition types include: Validation, Profiling, SpecGenerated, DeploymentReady.
+                    Conditions are merged by type on patch updates.
+                  items:
+                    description: Condition contains details for one aspect of the current state of this API Resource.
+                    properties:
+                      lastTransitionTime:
+                        description: |-
+                          lastTransitionTime is the last time the condition transitioned from one status to another.
+                          This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
+                        format: date-time
+                        type: string
+                      message:
+                        description: |-
+                          message is a human readable message indicating details about the transition.
+                          This may be an empty string.
+                        maxLength: 32768
+                        type: string
+                      observedGeneration:
+                        description: |-
+                          observedGeneration represents the .metadata.generation that the condition was set based upon.
+                          For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                          with respect to the current state of the instance.
+                        format: int64
+                        minimum: 0
+                        type: integer
+                      reason:
+                        description: |-
+                          reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                          Producers of specific condition types may define expected values and meanings for this field,
+                          and whether the values are considered a guaranteed API.
+                          The value should be a CamelCase string.
+                          This field may not be empty.
+                        maxLength: 1024
+                        minLength: 1
+                        pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                        type: string
+                      status:
+                        description: status of the condition, one of True, False, Unknown.
+                        enum:
+                          - "True"
+                          - "False"
+                          - Unknown
+                        type: string
+                      type:
+                        description: type of condition in CamelCase or in foo.example.com/CamelCase.
+                        maxLength: 316
+                        pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                        type: string
+                    required:
+                      - lastTransitionTime
+                      - message
+                      - reason
+                      - status
+                      - type
+                    type: object
+                  type: array
+                deployment:
+                  description: |-
+                    Deployment tracks the auto-created DGD when AutoApply is true.
+                    Contains name, namespace, state, and creation status of the managed DGD.
+                  properties:
+                    created:
+                      description: |-
+                        Created indicates whether the DGD has been successfully created.
+                        Used to prevent recreation if the DGD is manually deleted by users.
+                      type: boolean
+                    name:
+                      description: Name is the name of the created DynamoGraphDeployment.
+                      type: string
+                    namespace:
+                      description: Namespace is the namespace of the created DynamoGraphDeployment.
+                      type: string
+                    state:
+                      description: |-
+                        State is the current state of the DynamoGraphDeployment.
+                        This value is mirrored from the DGD's status.state field.
+                      type: string
+                  type: object
+                generatedDeployment:
+                  description: |-
+                    GeneratedDeployment contains the full generated DynamoGraphDeployment specification
+                    including metadata, based on profiling results. Users can extract this to create
+                    a DGD manually, or it's used automatically when autoApply is true.
+                    Stored as RawExtension to preserve all fields including metadata.
+                  type: object
+                  x-kubernetes-embedded-resource: true
+                  x-kubernetes-preserve-unknown-fields: true
+                observedGeneration:
+                  description: |-
+                    ObservedGeneration reflects the generation of the most recently observed spec.
+                    Used to detect spec changes and enforce immutability after profiling starts.
+                  format: int64
+                  type: integer
+                profilingResults:
+                  description: |-
+                    ProfilingResults contains a reference to the ConfigMap holding profiling data.
+                    Format: "configmap/<name>"
+                  type: string
+                state:
+                  description: |-
+                    State is a high-level textual status of the deployment request lifecycle.
+                    Possible values: "", "Pending", "Profiling", "Deploying", "Ready", "DeploymentDeleted", "Failed"
+                    Empty string ("") represents the initial state before initialization.
+                  type: string
+              type: object
+          type: object
+      served: true
+      storage: true
+      subresources:
+        status: {}
--- a/deploy/cloud/helm/platform/README.md
+++ b/deploy/cloud/helm/platform/README.md
@@ -132,6 +132,7 @@ The chart includes built-in validation to prevent all operator conflicts:
 | dynamo-operator.dynamo.metrics.prometheusEndpoint | string | `""` | Endpoint that services can use to retrieve metrics. If set, dynamo operator will automatically inject the PROMETHEUS_ENDPOINT environment variable into services it manages. Users can override the value of the PROMETHEUS_ENDPOINT environment variable by modifying the corresponding deployment's environment variables |
 | dynamo-operator.dynamo.mpiRun.secretName | string | `"mpi-run-ssh-secret"` | Name of the secret containing the SSH key for MPI Run |
 | dynamo-operator.dynamo.mpiRun.sshKeygen.enabled | bool | `true` | Whether to enable SSH key generation for MPI Run |
+| dynamo-operator.dynamo.dgdr.profilerImage | string | `""` | Container image to use for profiling jobs (both online and offline/AIC) |
 | grove.enabled | bool | `false` | Whether to enable Grove for multi-node inference coordination, if enabled, the Grove operator will be deployed cluster-wide |
 | kai-scheduler.enabled | bool | `false` | Whether to enable Kai Scheduler for intelligent resource allocation, if enabled, the Kai Scheduler operator will be deployed cluster-wide |
 | etcd.enabled | bool | `true` | Whether to enable etcd deployment, disable if you want to use an external etcd instance. For complete configuration options, see: https://github.com/bitnami/charts/tree/main/bitnami/etcd , all etcd settings should be prefixed with "etcd." |

--- a/deploy/cloud/helm/platform/components/operator/templates/deployment.yaml
+++ b/deploy/cloud/helm/platform/components/operator/templates/deployment.yaml
@@ -124,7 +124,11 @@ spec:
          - --mpi-run-ssh-secret-name={{ .Values.dynamo.mpiRun.secretName }}
          - --mpi-run-ssh-secret-namespace={{ .Release.Namespace }}
        {{- end }}
+        {{- if .Values.dynamo.dgdr.profilerImage }}
+          - --profiler-image={{ .Values.dynamo.dgdr.profilerImage }}
+        {{- end }}
        {{- if not .Values.namespaceRestriction.enabled }}
+          - --dgdr-profiling-cluster-role-name={{ include "dynamo-operator.fullname" . }}-dgdr-profiling
          - --planner-cluster-role-name={{ include "dynamo-operator.fullname" . }}-planner
        {{- end }}
        command:

--- a/deploy/cloud/helm/platform/components/operator/templates/manager-rbac.yaml
+++ b/deploy/cloud/helm/platform/components/operator/templates/manager-rbac.yaml
@@ -359,6 +359,7 @@ rules:
  - nvidia.com
  resources:
  - dynamocomponentdeployments
+  - dynamographdeploymentrequests
  - dynamographdeployments
  verbs:
  - create
@@ -372,6 +373,7 @@ rules:
  - nvidia.com
  resources:
  - dynamocomponentdeployments/finalizers
+  - dynamographdeploymentrequests/finalizers
  - dynamographdeployments/finalizers
  verbs:
  - update
@@ -379,6 +381,7 @@ rules:
  - nvidia.com
  resources:
  - dynamocomponentdeployments/status
+  - dynamographdeploymentrequests/status
  - dynamographdeployments/status
  verbs:
  - get

--- a/deploy/cloud/helm/platform/components/operator/templates/profiling-job-rbac.yaml
+++ b/deploy/cloud/helm/platform/components/operator/templates/profiling-job-rbac.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+{{- if .Values.namespaceRestriction.enabled }}
+# Namespace-restricted mode: Role + ServiceAccount + RoleBinding
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: dgdr-profiling-job
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "dynamo-operator.labels" . | nindent 4 }}
+    app.kubernetes.io/component: dgdr-profiling
+
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: dgdr-profiling-job
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "dynamo-operator.labels" . | nindent 4 }}
+    app.kubernetes.io/component: dgdr-profiling
+rules:
+# ConfigMaps - needed for saving profiling results
+- apiGroups: [""]
+  resources: ["configmaps"]
+  verbs: ["create", "get", "update", "patch", "delete"]
+# DynamoGraphDeploymentRequests - needed to get DGDR info
+- apiGroups: ["nvidia.com"]
+  resources: ["dynamographdeploymentrequests"]
+  verbs: ["get"]
+# DynamoGraphDeployments - needed for online profiling to create test deployments
+# The operator will handle creating the actual pods, services, and deployments
+- apiGroups: ["nvidia.com"]
+  resources: ["dynamographdeployments"]
+  verbs: ["get", "create", "delete", "list", "watch"]
+# Pods - needed for listing pods by label selector and getting logs from test deployments
+- apiGroups: [""]
+  resources: ["pods"]
+  verbs: ["list", "get"]
+- apiGroups: [""]
+  resources: ["pods/log"]
+  verbs: ["get"]
+
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: dgdr-profiling-job
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "dynamo-operator.labels" . | nindent 4 }}
+    app.kubernetes.io/component: dgdr-profiling
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: dgdr-profiling-job
+subjects:
+- kind: ServiceAccount
+  name: dgdr-profiling-job
+  namespace: {{ .Release.Namespace }}
+{{- else }}
+# Cluster-wide mode: ClusterRole for DGDR profiling jobs
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: {{ include "dynamo-operator.fullname" . }}-dgdr-profiling
+  labels:
+    {{- include "dynamo-operator.labels" . | nindent 4 }}
+    app.kubernetes.io/component: dgdr-profiling
+rules:
+# ConfigMaps - needed for saving profiling results
+- apiGroups: [""]
+  resources: ["configmaps"]
+  verbs: ["create", "get", "update", "patch", "delete"]
+# DynamoGraphDeploymentRequests - needed to get DGDR info
+- apiGroups: ["nvidia.com"]
+  resources: ["dynamographdeploymentrequests"]
+  verbs: ["get"]
+# DynamoGraphDeployments - needed for online profiling to create test deployments
+# The operator will handle creating the actual pods, services, and deployments
+- apiGroups: ["nvidia.com"]
+  resources: ["dynamographdeployments"]
+  verbs: ["get", "create", "delete", "list", "watch"]
+# Pods - needed for listing pods by label selector and getting logs from test deployments
+- apiGroups: [""]
+  resources: ["pods"]
+  verbs: ["list", "get"]
+- apiGroups: [""]
+  resources: ["pods/log"]
+  verbs: ["get"]
+{{- end }}
--- a/deploy/cloud/helm/platform/components/operator/values.yaml
+++ b/deploy/cloud/helm/platform/components/operator/values.yaml
@@ -117,6 +117,15 @@ dynamo:
    sshKeygen:
      enabled: true

+  # DynamoGraphDeploymentRequest (DGDR) configuration
+  dgdr:
+    # Container image to use for profiling jobs (both online and offline/AIC)
+    # REQUIRED: Must be set to create DynamoGraphDeploymentRequests
+    # For development: Build and push the profiler image from the ai-dynamo repository
+    # Public image will be available in release 0.6.1
+    # Example: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.1"
+    profilerImage: ""
+

 #imagePullSecrets: []
 kubernetesClusterDomain: cluster.local

--- a/deploy/cloud/helm/platform/values.yaml
+++ b/deploy/cloud/helm/platform/values.yaml
@@ -135,6 +135,15 @@ dynamo-operator:
        # -- Whether to enable SSH key generation for MPI Run
        enabled: true

+    # DynamoGraphDeploymentRequest (DGDR) configuration
+    dgdr:
+      # -- Container image to use for profiling jobs (both online and offline/AIC)
+      # REQUIRED: Must be set to create DynamoGraphDeploymentRequests
+      # For development: Build and push the profiler image from the ai-dynamo repository
+      # Public image will be available in release 0.6.1
+      # Example: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.1"
+      profilerImage: ""
+

 # Grove component - distributed inference orchestration
 grove:

--- a/deploy/cloud/operator/README.md
+++ b/deploy/cloud/operator/README.md
@@ -5,10 +5,19 @@ A Kubernetes Operator to manage all Dynamo pipelines using custom resources.

 ## Overview

-This operator automates the deployment and lifecycle management of `DynamoGraphDeployment` resources in Kubernetes clusters.
+This operator automates the deployment and lifecycle management of Dynamo resources in Kubernetes clusters:
+
+- **DynamoGraphDeploymentRequest (DGDR)** - Simplified SLA-driven deployment interface
+- **DynamoGraphDeployment (DGD)** - Direct deployment configuration

 Built with [Kubebuilder](https://book.kubebuilder.io/), it follows Kubernetes best practices and supports declarative configuration through CustomResourceDefinitions (CRDs).

+### Custom Resources
+
+- **DynamoGraphDeploymentRequest**: High-level interface for SLA-driven configuration generation. Automatically handles profiling and generates an optimized DGD spec based on your performance requirements.
+- **DynamoGraphDeployment**: Lower-level interface for direct deployment configuration with full control over all parameters.
+
+
 ## Developer guide

 ### Pre-requisites

--- a/deploy/cloud/operator/api/v1alpha1/dynamographdeploymentrequest_types.go
+++ b/deploy/cloud/operator/api/v1alpha1/dynamographdeploymentrequest_types.go
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+Package v1alpha1 contains API Schema definitions for the nvidia.com v1alpha1 API group.
+
+This package defines the DynamoGraphDeploymentRequest (DGDR) custom resource, which provides
+a high-level, SLA-driven interface for deploying machine learning models on Dynamo.
+*/
+package v1alpha1
+
+import (
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	runtime "k8s.io/apimachinery/pkg/runtime"
+)
+
+// EDIT THIS FILE!  THIS IS SCAFFOLDING FOR YOU TO OWN!
+// NOTE: json tags are required.  Any new fields you add must have json tags for the fields to be serialized.
+
+// SLASpec defines Service Level Agreement targets for model profiling and deployment.
+// These targets guide the profiling process to find optimal deployment configurations
+// that meet the specified performance requirements.
+//
+// All four fields are optional integers with schema defaults (ITL=10, TTFT=50,
+// ISL=3000, OSL=500). Every field is tagged omitempty, so a zero value is dropped
+// when the struct is marshaled to JSON by Go code.
+//
+// NOTE(review): ISL and OSL carry a Minimum=1 validation marker but ITL and TTFT do
+// not, so zero or negative latency targets are not rejected by the markers on this
+// type. Confirm whether that asymmetry is intentional.
+type SLASpec struct {
+	// ITL is the target Inter-Token Latency in milliseconds.
+	// This represents the maximum time allowed between consecutive tokens in the output.
+	// +kubebuilder:default=10
+	// +optional
+	ITL int `json:"itl,omitempty"`
+
+	// TTFT is the target Time To First Token in milliseconds.
+	// This represents the maximum time allowed from request submission to receiving the first token.
+	// +kubebuilder:default=50
+	// +optional
+	TTFT int `json:"ttft,omitempty"`
+
+	// ISL is the Input Sequence Length for profiling.
+	// Defines the length of input sequences to use during profiling tests.
+	// +kubebuilder:default=3000
+	// +kubebuilder:validation:Minimum=1
+	// +optional
+	ISL int `json:"isl,omitempty"`
+
+	// OSL is the Output Sequence Length for profiling.
+	// Defines the expected length of output sequences to generate during profiling tests.
+	// +kubebuilder:default=500
+	// +kubebuilder:validation:Minimum=1
+	// +optional
+	OSL int `json:"osl,omitempty"`
+}
+
+// GPUSpec defines optional GPU type and resource specifications for profiling and deployment.
+// These constraints help narrow down the search space during profiling to find configurations
+// that fit within specified hardware bounds.
+//
+// Every field is optional. The two GPU-count bounds default to 1 and 8 respectively
+// and are each validated to be at least 1.
+//
+// NOTE(review): no marker on this type enforces
+// MinNumGPUsPerEngine <= MaxNumGPUsPerEngine; presumably that is checked elsewhere
+// (controller or webhook). Verify.
+type GPUSpec struct {
+	// Type specifies the GPU type to target (e.g., "h200", "h100", "a100").
+	// If specified, profiling will focus on configurations optimized for this GPU type.
+	// +kubebuilder:validation:Optional
+	Type string `json:"type,omitempty"`
+
+	// MinNumGPUsPerEngine specifies the minimum number of GPUs per engine for profiling.
+	// The profiler will not consider configurations with fewer GPUs than this value.
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:validation:Minimum=1
+	// +kubebuilder:default=1
+	MinNumGPUsPerEngine int `json:"minNumGPUsPerEngine,omitempty"`
+
+	// MaxNumGPUsPerEngine specifies the maximum number of GPUs per engine for profiling.
+	// The profiler will not consider configurations with more GPUs than this value.
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:validation:Minimum=1
+	// +kubebuilder:default=8
+	MaxNumGPUsPerEngine int `json:"maxNumGPUsPerEngine,omitempty"`
+}
+
+// ConfigMapKeySelector selects a specific key from a ConfigMap.
+// Used to reference external configuration data stored in ConfigMaps.
+//
+// Name is required and is always serialized (its JSON tag has no omitempty).
+// Key is optional: the schema default marker fills in "disagg.yaml" when it is
+// not provided.
+type ConfigMapKeySelector struct {
+	// Name of the ConfigMap containing the desired data.
+	// +kubebuilder:validation:Required
+	Name string `json:"name"`
+
+	// Key in the ConfigMap to select. If not specified, defaults to "disagg.yaml".
+	// +kubebuilder:default=disagg.yaml
+	Key string `json:"key,omitempty"`
+}
+
+// ProfilingConfigSpec defines configuration for the profiling process.
+// Allows users to provide custom profiling parameters via ConfigMap references.
+//
+// The only field, ConfigMapRef, is an optional pointer; it is nil when no
+// ConfigMap reference was supplied.
+type ProfilingConfigSpec struct {
+	// ConfigMapRef is a reference to a ConfigMap containing profiling configuration.
+	// The ConfigMap should contain a key (default: "disagg.yaml") with the configuration file.
+	// This configuration is used by both online and offline (AIC) profiling modes.
+	// +kubebuilder:validation:Optional
+	ConfigMapRef *ConfigMapKeySelector `json:"configMapRef,omitempty"`
+}
+
+// DeploymentOverridesSpec allows users to customize metadata for auto-created DynamoGraphDeployments.
+// When autoApply is enabled, these overrides are applied to the generated DGD resource.
+//
+// Every field is optional and tagged omitempty. Labels and Annotations are plain
+// string maps and are nil when omitted.
+type DeploymentOverridesSpec struct {
+	// Name is the desired name for the created DynamoGraphDeployment.
+	// If not specified, defaults to the DGDR name.
+	// +kubebuilder:validation:Optional
+	Name string `json:"name,omitempty"`
+
+	// Namespace is the desired namespace for the created DynamoGraphDeployment.
+	// If not specified, defaults to the DGDR namespace.
+	// +kubebuilder:validation:Optional
+	Namespace string `json:"namespace,omitempty"`
+
+	// Labels are additional labels to add to the DynamoGraphDeployment metadata.
+	// These are merged with auto-generated labels from the profiling process.
+	// +kubebuilder:validation:Optional
+	Labels map[string]string `json:"labels,omitempty"`
+
+	// Annotations are additional annotations to add to the DynamoGraphDeployment metadata.
+	// +kubebuilder:validation:Optional
+	Annotations map[string]string `json:"annotations,omitempty"`
+}
+
+// DynamoGraphDeploymentRequestSpec defines the desired state of a DynamoGraphDeploymentRequest.
+// This CRD serves as the primary interface for users to request model deployments with
+// specific performance constraints and resource requirements, enabling SLA-driven deployments.
+//
+// ModelName and SLA are the only required fields. The remaining fields are optional:
+// Backend, Online and AutoApply have schema defaults ("trtllm", false, false), and
+// the pointer-typed sections (GPU, DeploymentOverrides, ProfilingConfig) are nil
+// when omitted.
+type DynamoGraphDeploymentRequestSpec struct {
+	// ModelName specifies the model to deploy (e.g., "meta/llama3-70b").
+	// This should be a valid model identifier that the profiler can resolve.
+	// +kubebuilder:validation:Required
+	ModelName string `json:"modelName"`
+
+	// Backend specifies the inference backend framework to use.
+	// Supported values are: "vllm", "sglang", "trtllm".
+	// +kubebuilder:validation:Enum=vllm;sglang;trtllm
+	// +kubebuilder:default=trtllm
+	Backend string `json:"backend,omitempty"`
+
+	// SLA defines the Service Level Agreement profiling targets.
+	// The profiler uses these targets to find an optimal deployment configuration.
+	// +kubebuilder:validation:Required
+	SLA SLASpec `json:"sla"`
+
+	// GPU defines optional GPU type and resource specifications.
+	// These constraints guide the profiler to find configurations within specified bounds.
+	// +kubebuilder:validation:Optional
+	GPU *GPUSpec `json:"gpu,omitempty"`
+
+	// Online indicates whether to use online profiler (true) or AI Configurator (false).
+	// Online profiling uses real deployments for accurate measurements (2-4 hours).
+	// Offline profiling uses AI Configurator for fast simulation-based profiling (20-30 seconds).
+	// +kubebuilder:default=false
+	Online bool `json:"online,omitempty"`
+
+	// AutoApply indicates whether to automatically create a DynamoGraphDeployment
+	// after profiling completes. If false, only the spec is generated and stored in status.
+	// Users can then manually create a DGD using the generated spec.
+	// +kubebuilder:default=false
+	AutoApply bool `json:"autoApply,omitempty"`
+
+	// DeploymentOverrides allows customizing metadata for the auto-created DGD.
+	// Only applicable when AutoApply is true.
+	// +kubebuilder:validation:Optional
+	DeploymentOverrides *DeploymentOverridesSpec `json:"deploymentOverrides,omitempty"`
+
+	// ProfilingConfig provides custom configuration for the profiling job.
+	// Applicable to both online and offline (AIC) profiling modes.
+	// +kubebuilder:validation:Optional
+	ProfilingConfig *ProfilingConfigSpec `json:"profilingConfig,omitempty"`
+}
+
+// DeploymentStatus tracks the state of an auto-created DynamoGraphDeployment.
+// This status is populated when autoApply is enabled and a DGD is created.
+//
+// All fields are tagged omitempty, so an empty Name/Namespace/State and a false
+// Created flag are left out of the serialized status.
+type DeploymentStatus struct {
+	// Name is the name of the created DynamoGraphDeployment.
+	Name string `json:"name,omitempty"`
+
+	// Namespace is the namespace of the created DynamoGraphDeployment.
+	Namespace string `json:"namespace,omitempty"`
+
+	// State is the current state of the DynamoGraphDeployment.
+	// This value is mirrored from the DGD's status.state field.
+	State string `json:"state,omitempty"`
+
+	// Created indicates whether the DGD has been successfully created.
+	// Used to prevent recreation if the DGD is manually deleted by users.
+	Created bool `json:"created,omitempty"`
+}
+
+// DynamoGraphDeploymentRequestStatus represents the observed state of a DynamoGraphDeploymentRequest.
+// The controller updates this status as the DGDR progresses through its lifecycle.
+//
+// GeneratedDeployment and Deployment are optional pointers and are nil until set.
+//
+// NOTE(review): Conditions carries patchStrategy/patchMergeKey struct tags but no
+// list-map markers (listType=map, listMapKey=type) are visible on the field.
+// Confirm whether list-map semantics are needed for server-side apply.
+type DynamoGraphDeploymentRequestStatus struct {
+	// State is a high-level textual status of the deployment request lifecycle.
+	// Possible values: "", "Pending", "Profiling", "Deploying", "Ready", "DeploymentDeleted", "Failed"
+	// Empty string ("") represents the initial state before initialization.
+	State string `json:"state,omitempty"`
+
+	// ObservedGeneration reflects the generation of the most recently observed spec.
+	// Used to detect spec changes and enforce immutability after profiling starts.
+	ObservedGeneration int64 `json:"observedGeneration,omitempty"`
+
+	// Conditions contains the latest observed conditions of the deployment request.
+	// Standard condition types include: Validation, Profiling, SpecGenerated, DeploymentReady.
+	// Conditions are merged by type on patch updates.
+	Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"`
+
+	// ProfilingResults contains a reference to the ConfigMap holding profiling data.
+	// Format: "configmap/<name>"
+	// +kubebuilder:validation:Optional
+	ProfilingResults string `json:"profilingResults,omitempty"`
+
+	// GeneratedDeployment contains the full generated DynamoGraphDeployment specification
+	// including metadata, based on profiling results. Users can extract this to create
+	// a DGD manually, or it's used automatically when autoApply is true.
+	// Stored as RawExtension to preserve all fields including metadata.
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:pruning:PreserveUnknownFields
+	// +kubebuilder:validation:EmbeddedResource
+	GeneratedDeployment *runtime.RawExtension `json:"generatedDeployment,omitempty"`
+
+	// Deployment tracks the auto-created DGD when AutoApply is true.
+	// Contains name, namespace, state, and creation status of the managed DGD.
+	// +kubebuilder:validation:Optional
+	Deployment *DeploymentStatus `json:"deployment,omitempty"`
+}
+
+// NOTE(review): the status State field also lists "Failed" as a possible value, but
+// the lifecycle list in the type documentation below has no entry for it. Confirm
+// when the controller enters Failed and whether it is terminal, then document it.
+
+// DynamoGraphDeploymentRequest is the Schema for the dynamographdeploymentrequests API.
+// It serves as the primary interface for users to request model deployments with
+// specific performance and resource constraints, enabling SLA-driven deployments.
+//
+// Lifecycle:
+//  1. Initial → Pending: Validates spec and prepares for profiling
+//  2. Pending → Profiling: Creates and runs profiling job (online or AIC)
+//  3. Profiling → Ready/Deploying: Generates DGD spec after profiling completes
+//  4. Deploying → Ready: When autoApply=true, monitors DGD until Ready
+//  5. Ready: Terminal state when DGD is operational or spec is available
+//  6. DeploymentDeleted: Terminal state when auto-created DGD is manually deleted
+//
+// The spec becomes immutable once profiling starts. Users must delete and recreate
+// the DGDR to modify configuration after this point.
+//
+// +kubebuilder:object:root=true
+// +kubebuilder:subresource:status
+// +kubebuilder:resource:shortName=dgdr
+// +kubebuilder:printcolumn:name="Model",type=string,JSONPath=`.spec.modelName`
+// +kubebuilder:printcolumn:name="Backend",type=string,JSONPath=`.spec.backend`
+// +kubebuilder:printcolumn:name="State",type=string,JSONPath=`.status.state`
+// +kubebuilder:printcolumn:name="DGD-State",type=string,JSONPath=`.status.deployment.state`
+// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp"
+type DynamoGraphDeploymentRequest struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	// Spec defines the desired state for this deployment request.
+	Spec DynamoGraphDeploymentRequestSpec `json:"spec,omitempty"`
+
+	// Status reflects the current observed state of this deployment request.
+	Status DynamoGraphDeploymentRequestStatus `json:"status,omitempty"`
+}
+
+// SetState records the given lifecycle state on the request's status.
+// The value is stored verbatim in Status.State; no validation is performed here.
+func (s *DynamoGraphDeploymentRequest) SetState(state string) {
+	status := &s.Status
+	status.State = state
+}
+
+// GetSpec exposes the request's Spec as an untyped value.
+// The result holds a DynamoGraphDeploymentRequestSpec value (not a pointer). The
+// struct is copied, but its pointer and map fields still refer to the same
+// underlying data as the receiver's Spec.
+func (s *DynamoGraphDeploymentRequest) GetSpec() any {
+	var spec any = s.Spec
+	return spec
+}
+
+// SetSpec replaces the request's Spec with the supplied untyped value.
+// spec must hold a DynamoGraphDeploymentRequestSpec value; any other dynamic type
+// (including a pointer to the spec, or nil) makes the type assertion panic.
+func (s *DynamoGraphDeploymentRequest) SetSpec(spec any) {
+	typed := spec.(DynamoGraphDeploymentRequestSpec)
+	s.Spec = typed
+}
+
+// AddStatusCondition upserts a condition into Status.Conditions, keyed by Type.
+// The first existing entry whose Type equals condition.Type is overwritten in
+// place; if there is none, the condition is appended to the end of the list.
+func (s *DynamoGraphDeploymentRequest) AddStatusCondition(condition metav1.Condition) {
+	conditions := s.Status.Conditions
+	for idx := range conditions {
+		if conditions[idx].Type != condition.Type {
+			continue
+		}
+		// Same type already tracked: overwrite the slot and stop.
+		conditions[idx] = condition
+		return
+	}
+	// No entry of this type yet; append also copes with a nil list.
+	s.Status.Conditions = append(conditions, condition)
+}
+
+// DynamoGraphDeploymentRequestList contains a list of DynamoGraphDeploymentRequest resources.
+// Items has no omitempty tag, so the "items" key is always present when the list
+// is marshaled to JSON (as null when the slice is nil).
+//
+// +kubebuilder:object:root=true
+type DynamoGraphDeploymentRequestList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []DynamoGraphDeploymentRequest `json:"items"`
+}
+
+// init registers the DGDR kind and its list kind with the package SchemeBuilder.
+func init() {
+	SchemeBuilder.Register(
+		new(DynamoGraphDeploymentRequest),
+		new(DynamoGraphDeploymentRequestList),
+	)
+}
--- a/deploy/cloud/operator/api/v1alpha1/zz_generated.deepcopy.go
+++ b/deploy/cloud/operator/api/v1alpha1/zz_generated.deepcopy.go
@@ -42,7 +42,7 @@ import (
 	"k8s.io/api/autoscaling/v2"
 	"k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	runtime "k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/runtime"
 )

 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
@@ -114,6 +114,65 @@ func (in *BaseStatus) DeepCopy() *BaseStatus {
 	return out
 }

+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ConfigMapKeySelector) DeepCopyInto(out *ConfigMapKeySelector) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ConfigMapKeySelector.
+func (in *ConfigMapKeySelector) DeepCopy() *ConfigMapKeySelector {
+	if in == nil {
+		return nil
+	}
+	out := new(ConfigMapKeySelector)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *DeploymentOverridesSpec) DeepCopyInto(out *DeploymentOverridesSpec) {
+	*out = *in
+	if in.Labels != nil {
+		in, out := &in.Labels, &out.Labels
+		*out = make(map[string]string, len(*in))
+		for key, val := range *in {
+			(*out)[key] = val
+		}
+	}
+	if in.Annotations != nil {
+		in, out := &in.Annotations, &out.Annotations
+		*out = make(map[string]string, len(*in))
+		for key, val := range *in {
+			(*out)[key] = val
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeploymentOverridesSpec.
+func (in *DeploymentOverridesSpec) DeepCopy() *DeploymentOverridesSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(DeploymentOverridesSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *DeploymentStatus) DeepCopyInto(out *DeploymentStatus) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeploymentStatus.
+func (in *DeploymentStatus) DeepCopy() *DeploymentStatus {
+	if in == nil {
+		return nil
+	}
+	out := new(DeploymentStatus)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *DynamoComponentDeployment) DeepCopyInto(out *DynamoComponentDeployment) {
 	*out = *in
@@ -378,6 +437,128 @@ func (in *DynamoGraphDeploymentList) DeepCopyObject() runtime.Object {
 	return nil
 }

+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *DynamoGraphDeploymentRequest) DeepCopyInto(out *DynamoGraphDeploymentRequest) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
+	in.Spec.DeepCopyInto(&out.Spec)
+	in.Status.DeepCopyInto(&out.Status)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DynamoGraphDeploymentRequest.
+func (in *DynamoGraphDeploymentRequest) DeepCopy() *DynamoGraphDeploymentRequest {
+	if in == nil {
+		return nil
+	}
+	out := new(DynamoGraphDeploymentRequest)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *DynamoGraphDeploymentRequest) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *DynamoGraphDeploymentRequestList) DeepCopyInto(out *DynamoGraphDeploymentRequestList) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ListMeta.DeepCopyInto(&out.ListMeta)
+	if in.Items != nil {
+		in, out := &in.Items, &out.Items
+		*out = make([]DynamoGraphDeploymentRequest, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DynamoGraphDeploymentRequestList.
+func (in *DynamoGraphDeploymentRequestList) DeepCopy() *DynamoGraphDeploymentRequestList {
+	if in == nil {
+		return nil
+	}
+	out := new(DynamoGraphDeploymentRequestList)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *DynamoGraphDeploymentRequestList) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *DynamoGraphDeploymentRequestSpec) DeepCopyInto(out *DynamoGraphDeploymentRequestSpec) {
+	*out = *in
+	out.SLA = in.SLA
+	if in.GPU != nil {
+		in, out := &in.GPU, &out.GPU
+		*out = new(GPUSpec)
+		**out = **in
+	}
+	if in.DeploymentOverrides != nil {
+		in, out := &in.DeploymentOverrides, &out.DeploymentOverrides
+		*out = new(DeploymentOverridesSpec)
+		(*in).DeepCopyInto(*out)
+	}
+	if in.ProfilingConfig != nil {
+		in, out := &in.ProfilingConfig, &out.ProfilingConfig
+		*out = new(ProfilingConfigSpec)
+		(*in).DeepCopyInto(*out)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DynamoGraphDeploymentRequestSpec.
+func (in *DynamoGraphDeploymentRequestSpec) DeepCopy() *DynamoGraphDeploymentRequestSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(DynamoGraphDeploymentRequestSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *DynamoGraphDeploymentRequestStatus) DeepCopyInto(out *DynamoGraphDeploymentRequestStatus) {
+	*out = *in
+	if in.Conditions != nil {
+		in, out := &in.Conditions, &out.Conditions
+		*out = make([]metav1.Condition, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+	if in.GeneratedDeployment != nil {
+		in, out := &in.GeneratedDeployment, &out.GeneratedDeployment
+		*out = new(runtime.RawExtension)
+		(*in).DeepCopyInto(*out)
+	}
+	if in.Deployment != nil {
+		in, out := &in.Deployment, &out.Deployment
+		*out = new(DeploymentStatus)
+		**out = **in
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DynamoGraphDeploymentRequestStatus.
+func (in *DynamoGraphDeploymentRequestStatus) DeepCopy() *DynamoGraphDeploymentRequestStatus {
+	if in == nil {
+		return nil
+	}
+	out := new(DynamoGraphDeploymentRequestStatus)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *DynamoGraphDeploymentSpec) DeepCopyInto(out *DynamoGraphDeploymentSpec) {
 	*out = *in
@@ -445,6 +626,21 @@ func (in *DynamoGraphDeploymentStatus) DeepCopy() *DynamoGraphDeploymentStatus {
 	return out
 }

+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *GPUSpec) DeepCopyInto(out *GPUSpec) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUSpec.
+func (in *GPUSpec) DeepCopy() *GPUSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(GPUSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *IngressSpec) DeepCopyInto(out *IngressSpec) {
 	*out = *in
@@ -555,6 +751,41 @@ func (in *PVC) DeepCopy() *PVC {
 	return out
 }

+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ProfilingConfigSpec) DeepCopyInto(out *ProfilingConfigSpec) {
+	*out = *in
+	if in.ConfigMapRef != nil {
+		in, out := &in.ConfigMapRef, &out.ConfigMapRef
+		*out = new(ConfigMapKeySelector)
+		**out = **in
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ProfilingConfigSpec.
+func (in *ProfilingConfigSpec) DeepCopy() *ProfilingConfigSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(ProfilingConfigSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *SLASpec) DeepCopyInto(out *SLASpec) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SLASpec.
+func (in *SLASpec) DeepCopy() *SLASpec {
+	if in == nil {
+		return nil
+	}
+	out := new(SLASpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *SharedMemorySpec) DeepCopyInto(out *SharedMemorySpec) {
 	*out = *in

--- a/deploy/cloud/operator/cmd/main.go
+++ b/deploy/cloud/operator/cmd/main.go
@@ -140,6 +140,8 @@ func main() {
 	var mpiRunSecretName string
 	var mpiRunSecretNamespace string
 	var plannerClusterRoleName string
+	var profilerImage string
+	var dgdrProfilingClusterRoleName string
 	flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
 	flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
 	flag.BoolVar(&enableLeaderElection, "leader-elect", false,
@@ -180,6 +182,10 @@ func main() {
 		"Namespace where the MPI SSH secret is located (required)")
 	flag.StringVar(&plannerClusterRoleName, "planner-cluster-role-name", "",
 		"Name of the ClusterRole for planner (cluster-wide mode only)")
+	flag.StringVar(&profilerImage, "profiler-image", "",
+		"Container image to use for profiling jobs (both online and offline/AIC) (for DynamoGraphDeploymentRequest)")
+	flag.StringVar(&dgdrProfilingClusterRoleName, "dgdr-profiling-cluster-role-name", "",
+		"Name of the ClusterRole for DGDR profiling jobs (cluster-wide mode only)")
 	opts := zap.Options{
 		Development: true,
 	}
@@ -237,6 +243,7 @@ func main() {
 		},
 		RBAC: commonController.RBACConfig{
 			PlannerClusterRoleName:       plannerClusterRoleName,
+			DGDRProfilingClusterRoleName: dgdrProfilingClusterRoleName,
 		},
 	}

@@ -449,6 +456,17 @@ func main() {
 		setupLog.Error(err, "unable to create controller", "controller", "DynamoGraphDeployment")
 		os.Exit(1)
 	}
+
+	if err = (&controller.DynamoGraphDeploymentRequestReconciler{
+		Client:        mgr.GetClient(),
+		Recorder:      mgr.GetEventRecorderFor("dynamographdeploymentrequest"),
+		ProfilerImage: profilerImage,
+		Config:        ctrlConfig,
+		RBACManager:   rbacManager,
+	}).SetupWithManager(mgr); err != nil {
+		setupLog.Error(err, "unable to create controller", "controller", "DynamoGraphDeploymentRequest")
+		os.Exit(1)
+	}
 	//+kubebuilder:scaffold:builder

 	if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {

--- a/deploy/cloud/operator/config/crd/bases/nvidia.com_dynamographdeploymentrequests.yaml
+++ b/deploy/cloud/operator/config/crd/bases/nvidia.com_dynamographdeploymentrequests.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.16.4
+    helm.sh/resource-policy: keep
+  name: dynamographdeploymentrequests.nvidia.com
+spec:
+  group: nvidia.com
+  names:
+    kind: DynamoGraphDeploymentRequest
+    listKind: DynamoGraphDeploymentRequestList
+    plural: dynamographdeploymentrequests
+    shortNames:
+      - dgdr
+    singular: dynamographdeploymentrequest
+  scope: Namespaced
+  versions:
+    - additionalPrinterColumns:
+        - jsonPath: .spec.modelName
+          name: Model
+          type: string
+        - jsonPath: .spec.backend
+          name: Backend
+          type: string
+        - jsonPath: .status.state
+          name: State
+          type: string
+        - jsonPath: .status.deployment.state
+          name: DGD-State
+          type: string
+        - jsonPath: .metadata.creationTimestamp
+          name: Age
+          type: date
+      name: v1alpha1
+      schema:
+        openAPIV3Schema:
+          description: |-
+            DynamoGraphDeploymentRequest is the Schema for the dynamographdeploymentrequests API.
+            It serves as the primary interface for users to request model deployments with
+            specific performance and resource constraints, enabling SLA-driven deployments.
+
+            Lifecycle:
+             1. Initial → Pending: Validates spec and prepares for profiling
+             2. Pending → Profiling: Creates and runs profiling job (online or AIC)
+             3. Profiling → Ready/Deploying: Generates DGD spec after profiling completes
+             4. Deploying → Ready: When autoApply=true, monitors DGD until Ready
+             5. Ready: Terminal state when DGD is operational or spec is available
+             6. DeploymentDeleted: Terminal state when auto-created DGD is manually deleted
+
+            The spec becomes immutable once profiling starts. Users must delete and recreate
+            the DGDR to modify configuration after this point.
+          properties:
+            apiVersion:
+              description: |-
+                APIVersion defines the versioned schema of this representation of an object.
+                Servers should convert recognized schemas to the latest internal value, and
+                may reject unrecognized values.
+                More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+              type: string
+            kind:
+              description: |-
+                Kind is a string value representing the REST resource this object represents.
+                Servers may infer this from the endpoint the client submits requests to.
+                Cannot be updated.
+                In CamelCase.
+                More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+              type: string
+            metadata:
+              type: object
+            spec:
+              description: Spec defines the desired state for this deployment request.
+              properties:
+                autoApply:
+                  default: false
+                  description: |-
+                    AutoApply indicates whether to automatically create a DynamoGraphDeployment
+                    after profiling completes. If false, only the spec is generated and stored in status.
+                    Users can then manually create a DGD using the generated spec.
+                  type: boolean
+                backend:
+                  default: trtllm
+                  description: |-
+                    Backend specifies the inference backend framework to use.
+                    Supported values are: "vllm", "sglang", "trtllm".
+                  enum:
+                    - vllm
+                    - sglang
+                    - trtllm
+                  type: string
+                deploymentOverrides:
+                  description: |-
+                    DeploymentOverrides allows customizing metadata for the auto-created DGD.
+                    Only applicable when AutoApply is true.
+                  properties:
+                    annotations:
+                      additionalProperties:
+                        type: string
+                      description: Annotations are additional annotations to add to the DynamoGraphDeployment metadata.
+                      type: object
+                    labels:
+                      additionalProperties:
+                        type: string
+                      description: |-
+                        Labels are additional labels to add to the DynamoGraphDeployment metadata.
+                        These are merged with auto-generated labels from the profiling process.
+                      type: object
+                    name:
+                      description: |-
+                        Name is the desired name for the created DynamoGraphDeployment.
+                        If not specified, defaults to the DGDR name.
+                      type: string
+                    namespace:
+                      description: |-
+                        Namespace is the desired namespace for the created DynamoGraphDeployment.
+                        If not specified, defaults to the DGDR namespace.
+                      type: string
+                  type: object
+                gpu:
+                  description: |-
+                    GPU defines optional GPU type and resource specifications.
+                    These constraints guide the profiler to find configurations within specified bounds.
+                  properties:
+                    maxNumGPUsPerEngine:
+                      default: 8
+                      description: |-
+                        MaxNumGPUsPerEngine specifies the maximum number of GPUs per engine for profiling.
+                        The profiler will not consider configurations with more GPUs than this value.
+                      minimum: 1
+                      type: integer
+                    minNumGPUsPerEngine:
+                      default: 1
+                      description: |-
+                        MinNumGPUsPerEngine specifies the minimum number of GPUs per engine for profiling.
+                        The profiler will not consider configurations with fewer GPUs than this value.
+                      minimum: 1
+                      type: integer
+                    type:
+                      description: |-
+                        Type specifies the GPU type to target (e.g., "h200", "h100", "a100").
+                        If specified, profiling will focus on configurations optimized for this GPU type.
+                      type: string
+                  type: object
+                modelName:
+                  description: |-
+                    ModelName specifies the model to deploy (e.g., "meta/llama3-70b").
+                    This should be a valid model identifier that the profiler can resolve.
+                  type: string
+                online:
+                  default: false
+                  description: |-
+                    Online indicates whether to use online profiler (true) or AI Configurator (false).
+                    Online profiling uses real deployments for accurate measurements (2-4 hours).
+                    Offline profiling uses AI Configurator for fast simulation-based profiling (20-30 seconds).
+                  type: boolean
+                profilingConfig:
+                  description: |-
+                    ProfilingConfig provides custom configuration for the profiling job.
+                    Applicable to both online and offline (AIC) profiling modes.
+                  properties:
+                    configMapRef:
+                      description: |-
+                        ConfigMapRef is a reference to a ConfigMap containing profiling configuration.
+                        The ConfigMap should contain a key (default: "disagg.yaml") with the configuration file.
+                        This configuration is used by both online and offline (AIC) profiling modes.
+                      properties:
+                        key:
+                          default: disagg.yaml
+                          description: Key in the ConfigMap to select. If not specified, defaults to "disagg.yaml".
+                          type: string
+                        name:
+                          description: Name of the ConfigMap containing the desired data.
+                          type: string
+                      required:
+                        - name
+                      type: object
+                  type: object
+                sla:
+                  description: |-
+                    SLA defines the Service Level Agreement profiling targets.
+                    The profiler uses these targets to find an optimal deployment configuration.
+                  properties:
+                    isl:
+                      default: 3000
+                      description: |-
+                        ISL is the Input Sequence Length for profiling.
+                        Defines the length of input sequences to use during profiling tests.
+                      minimum: 1
+                      type: integer
+                    itl:
+                      default: 10
+                      description: |-
+                        ITL is the target Inter-Token Latency in milliseconds.
+                        This represents the maximum time allowed between consecutive tokens in the output.
+                      type: integer
+                    osl:
+                      default: 500
+                      description: |-
+                        OSL is the Output Sequence Length for profiling.
+                        Defines the expected length of output sequences to generate during profiling tests.
+                      minimum: 1
+                      type: integer
+                    ttft:
+                      default: 50
+                      description: |-
+                        TTFT is the target Time To First Token in milliseconds.
+                        This represents the maximum time allowed from request submission to receiving the first token.
+                      type: integer
+                  type: object
+              required:
+                - modelName
+                - sla
+              type: object
+            status:
+              description: Status reflects the current observed state of this deployment request.
+              properties:
+                conditions:
+                  description: |-
+                    Conditions contains the latest observed conditions of the deployment request.
+                    Standard condition types include: Validation, Profiling, SpecGenerated, DeploymentReady.
+                    Conditions are merged by type on patch updates.
+                  items:
+                    description: Condition contains details for one aspect of the current state of this API Resource.
+                    properties:
+                      lastTransitionTime:
+                        description: |-
+                          lastTransitionTime is the last time the condition transitioned from one status to another.
+                          This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
+                        format: date-time
+                        type: string
+                      message:
+                        description: |-
+                          message is a human readable message indicating details about the transition.
+                          This may be an empty string.
+                        maxLength: 32768
+                        type: string
+                      observedGeneration:
+                        description: |-
+                          observedGeneration represents the .metadata.generation that the condition was set based upon.
+                          For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                          with respect to the current state of the instance.
+                        format: int64
+                        minimum: 0
+                        type: integer
+                      reason:
+                        description: |-
+                          reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                          Producers of specific condition types may define expected values and meanings for this field,
+                          and whether the values are considered a guaranteed API.
+                          The value should be a CamelCase string.
+                          This field may not be empty.
+                        maxLength: 1024
+                        minLength: 1
+                        pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                        type: string
+                      status:
+                        description: status of the condition, one of True, False, Unknown.
+                        enum:
+                          - "True"
+                          - "False"
+                          - Unknown
+                        type: string
+                      type:
+                        description: type of condition in CamelCase or in foo.example.com/CamelCase.
+                        maxLength: 316
+                        pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                        type: string
+                    required:
+                      - lastTransitionTime
+                      - message
+                      - reason
+                      - status
+                      - type
+                    type: object
+                  type: array
+                deployment:
+                  description: |-
+                    Deployment tracks the auto-created DGD when AutoApply is true.
+                    Contains name, namespace, state, and creation status of the managed DGD.
+                  properties:
+                    created:
+                      description: |-
+                        Created indicates whether the DGD has been successfully created.
+                        Used to prevent recreation if the DGD is manually deleted by users.
+                      type: boolean
+                    name:
+                      description: Name is the name of the created DynamoGraphDeployment.
+                      type: string
+                    namespace:
+                      description: Namespace is the namespace of the created DynamoGraphDeployment.
+                      type: string
+                    state:
+                      description: |-
+                        State is the current state of the DynamoGraphDeployment.
+                        This value is mirrored from the DGD's status.state field.
+                      type: string
+                  type: object
+                generatedDeployment:
+                  description: |-
+                    GeneratedDeployment contains the full generated DynamoGraphDeployment specification
+                    including metadata, based on profiling results. Users can extract this to create
+                    a DGD manually, or it's used automatically when autoApply is true.
+                    Stored as RawExtension to preserve all fields including metadata.
+                  type: object
+                  x-kubernetes-embedded-resource: true
+                  x-kubernetes-preserve-unknown-fields: true
+                observedGeneration:
+                  description: |-
+                    ObservedGeneration reflects the generation of the most recently observed spec.
+                    Used to detect spec changes and enforce immutability after profiling starts.
+                  format: int64
+                  type: integer
+                profilingResults:
+                  description: |-
+                    ProfilingResults contains a reference to the ConfigMap holding profiling data.
+                    Format: "configmap/<name>"
+                  type: string
+                state:
+                  description: |-
+                    State is a high-level textual status of the deployment request lifecycle.
+                    Possible values: "", "Pending", "Profiling", "Deploying", "Ready", "DeploymentDeleted", "Failed"
+                    Empty string ("") represents the initial state before initialization.
+                  type: string
+              type: object
+          type: object
+      served: true
+      storage: true
+      subresources:
+        status: {}
--- a/deploy/cloud/operator/config/rbac/role.yaml
+++ b/deploy/cloud/operator/config/rbac/role.yaml
@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
 metadata:
@@ -74,6 +73,18 @@ rules:
  - patch
  - update
  - watch
+- apiGroups:
+  - batch
+  resources:
+  - jobs
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
 - apiGroups:
  - coordination.k8s.io
  resources:
@@ -160,6 +171,7 @@ rules:
  - nvidia.com
  resources:
  - dynamocomponentdeployments
+  - dynamographdeploymentrequests
  - dynamographdeployments
  verbs:
  - create
@@ -173,6 +185,7 @@ rules:
  - nvidia.com
  resources:
  - dynamocomponentdeployments/finalizers
+  - dynamographdeploymentrequests/finalizers
  - dynamographdeployments/finalizers
  verbs:
  - update
@@ -180,6 +193,7 @@ rules:
  - nvidia.com
  resources:
  - dynamocomponentdeployments/status
+  - dynamographdeploymentrequests/status
  - dynamographdeployments/status
  verbs:
  - get

--- a/deploy/cloud/operator/config/samples/kustomization.yaml
+++ b/deploy/cloud/operator/config/samples/kustomization.yaml
@@ -18,4 +18,5 @@ resources:
 - nvidia.com_v1alpha1_dynamocomponentdeployment.yaml
 - nvidia.com_v1alpha1_dynamocomponent.yaml
 - nvidia.com_v1alpha1_dynamographdeployment.yaml
+- nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml
 #+kubebuilder:scaffold:manifestskustomizesamples
--- a/deploy/cloud/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml
+++ b/deploy/cloud/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: nvidia.com/v1alpha1
+kind: DynamoGraphDeploymentRequest
+metadata:
+  name: example-llm-sla
+spec:
+  modelName: "meta/llama3-70b"
+  backend: trtllm # enum: [vllm, sglang, trtllm]; default is trtllm
+  sla: # SLA profiling targets (all fields optional with defaults)
+    itl: 10    # Inter-Token Latency target in milliseconds (default: 10)
+    ttft: 50   # Time To First Token target in milliseconds (default: 50)
+    isl: 3000  # Input Sequence Length (default: 3000)
+    osl: 500   # Output Sequence Length (default: 500)
+  gpu: # optional
+    type: h200_sxm
+    minNumGPUsPerEngine: 1  # default is 1
+    maxNumGPUsPerEngine: 8  # default is 8
+  online: false # true for online profiler, false for AIC profiler
+
+  # Optional: Automatically create DynamoGraphDeployment after profiling
+  autoApply: true  # default is false
+
+  # Optional: Override metadata for auto-created DGD (only used when autoApply: true)
+  # deploymentOverrides:
+  #   name: my-custom-dgd-name
+  #   namespace: production
+  #   labels:
+  #     team: ml-platform
+  #   annotations:
+  #     description: "Auto-generated from DGDR"
+
+  # Currently required for both online and offline/AIC profiling; this requirement will be removed in the future
+  profilingConfig:
+    configMapRef:
+      name: my-profiling-config
+      key: disagg.yaml  # default is "disagg.yaml"
--- a/deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller.go
+++ b/deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller.go
--- a/deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go
+++ b/deploy/cloud/operator/internal/controller/dynamographdeploymentrequest_controller_test.go
--- a/deploy/cloud/operator/internal/controller/suite_test.go
+++ b/deploy/cloud/operator/internal/controller/suite_test.go
@@ -80,7 +80,7 @@ var _ = BeforeSuite(func() {
 			filepath.Join(".", "testing", "run.ai"),
 			filepath.Join(".", "testing", "nvidia"),
 		},
-		ErrorIfCRDPathMissing: true,
+		ErrorIfCRDPathMissing: false,

 		// The BinaryAssetsDirectory is only required if you want to run the tests directly
 		// without calling the makefile target test. If not informed it will look for the

--- a/deploy/cloud/operator/internal/controller_common/predicate.go
+++ b/deploy/cloud/operator/internal/controller_common/predicate.go
@@ -74,6 +74,8 @@ type Config struct {
 type RBACConfig struct {
 	// PlannerClusterRoleName is the name of the ClusterRole for the planner (cluster-wide mode only).
 	PlannerClusterRoleName string
+	// DGDRProfilingClusterRoleName is the name of the ClusterRole for DGDR (DynamoGraphDeploymentRequest) profiling jobs (cluster-wide mode only).
+	DGDRProfilingClusterRoleName string
 }

 type IngressConfig struct {