"lib/runtime/vscode:/vscode.git/clone" did not exist on "6fe2152bfca7bb1fa681eae957b252c6cd0b7a53"
Unverified Commit 57cdb9a1 authored by Julien Mancuso's avatar Julien Mancuso Committed by GitHub
Browse files
parent 66fd6f84
......@@ -596,10 +596,11 @@ async def run_profile(args):
try:
await client.wait_for_deployment_ready()
logger.info("Deployment is ready")
skip_profile = False
except TimeoutError:
logger.error(
"Deployment failed to become ready within timeout, skipping profiling"
"Deployment or model failed to become ready within timeout, skipping profiling"
)
skip_profile = True
......
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.16.4
helm.sh/resource-policy: keep
name: dynamographdeploymentrequests.nvidia.com
spec:
group: nvidia.com
names:
kind: DynamoGraphDeploymentRequest
listKind: DynamoGraphDeploymentRequestList
plural: dynamographdeploymentrequests
shortNames:
- dgdr
singular: dynamographdeploymentrequest
scope: Namespaced
versions:
- additionalPrinterColumns:
- jsonPath: .spec.modelName
name: Model
type: string
- jsonPath: .spec.backend
name: Backend
type: string
- jsonPath: .status.state
name: State
type: string
- jsonPath: .status.deployment.state
name: DGD-State
type: string
- jsonPath: .metadata.creationTimestamp
name: Age
type: date
name: v1alpha1
schema:
openAPIV3Schema:
description: |-
DynamoGraphDeploymentRequest is the Schema for the dynamographdeploymentrequests API.
It serves as the primary interface for users to request model deployments with
specific performance and resource constraints, enabling SLA-driven deployments.
Lifecycle:
1. Initial → Pending: Validates spec and prepares for profiling
2. Pending → Profiling: Creates and runs profiling job (online or AIC)
3. Profiling → Ready/Deploying: Generates DGD spec after profiling completes
4. Deploying → Ready: When autoApply=true, monitors DGD until Ready
5. Ready: Terminal state when DGD is operational or spec is available
6. DeploymentDeleted: Terminal state when auto-created DGD is manually deleted
The spec becomes immutable once profiling starts. Users must delete and recreate
the DGDR to modify configuration after this point.
properties:
apiVersion:
description: |-
APIVersion defines the versioned schema of this representation of an object.
Servers should convert recognized schemas to the latest internal value, and
may reject unrecognized values.
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
type: string
kind:
description: |-
Kind is a string value representing the REST resource this object represents.
Servers may infer this from the endpoint the client submits requests to.
Cannot be updated.
In CamelCase.
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
type: string
metadata:
type: object
spec:
description: Spec defines the desired state for this deployment request.
properties:
autoApply:
default: false
description: |-
AutoApply indicates whether to automatically create a DynamoGraphDeployment
after profiling completes. If false, only the spec is generated and stored in status.
Users can then manually create a DGD using the generated spec.
type: boolean
backend:
default: trtllm
description: |-
Backend specifies the inference backend framework to use.
Supported values are: "vllm", "sglang", "trtllm".
enum:
- vllm
- sglang
- trtllm
type: string
deploymentOverrides:
description: |-
DeploymentOverrides allows customizing metadata for the auto-created DGD.
Only applicable when AutoApply is true.
properties:
annotations:
additionalProperties:
type: string
description: Annotations are additional annotations to add to the DynamoGraphDeployment metadata.
type: object
labels:
additionalProperties:
type: string
description: |-
Labels are additional labels to add to the DynamoGraphDeployment metadata.
These are merged with auto-generated labels from the profiling process.
type: object
name:
description: |-
Name is the desired name for the created DynamoGraphDeployment.
If not specified, defaults to the DGDR name.
type: string
namespace:
description: |-
Namespace is the desired namespace for the created DynamoGraphDeployment.
If not specified, defaults to the DGDR namespace.
type: string
type: object
gpu:
description: |-
GPU defines optional GPU type and resource specifications.
These constraints guide the profiler to find configurations within specified bounds.
properties:
maxNumGPUsPerEngine:
default: 8
description: |-
MaxNumGPUsPerEngine specifies the maximum number of GPUs per engine for profiling.
The profiler will not consider configurations with more GPUs than this value.
minimum: 1
type: integer
minNumGPUsPerEngine:
default: 1
description: |-
MinNumGPUsPerEngine specifies the minimum number of GPUs per engine for profiling.
The profiler will not consider configurations with fewer GPUs than this value.
minimum: 1
type: integer
type:
description: |-
Type specifies the GPU type to target (e.g., "h200", "h100", "a100").
If specified, profiling will focus on configurations optimized for this GPU type.
type: string
type: object
modelName:
description: |-
ModelName specifies the model to deploy (e.g., "meta/llama3-70b").
This should be a valid model identifier that the profiler can resolve.
type: string
online:
default: false
description: |-
Online indicates whether to use online profiler (true) or AI Configurator (false).
Online profiling uses real deployments for accurate measurements (2-4 hours).
Offline profiling uses AI Configurator for fast simulation-based profiling (20-30 seconds).
type: boolean
profilingConfig:
description: |-
ProfilingConfig provides custom configuration for the profiling job.
Applicable to both online and offline (AIC) profiling modes.
properties:
configMapRef:
description: |-
ConfigMapRef is a reference to a ConfigMap containing profiling configuration.
The ConfigMap should contain a key (default: "disagg.yaml") with the configuration file.
This configuration is used by both online and offline (AIC) profiling modes.
properties:
key:
default: disagg.yaml
description: Key in the ConfigMap to select. If not specified, defaults to "disagg.yaml".
type: string
name:
description: Name of the ConfigMap containing the desired data.
type: string
required:
- name
type: object
type: object
sla:
description: |-
SLA defines the Service Level Agreement profiling targets.
The profiler uses these targets to find an optimal deployment configuration.
properties:
isl:
default: 3000
description: |-
ISL is the Input Sequence Length for profiling.
Defines the length of input sequences to use during profiling tests.
minimum: 1
type: integer
itl:
default: 10
description: |-
ITL is the target Inter-Token Latency in milliseconds.
This represents the maximum time allowed between consecutive tokens in the output.
type: integer
osl:
default: 500
description: |-
OSL is the Output Sequence Length for profiling.
Defines the expected length of output sequences to generate during profiling tests.
minimum: 1
type: integer
ttft:
default: 50
description: |-
TTFT is the target Time To First Token in milliseconds.
This represents the maximum time allowed from request submission to receiving the first token.
type: integer
type: object
required:
- modelName
- sla
type: object
status:
description: Status reflects the current observed state of this deployment request.
properties:
conditions:
description: |-
Conditions contains the latest observed conditions of the deployment request.
Standard condition types include: Validation, Profiling, SpecGenerated, DeploymentReady.
Conditions are merged by type on patch updates.
items:
description: Condition contains details for one aspect of the current state of this API Resource.
properties:
lastTransitionTime:
description: |-
lastTransitionTime is the last time the condition transitioned from one status to another.
This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
format: date-time
type: string
message:
description: |-
message is a human readable message indicating details about the transition.
This may be an empty string.
maxLength: 32768
type: string
observedGeneration:
description: |-
observedGeneration represents the .metadata.generation that the condition was set based upon.
For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
with respect to the current state of the instance.
format: int64
minimum: 0
type: integer
reason:
description: |-
reason contains a programmatic identifier indicating the reason for the condition's last transition.
Producers of specific condition types may define expected values and meanings for this field,
and whether the values are considered a guaranteed API.
The value should be a CamelCase string.
This field may not be empty.
maxLength: 1024
minLength: 1
pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
type: string
status:
description: status of the condition, one of True, False, Unknown.
enum:
- "True"
- "False"
- Unknown
type: string
type:
description: type of condition in CamelCase or in foo.example.com/CamelCase.
maxLength: 316
pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
type: string
required:
- lastTransitionTime
- message
- reason
- status
- type
type: object
type: array
deployment:
description: |-
Deployment tracks the auto-created DGD when AutoApply is true.
Contains name, namespace, state, and creation status of the managed DGD.
properties:
created:
description: |-
Created indicates whether the DGD has been successfully created.
Used to prevent recreation if the DGD is manually deleted by users.
type: boolean
name:
description: Name is the name of the created DynamoGraphDeployment.
type: string
namespace:
description: Namespace is the namespace of the created DynamoGraphDeployment.
type: string
state:
description: |-
State is the current state of the DynamoGraphDeployment.
This value is mirrored from the DGD's status.state field.
type: string
type: object
generatedDeployment:
description: |-
GeneratedDeployment contains the full generated DynamoGraphDeployment specification
including metadata, based on profiling results. Users can extract this to create
a DGD manually, or it's used automatically when autoApply is true.
Stored as RawExtension to preserve all fields including metadata.
type: object
x-kubernetes-embedded-resource: true
x-kubernetes-preserve-unknown-fields: true
observedGeneration:
description: |-
ObservedGeneration reflects the generation of the most recently observed spec.
Used to detect spec changes and enforce immutability after profiling starts.
format: int64
type: integer
profilingResults:
description: |-
ProfilingResults contains a reference to the ConfigMap holding profiling data.
Format: "configmap/<name>"
type: string
state:
description: |-
State is a high-level textual status of the deployment request lifecycle.
Possible values: "", "Pending", "Profiling", "Deploying", "Ready", "DeploymentDeleted", "Failed"
Empty string ("") represents the initial state before initialization.
type: string
type: object
type: object
served: true
storage: true
subresources:
status: {}
......@@ -132,6 +132,7 @@ The chart includes built-in validation to prevent all operator conflicts:
| dynamo-operator.dynamo.metrics.prometheusEndpoint | string | `""` | Endpoint that services can use to retrieve metrics. If set, dynamo operator will automatically inject the PROMETHEUS_ENDPOINT environment variable into services it manages. Users can override the value of the PROMETHEUS_ENDPOINT environment variable by modifying the corresponding deployment's environment variables |
| dynamo-operator.dynamo.mpiRun.secretName | string | `"mpi-run-ssh-secret"` | Name of the secret containing the SSH key for MPI Run |
| dynamo-operator.dynamo.mpiRun.sshKeygen.enabled | bool | `true` | Whether to enable SSH key generation for MPI Run |
| dynamo-operator.dynamo.dgdr.profilerImage | string | `""` | Container image to use for profiling jobs (both online and offline/AIC) |
| grove.enabled | bool | `false` | Whether to enable Grove for multi-node inference coordination, if enabled, the Grove operator will be deployed cluster-wide |
| kai-scheduler.enabled | bool | `false` | Whether to enable Kai Scheduler for intelligent resource allocation, if enabled, the Kai Scheduler operator will be deployed cluster-wide |
| etcd.enabled | bool | `true` | Whether to enable etcd deployment, disable if you want to use an external etcd instance. For complete configuration options, see: https://github.com/bitnami/charts/tree/main/bitnami/etcd , all etcd settings should be prefixed with "etcd." |
......
......@@ -124,7 +124,11 @@ spec:
- --mpi-run-ssh-secret-name={{ .Values.dynamo.mpiRun.secretName }}
- --mpi-run-ssh-secret-namespace={{ .Release.Namespace }}
{{- end }}
{{- if .Values.dynamo.dgdr.profilerImage }}
- --profiler-image={{ .Values.dynamo.dgdr.profilerImage }}
{{- end }}
{{- if not .Values.namespaceRestriction.enabled }}
- --dgdr-profiling-cluster-role-name={{ include "dynamo-operator.fullname" . }}-dgdr-profiling
- --planner-cluster-role-name={{ include "dynamo-operator.fullname" . }}-planner
{{- end }}
command:
......
......@@ -359,6 +359,7 @@ rules:
- nvidia.com
resources:
- dynamocomponentdeployments
- dynamographdeploymentrequests
- dynamographdeployments
verbs:
- create
......@@ -372,6 +373,7 @@ rules:
- nvidia.com
resources:
- dynamocomponentdeployments/finalizers
- dynamographdeploymentrequests/finalizers
- dynamographdeployments/finalizers
verbs:
- update
......@@ -379,6 +381,7 @@ rules:
- nvidia.com
resources:
- dynamocomponentdeployments/status
- dynamographdeploymentrequests/status
- dynamographdeployments/status
verbs:
- get
......
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
{{- if .Values.namespaceRestriction.enabled }}
# Namespace-restricted mode: Role + ServiceAccount + RoleBinding
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: dgdr-profiling-job
namespace: {{ .Release.Namespace }}
labels:
{{- include "dynamo-operator.labels" . | nindent 4 }}
app.kubernetes.io/component: dgdr-profiling
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: dgdr-profiling-job
namespace: {{ .Release.Namespace }}
labels:
{{- include "dynamo-operator.labels" . | nindent 4 }}
app.kubernetes.io/component: dgdr-profiling
rules:
# ConfigMaps - needed for saving profiling results
- apiGroups: [""]
resources: ["configmaps"]
verbs: ["create", "get", "update", "patch", "delete"]
# DynamoGraphDeploymentRequests - needed to get DGDR info
- apiGroups: ["nvidia.com"]
resources: ["dynamographdeploymentrequests"]
verbs: ["get"]
# DynamoGraphDeployments - needed for online profiling to create test deployments
# The operator will handle creating the actual pods, services, and deployments
- apiGroups: ["nvidia.com"]
resources: ["dynamographdeployments"]
verbs: ["get", "create", "delete", "list", "watch"]
# Pods - needed for listing pods by label selector and getting logs from test deployments
- apiGroups: [""]
resources: ["pods"]
verbs: ["list", "get"]
- apiGroups: [""]
resources: ["pods/log"]
verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: dgdr-profiling-job
namespace: {{ .Release.Namespace }}
labels:
{{- include "dynamo-operator.labels" . | nindent 4 }}
app.kubernetes.io/component: dgdr-profiling
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: dgdr-profiling-job
subjects:
- kind: ServiceAccount
name: dgdr-profiling-job
namespace: {{ .Release.Namespace }}
{{- else }}
# Cluster-wide mode: ClusterRole for DGDR profiling jobs
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: {{ include "dynamo-operator.fullname" . }}-dgdr-profiling
labels:
{{- include "dynamo-operator.labels" . | nindent 4 }}
app.kubernetes.io/component: dgdr-profiling
rules:
# ConfigMaps - needed for saving profiling results
- apiGroups: [""]
resources: ["configmaps"]
verbs: ["create", "get", "update", "patch", "delete"]
# DynamoGraphDeploymentRequests - needed to get DGDR info
- apiGroups: ["nvidia.com"]
resources: ["dynamographdeploymentrequests"]
verbs: ["get"]
# DynamoGraphDeployments - needed for online profiling to create test deployments
# The operator will handle creating the actual pods, services, and deployments
- apiGroups: ["nvidia.com"]
resources: ["dynamographdeployments"]
verbs: ["get", "create", "delete", "list", "watch"]
# Pods - needed for listing pods by label selector and getting logs from test deployments
- apiGroups: [""]
resources: ["pods"]
verbs: ["list", "get"]
- apiGroups: [""]
resources: ["pods/log"]
verbs: ["get"]
{{- end }}
# (Remove the trailing blank line at end of file)
......@@ -117,6 +117,15 @@ dynamo:
sshKeygen:
enabled: true
# DynamoGraphDeploymentRequest (DGDR) configuration
dgdr:
# Container image to use for profiling jobs (both online and offline/AIC)
# REQUIRED: Must be set to create DynamoGraphDeploymentRequests
# For development: Build and push the profiler image from the ai-dynamo repository
# Public image will be available in release 0.6.1
# Example: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.1"
profilerImage: ""
#imagePullSecrets: []
kubernetesClusterDomain: cluster.local
......
......@@ -135,6 +135,15 @@ dynamo-operator:
# -- Whether to enable SSH key generation for MPI Run
enabled: true
# DynamoGraphDeploymentRequest (DGDR) configuration
dgdr:
# -- Container image to use for profiling jobs (both online and offline/AIC)
# REQUIRED: Must be set to create DynamoGraphDeploymentRequests
# For development: Build and push the profiler image from the ai-dynamo repository
# Public image will be available in release 0.6.1
# Example: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.1"
profilerImage: ""
# Grove component - distributed inference orchestration
grove:
......
......@@ -5,10 +5,19 @@ A Kubernetes Operator to manage all Dynamo pipelines using custom resources.
## Overview
This operator automates the deployment and lifecycle management of `DynamoGraphDeployment` resources in Kubernetes clusters.
This operator automates the deployment and lifecycle management of Dynamo resources in Kubernetes clusters:
- **DynamoGraphDeploymentRequest (DGDR)** - Simplified SLA-driven deployment interface
- **DynamoGraphDeployment (DGD)** - Direct deployment configuration
Built with [Kubebuilder](https://book.kubebuilder.io/), it follows Kubernetes best practices and supports declarative configuration through CustomResourceDefinitions (CRDs).
### Custom Resources
- **DynamoGraphDeploymentRequest**: High-level interface for SLA-driven configuration generation. Automatically handles profiling and generates an optimized DGD spec based on your performance requirements.
- **DynamoGraphDeployment**: Lower-level interface for direct deployment configuration with full control over all parameters.
## Developer guide
### Pre-requisites
......
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
Package v1alpha1 contains API Schema definitions for the nvidia.com v1alpha1 API group.
This package defines the DynamoGraphDeploymentRequest (DGDR) custom resource, which provides
a high-level, SLA-driven interface for deploying machine learning models on Dynamo.
*/
package v1alpha1
import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
)
// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
// SLASpec defines Service Level Agreement targets for model profiling and deployment.
// These targets guide the profiling process to find optimal deployment configurations
// that meet the specified performance requirements.
//
// Every field is optional and carries a kubebuilder default marker
// (ITL=10, TTFT=50, ISL=3000, OSL=500).
//
// NOTE(review): ISL and OSL declare a Minimum of 1, but ITL and TTFT declare no
// lower bound, so zero or negative latency targets are not rejected by the
// schema. Confirm whether that is intentional.
type SLASpec struct {
// ITL is the target Inter-Token Latency in milliseconds.
// This represents the maximum time allowed between consecutive tokens in the output.
// +kubebuilder:default=10
// +optional
ITL int `json:"itl,omitempty"`
// TTFT is the target Time To First Token in milliseconds.
// This represents the maximum time allowed from request submission to receiving the first token.
// +kubebuilder:default=50
// +optional
TTFT int `json:"ttft,omitempty"`
// ISL is the Input Sequence Length for profiling.
// Defines the length of input sequences to use during profiling tests.
// +kubebuilder:default=3000
// +kubebuilder:validation:Minimum=1
// +optional
ISL int `json:"isl,omitempty"`
// OSL is the Output Sequence Length for profiling.
// Defines the expected length of output sequences to generate during profiling tests.
// +kubebuilder:default=500
// +kubebuilder:validation:Minimum=1
// +optional
OSL int `json:"osl,omitempty"`
}
// GPUSpec defines optional GPU type and resource specifications for profiling and deployment.
// These constraints help narrow down the search space during profiling to find configurations
// that fit within specified hardware bounds.
//
// All three fields are optional. MinNumGPUsPerEngine defaults to 1 and
// MaxNumGPUsPerEngine defaults to 8; each must be at least 1.
//
// NOTE(review): no marker on this type requires MinNumGPUsPerEngine to be less
// than or equal to MaxNumGPUsPerEngine. Confirm that the relationship is
// checked elsewhere.
type GPUSpec struct {
// Type specifies the GPU type to target (e.g., "h200", "h100", "a100").
// If specified, profiling will focus on configurations optimized for this GPU type.
// +kubebuilder:validation:Optional
Type string `json:"type,omitempty"`
// MinNumGPUsPerEngine specifies the minimum number of GPUs per engine for profiling.
// The profiler will not consider configurations with fewer GPUs than this value.
// +kubebuilder:validation:Optional
// +kubebuilder:validation:Minimum=1
// +kubebuilder:default=1
MinNumGPUsPerEngine int `json:"minNumGPUsPerEngine,omitempty"`
// MaxNumGPUsPerEngine specifies the maximum number of GPUs per engine for profiling.
// The profiler will not consider configurations with more GPUs than this value.
// +kubebuilder:validation:Optional
// +kubebuilder:validation:Minimum=1
// +kubebuilder:default=8
MaxNumGPUsPerEngine int `json:"maxNumGPUsPerEngine,omitempty"`
}
// ConfigMapKeySelector selects a specific key from a ConfigMap.
// Used to reference external configuration data stored in ConfigMaps.
//
// Name is required; Key is optional and defaults to "disagg.yaml".
//
// NOTE(review): the selector has no namespace field. Presumably the ConfigMap
// is looked up in the namespace of the referencing resource; confirm against
// the controller.
type ConfigMapKeySelector struct {
// Name of the ConfigMap containing the desired data.
// +kubebuilder:validation:Required
Name string `json:"name"`
// Key in the ConfigMap to select. If not specified, defaults to "disagg.yaml".
// +kubebuilder:default=disagg.yaml
Key string `json:"key,omitempty"`
}
// ProfilingConfigSpec defines configuration for the profiling process.
// Allows users to provide custom profiling parameters via ConfigMap references.
//
// ConfigMapRef is the only field. It is a pointer and is nil when the field is
// omitted from the manifest.
type ProfilingConfigSpec struct {
// ConfigMapRef is a reference to a ConfigMap containing profiling configuration.
// The ConfigMap should contain a key (default: "disagg.yaml") with the configuration file.
// This configuration is used by both online and offline (AIC) profiling modes.
// +kubebuilder:validation:Optional
ConfigMapRef *ConfigMapKeySelector `json:"configMapRef,omitempty"`
}
// DeploymentOverridesSpec allows users to customize metadata for auto-created DynamoGraphDeployments.
// When autoApply is enabled, these overrides are applied to the generated DGD resource.
//
// Every field is optional. The field comments below describe the documented
// fallbacks: Name and Namespace default to those of the DGDR, and Labels are
// merged with the labels generated by profiling.
type DeploymentOverridesSpec struct {
// Name is the desired name for the created DynamoGraphDeployment.
// If not specified, defaults to the DGDR name.
// +kubebuilder:validation:Optional
Name string `json:"name,omitempty"`
// Namespace is the desired namespace for the created DynamoGraphDeployment.
// If not specified, defaults to the DGDR namespace.
// +kubebuilder:validation:Optional
Namespace string `json:"namespace,omitempty"`
// Labels are additional labels to add to the DynamoGraphDeployment metadata.
// These are merged with auto-generated labels from the profiling process.
// +kubebuilder:validation:Optional
Labels map[string]string `json:"labels,omitempty"`
// Annotations are additional annotations to add to the DynamoGraphDeployment metadata.
// +kubebuilder:validation:Optional
Annotations map[string]string `json:"annotations,omitempty"`
}
// DynamoGraphDeploymentRequestSpec defines the desired state of a DynamoGraphDeploymentRequest.
// This CRD serves as the primary interface for users to request model deployments with
// specific performance constraints and resource requirements, enabling SLA-driven deployments.
//
// Only ModelName and SLA are marked required. Backend defaults to "trtllm";
// Online and AutoApply default to false. GPU, DeploymentOverrides and
// ProfilingConfig are pointers and are nil when omitted from the manifest.
type DynamoGraphDeploymentRequestSpec struct {
// ModelName specifies the model to deploy (e.g., "meta/llama3-70b").
// This should be a valid model identifier that the profiler can resolve.
// +kubebuilder:validation:Required
ModelName string `json:"modelName"`
// Backend specifies the inference backend framework to use.
// Supported values are: "vllm", "sglang", "trtllm".
// +kubebuilder:validation:Enum=vllm;sglang;trtllm
// +kubebuilder:default=trtllm
Backend string `json:"backend,omitempty"`
// SLA defines the Service Level Agreement profiling targets.
// The profiler uses these targets to find an optimal deployment configuration.
// +kubebuilder:validation:Required
SLA SLASpec `json:"sla"`
// GPU defines optional GPU type and resource specifications.
// These constraints guide the profiler to find configurations within specified bounds.
// +kubebuilder:validation:Optional
GPU *GPUSpec `json:"gpu,omitempty"`
// Online indicates whether to use online profiler (true) or AI Configurator (false).
// Online profiling uses real deployments for accurate measurements (2-4 hours).
// Offline profiling uses AI Configurator for fast simulation-based profiling (20-30 seconds).
// +kubebuilder:default=false
Online bool `json:"online,omitempty"`
// AutoApply indicates whether to automatically create a DynamoGraphDeployment
// after profiling completes. If false, only the spec is generated and stored in status.
// Users can then manually create a DGD using the generated spec.
// +kubebuilder:default=false
AutoApply bool `json:"autoApply,omitempty"`
// DeploymentOverrides allows customizing metadata for the auto-created DGD.
// Only applicable when AutoApply is true.
// +kubebuilder:validation:Optional
DeploymentOverrides *DeploymentOverridesSpec `json:"deploymentOverrides,omitempty"`
// ProfilingConfig provides custom configuration for the profiling job.
// Applicable to both online and offline (AIC) profiling modes.
// +kubebuilder:validation:Optional
ProfilingConfig *ProfilingConfigSpec `json:"profilingConfig,omitempty"`
}
// DeploymentStatus tracks the state of an auto-created DynamoGraphDeployment.
// This status is populated when autoApply is enabled and a DGD is created.
//
// Every field is tagged omitempty, so zero values (empty strings and a false
// Created flag) are left out of the serialized status.
type DeploymentStatus struct {
// Name is the name of the created DynamoGraphDeployment.
Name string `json:"name,omitempty"`
// Namespace is the namespace of the created DynamoGraphDeployment.
Namespace string `json:"namespace,omitempty"`
// State is the current state of the DynamoGraphDeployment.
// This value is mirrored from the DGD's status.state field.
State string `json:"state,omitempty"`
// Created indicates whether the DGD has been successfully created.
// Used to prevent recreation if the DGD is manually deleted by users.
Created bool `json:"created,omitempty"`
}
// DynamoGraphDeploymentRequestStatus represents the observed state of a DynamoGraphDeploymentRequest.
// The controller updates this status as the DGDR progresses through its lifecycle.
//
// GeneratedDeployment and Deployment are pointers and stay nil until they are
// populated. Conditions carries patchStrategy/patchMergeKey struct tags keyed
// on the condition type. GeneratedDeployment is marked as an embedded resource
// with unknown fields preserved, so its content is not pruned to a fixed schema.
type DynamoGraphDeploymentRequestStatus struct {
// State is a high-level textual status of the deployment request lifecycle.
// Possible values: "", "Pending", "Profiling", "Deploying", "Ready", "DeploymentDeleted", "Failed"
// Empty string ("") represents the initial state before initialization.
State string `json:"state,omitempty"`
// ObservedGeneration reflects the generation of the most recently observed spec.
// Used to detect spec changes and enforce immutability after profiling starts.
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
// Conditions contains the latest observed conditions of the deployment request.
// Standard condition types include: Validation, Profiling, SpecGenerated, DeploymentReady.
// Conditions are merged by type on patch updates.
Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"`
// ProfilingResults contains a reference to the ConfigMap holding profiling data.
// Format: "configmap/<name>"
// +kubebuilder:validation:Optional
ProfilingResults string `json:"profilingResults,omitempty"`
// GeneratedDeployment contains the full generated DynamoGraphDeployment specification
// including metadata, based on profiling results. Users can extract this to create
// a DGD manually, or it's used automatically when autoApply is true.
// Stored as RawExtension to preserve all fields including metadata.
// +kubebuilder:validation:Optional
// +kubebuilder:pruning:PreserveUnknownFields
// +kubebuilder:validation:EmbeddedResource
GeneratedDeployment *runtime.RawExtension `json:"generatedDeployment,omitempty"`
// Deployment tracks the auto-created DGD when AutoApply is true.
// Contains name, namespace, state, and creation status of the managed DGD.
// +kubebuilder:validation:Optional
Deployment *DeploymentStatus `json:"deployment,omitempty"`
}
// DynamoGraphDeploymentRequest is the Schema for the dynamographdeploymentrequests API.
// It serves as the primary interface for users to request model deployments with
// specific performance and resource constraints, enabling SLA-driven deployments.
//
// Lifecycle:
// 1. Initial → Pending: Validates spec and prepares for profiling
// 2. Pending → Profiling: Creates and runs profiling job (online or AIC)
// 3. Profiling → Ready/Deploying: Generates DGD spec after profiling completes
// 4. Deploying → Ready: When autoApply=true, monitors DGD until Ready
// 5. Ready: Terminal state when DGD is operational or spec is available
// 6. DeploymentDeleted: Terminal state when auto-created DGD is manually deleted
//
// The spec becomes immutable once profiling starts. Users must delete and recreate
// the DGDR to modify configuration after this point.
//
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:resource:shortName=dgdr
// +kubebuilder:printcolumn:name="Model",type=string,JSONPath=`.spec.modelName`
// +kubebuilder:printcolumn:name="Backend",type=string,JSONPath=`.spec.backend`
// +kubebuilder:printcolumn:name="State",type=string,JSONPath=`.status.state`
// +kubebuilder:printcolumn:name="DGD-State",type=string,JSONPath=`.status.deployment.state`
// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp"
type DynamoGraphDeploymentRequest struct {
// TypeMeta is embedded inline and supplies the object's apiVersion and kind.
metav1.TypeMeta `json:",inline"`
// ObjectMeta is the standard Kubernetes object metadata, serialized under "metadata".
metav1.ObjectMeta `json:"metadata,omitempty"`
// Spec defines the desired state for this deployment request.
Spec DynamoGraphDeploymentRequestSpec `json:"spec,omitempty"`
// Status reflects the current observed state of this deployment request.
Status DynamoGraphDeploymentRequestStatus `json:"status,omitempty"`
}
// SetState updates the State field in the DGDR status.
// The value is stored as given; no validation is performed here and the change
// is only made on this in-memory object.
func (s *DynamoGraphDeploymentRequest) SetState(state string) {
s.Status.State = state
}
// GetSpec returns the spec of this DGDR as a generic interface.
// Implements a common interface used by controller utilities.
// The interface holds the DynamoGraphDeploymentRequestSpec by value (a shallow
// copy of s.Spec), not a pointer to it.
func (s *DynamoGraphDeploymentRequest) GetSpec() any {
return s.Spec
}
// SetSpec replaces the spec of this DGDR with the value carried by spec.
// It is the counterpart of GetSpec for controller utilities that handle specs
// through a generic interface.
//
// spec must hold a DynamoGraphDeploymentRequestSpec value. Any other dynamic
// type (including a pointer to the spec type, or a nil interface) makes the
// type assertion panic.
func (s *DynamoGraphDeploymentRequest) SetSpec(spec any) {
	// Unchecked assertion: a mismatched type is treated as a programming error.
	typed := spec.(DynamoGraphDeploymentRequestSpec)
	s.Spec = typed
}
// AddStatusCondition upserts a condition into the DGDR status, keyed by Type.
// When an entry with the same Type is already present it is overwritten in
// place (its position in the list is kept); otherwise the condition is
// appended to the end of the list.
func (s *DynamoGraphDeploymentRequest) AddStatusCondition(condition metav1.Condition) {
	existing := s.Status.Conditions
	for idx := range existing {
		if existing[idx].Type != condition.Type {
			continue
		}
		// Same type found: overwrite the stored entry and stop.
		existing[idx] = condition
		return
	}
	// No entry of this type yet. append copes with a nil slice, so no
	// explicit initialisation is required before adding the first condition.
	s.Status.Conditions = append(existing, condition)
}
// DynamoGraphDeploymentRequestList contains a list of DynamoGraphDeploymentRequest resources.
//
// +kubebuilder:object:root=true
type DynamoGraphDeploymentRequestList struct {
// TypeMeta is embedded and inlined into the serialized list object.
metav1.TypeMeta `json:",inline"`
// ListMeta is the list-level metadata, serialized under the "metadata" key.
metav1.ListMeta `json:"metadata,omitempty"`
// Items holds the DynamoGraphDeploymentRequest objects in this list.
// The JSON tag has no omitempty, so the key is always emitted.
Items []DynamoGraphDeploymentRequest `json:"items"`
}
// init registers the DynamoGraphDeploymentRequest object and list types with
// the package-level SchemeBuilder.
func init() {
	SchemeBuilder.Register(
		&DynamoGraphDeploymentRequest{},
		&DynamoGraphDeploymentRequestList{},
	)
}
......@@ -42,7 +42,7 @@ import (
"k8s.io/api/autoscaling/v2"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime"
)
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
......@@ -114,6 +114,65 @@ func (in *BaseStatus) DeepCopy() *BaseStatus {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
// out must also be non-nil, since it is dereferenced unconditionally.
func (in *ConfigMapKeySelector) DeepCopyInto(out *ConfigMapKeySelector) {
// The generator emitted only a value copy, which suggests this type has no
// pointer, slice, or map fields that would need their own copy.
*out = *in
}
// DeepCopy is an autogenerated deepcopy function. It returns a newly allocated
// ConfigMapKeySelector populated via DeepCopyInto, or nil when the receiver is nil.
func (in *ConfigMapKeySelector) DeepCopy() *ConfigMapKeySelector {
	if in == nil {
		return nil
	}
	var clone ConfigMapKeySelector
	in.DeepCopyInto(&clone)
	return &clone
}
// DeepCopyInto is an autogenerated deepcopy function. It copies the receiver
// into out, giving out its own Labels and Annotations maps so that later
// writes to one object do not show up in the other. in and out must be
// non-nil and are expected to be distinct objects.
func (in *DeploymentOverridesSpec) DeepCopyInto(out *DeploymentOverridesSpec) {
	// Copy all plain-value fields first; the maps are still shared at this point.
	*out = *in
	if src := in.Labels; src != nil {
		dst := make(map[string]string, len(src))
		for k, v := range src {
			dst[k] = v
		}
		out.Labels = dst
	}
	if src := in.Annotations; src != nil {
		dst := make(map[string]string, len(src))
		for k, v := range src {
			dst[k] = v
		}
		out.Annotations = dst
	}
}
// DeepCopy is an autogenerated deepcopy function. It returns a newly allocated
// DeploymentOverridesSpec populated via DeepCopyInto, or nil when the receiver is nil.
func (in *DeploymentOverridesSpec) DeepCopy() *DeploymentOverridesSpec {
	if in == nil {
		return nil
	}
	var clone DeploymentOverridesSpec
	in.DeepCopyInto(&clone)
	return &clone
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
// out must also be non-nil, since it is dereferenced unconditionally.
func (in *DeploymentStatus) DeepCopyInto(out *DeploymentStatus) {
// The generator emitted only a value copy, which suggests this type has no
// pointer, slice, or map fields that would need their own copy.
*out = *in
}
// DeepCopy is an autogenerated deepcopy function. It returns a newly allocated
// DeploymentStatus populated via DeepCopyInto, or nil when the receiver is nil.
func (in *DeploymentStatus) DeepCopy() *DeploymentStatus {
	if in == nil {
		return nil
	}
	var clone DeploymentStatus
	in.DeepCopyInto(&clone)
	return &clone
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *DynamoComponentDeployment) DeepCopyInto(out *DynamoComponentDeployment) {
*out = *in
......@@ -378,6 +437,128 @@ func (in *DynamoGraphDeploymentList) DeepCopyObject() runtime.Object {
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
// out must also be non-nil, since it is dereferenced unconditionally.
func (in *DynamoGraphDeploymentRequest) DeepCopyInto(out *DynamoGraphDeploymentRequest) {
// Start from a shallow copy of the whole struct.
*out = *in
// TypeMeta is copied by plain assignment (already covered by the line above).
out.TypeMeta = in.TypeMeta
// The remaining parts are copied through their own DeepCopyInto methods so
// that out does not keep sharing nested data with in.
in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
in.Spec.DeepCopyInto(&out.Spec)
in.Status.DeepCopyInto(&out.Status)
}
// DeepCopy is an autogenerated deepcopy function. It returns a newly allocated
// DynamoGraphDeploymentRequest populated via DeepCopyInto, or nil when the
// receiver is nil.
func (in *DynamoGraphDeploymentRequest) DeepCopy() *DynamoGraphDeploymentRequest {
	if in == nil {
		return nil
	}
	var clone DynamoGraphDeploymentRequest
	in.DeepCopyInto(&clone)
	return &clone
}
// DeepCopyObject is an autogenerated deepcopy function. It returns a deep copy
// of the receiver as a runtime.Object, or a nil interface when the receiver is nil.
func (in *DynamoGraphDeploymentRequest) DeepCopyObject() runtime.Object {
	clone := in.DeepCopy()
	if clone == nil {
		// Return the literal nil so callers do not receive a non-nil
		// interface wrapping a nil pointer.
		return nil
	}
	return clone
}
// DeepCopyInto is an autogenerated deepcopy function. It copies the receiver
// into out, deep-copying the list metadata and every item so that out owns its
// own Items slice. in and out must be non-nil and are expected to be distinct.
func (in *DynamoGraphDeploymentRequestList) DeepCopyInto(out *DynamoGraphDeploymentRequestList) {
	*out = *in
	out.TypeMeta = in.TypeMeta
	in.ListMeta.DeepCopyInto(&out.ListMeta)
	if in.Items == nil {
		// Nothing more to do: out.Items is already nil from the shallow copy.
		return
	}
	items := make([]DynamoGraphDeploymentRequest, len(in.Items))
	for idx := range in.Items {
		in.Items[idx].DeepCopyInto(&items[idx])
	}
	out.Items = items
}
// DeepCopy is an autogenerated deepcopy function. It returns a newly allocated
// DynamoGraphDeploymentRequestList populated via DeepCopyInto, or nil when the
// receiver is nil.
func (in *DynamoGraphDeploymentRequestList) DeepCopy() *DynamoGraphDeploymentRequestList {
	if in == nil {
		return nil
	}
	var clone DynamoGraphDeploymentRequestList
	in.DeepCopyInto(&clone)
	return &clone
}
// DeepCopyObject is an autogenerated deepcopy function. It returns a deep copy
// of the receiver as a runtime.Object, or a nil interface when the receiver is nil.
func (in *DynamoGraphDeploymentRequestList) DeepCopyObject() runtime.Object {
	clone := in.DeepCopy()
	if clone == nil {
		// Return the literal nil so callers do not receive a non-nil
		// interface wrapping a nil pointer.
		return nil
	}
	return clone
}
// DeepCopyInto is an autogenerated deepcopy function. It copies the receiver
// into out and gives out its own copies of the optional GPU,
// DeploymentOverrides and ProfilingConfig sub-objects. in and out must be
// non-nil and are expected to be distinct objects.
func (in *DynamoGraphDeploymentRequestSpec) DeepCopyInto(out *DynamoGraphDeploymentRequestSpec) {
	// Shallow copy first; pointer fields still alias in's data until replaced below.
	*out = *in
	out.SLA = in.SLA
	if in.GPU != nil {
		gpu := *in.GPU
		out.GPU = &gpu
	}
	if in.DeploymentOverrides != nil {
		out.DeploymentOverrides = new(DeploymentOverridesSpec)
		in.DeploymentOverrides.DeepCopyInto(out.DeploymentOverrides)
	}
	if in.ProfilingConfig != nil {
		out.ProfilingConfig = new(ProfilingConfigSpec)
		in.ProfilingConfig.DeepCopyInto(out.ProfilingConfig)
	}
}
// DeepCopy is an autogenerated deepcopy function. It returns a newly allocated
// DynamoGraphDeploymentRequestSpec populated via DeepCopyInto, or nil when the
// receiver is nil.
func (in *DynamoGraphDeploymentRequestSpec) DeepCopy() *DynamoGraphDeploymentRequestSpec {
	if in == nil {
		return nil
	}
	var clone DynamoGraphDeploymentRequestSpec
	in.DeepCopyInto(&clone)
	return &clone
}
// DeepCopyInto is an autogenerated deepcopy function. It copies the receiver
// into out and gives out its own Conditions slice, GeneratedDeployment and
// Deployment objects. in and out must be non-nil and are expected to be
// distinct objects.
func (in *DynamoGraphDeploymentRequestStatus) DeepCopyInto(out *DynamoGraphDeploymentRequestStatus) {
	// Shallow copy first; slice and pointer fields are replaced below.
	*out = *in
	if in.Conditions != nil {
		conds := make([]metav1.Condition, len(in.Conditions))
		for idx := range in.Conditions {
			in.Conditions[idx].DeepCopyInto(&conds[idx])
		}
		out.Conditions = conds
	}
	if in.GeneratedDeployment != nil {
		out.GeneratedDeployment = new(runtime.RawExtension)
		in.GeneratedDeployment.DeepCopyInto(out.GeneratedDeployment)
	}
	if in.Deployment != nil {
		deployment := *in.Deployment
		out.Deployment = &deployment
	}
}
// DeepCopy is an autogenerated deepcopy function. It returns a newly allocated
// DynamoGraphDeploymentRequestStatus populated via DeepCopyInto, or nil when
// the receiver is nil.
func (in *DynamoGraphDeploymentRequestStatus) DeepCopy() *DynamoGraphDeploymentRequestStatus {
	if in == nil {
		return nil
	}
	var clone DynamoGraphDeploymentRequestStatus
	in.DeepCopyInto(&clone)
	return &clone
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *DynamoGraphDeploymentSpec) DeepCopyInto(out *DynamoGraphDeploymentSpec) {
*out = *in
......@@ -445,6 +626,21 @@ func (in *DynamoGraphDeploymentStatus) DeepCopy() *DynamoGraphDeploymentStatus {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
// out must also be non-nil, since it is dereferenced unconditionally.
func (in *GPUSpec) DeepCopyInto(out *GPUSpec) {
// The generator emitted only a value copy, which suggests this type has no
// pointer, slice, or map fields that would need their own copy.
*out = *in
}
// DeepCopy is an autogenerated deepcopy function. It returns a newly allocated
// GPUSpec populated via DeepCopyInto, or nil when the receiver is nil.
func (in *GPUSpec) DeepCopy() *GPUSpec {
	if in == nil {
		return nil
	}
	var clone GPUSpec
	in.DeepCopyInto(&clone)
	return &clone
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *IngressSpec) DeepCopyInto(out *IngressSpec) {
*out = *in
......@@ -555,6 +751,41 @@ func (in *PVC) DeepCopy() *PVC {
return out
}
// DeepCopyInto is an autogenerated deepcopy function. It copies the receiver
// into out and, when ConfigMapRef is set, points out at its own copy of the
// referenced ConfigMapKeySelector. in and out must be non-nil.
func (in *ProfilingConfigSpec) DeepCopyInto(out *ProfilingConfigSpec) {
	*out = *in
	if in.ConfigMapRef == nil {
		return
	}
	// Duplicate the selector by value so the two specs do not share a pointer.
	ref := *in.ConfigMapRef
	out.ConfigMapRef = &ref
}
// DeepCopy is an autogenerated deepcopy function. It returns a newly allocated
// ProfilingConfigSpec populated via DeepCopyInto, or nil when the receiver is nil.
func (in *ProfilingConfigSpec) DeepCopy() *ProfilingConfigSpec {
	if in == nil {
		return nil
	}
	var clone ProfilingConfigSpec
	in.DeepCopyInto(&clone)
	return &clone
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
// out must also be non-nil, since it is dereferenced unconditionally.
func (in *SLASpec) DeepCopyInto(out *SLASpec) {
// The generator emitted only a value copy, which suggests this type has no
// pointer, slice, or map fields that would need their own copy.
*out = *in
}
// DeepCopy is an autogenerated deepcopy function. It returns a newly allocated
// SLASpec populated via DeepCopyInto, or nil when the receiver is nil.
func (in *SLASpec) DeepCopy() *SLASpec {
	if in == nil {
		return nil
	}
	var clone SLASpec
	in.DeepCopyInto(&clone)
	return &clone
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SharedMemorySpec) DeepCopyInto(out *SharedMemorySpec) {
*out = *in
......
......@@ -140,6 +140,8 @@ func main() {
var mpiRunSecretName string
var mpiRunSecretNamespace string
var plannerClusterRoleName string
var profilerImage string
var dgdrProfilingClusterRoleName string
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
......@@ -180,6 +182,10 @@ func main() {
"Namespace where the MPI SSH secret is located (required)")
flag.StringVar(&plannerClusterRoleName, "planner-cluster-role-name", "",
"Name of the ClusterRole for planner (cluster-wide mode only)")
flag.StringVar(&profilerImage, "profiler-image", "",
"Container image to use for profiling jobs (both online and offline/AIC) (for DynamoGraphDeploymentRequest)")
flag.StringVar(&dgdrProfilingClusterRoleName, "dgdr-profiling-cluster-role-name", "",
"Name of the ClusterRole for DGDR profiling jobs (cluster-wide mode only)")
opts := zap.Options{
Development: true,
}
......@@ -237,6 +243,7 @@ func main() {
},
RBAC: commonController.RBACConfig{
PlannerClusterRoleName: plannerClusterRoleName,
DGDRProfilingClusterRoleName: dgdrProfilingClusterRoleName,
},
}
......@@ -449,6 +456,17 @@ func main() {
setupLog.Error(err, "unable to create controller", "controller", "DynamoGraphDeployment")
os.Exit(1)
}
if err = (&controller.DynamoGraphDeploymentRequestReconciler{
Client: mgr.GetClient(),
Recorder: mgr.GetEventRecorderFor("dynamographdeploymentrequest"),
ProfilerImage: profilerImage,
Config: ctrlConfig,
RBACManager: rbacManager,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "DynamoGraphDeploymentRequest")
os.Exit(1)
}
//+kubebuilder:scaffold:builder
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
......
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.16.4
helm.sh/resource-policy: keep
name: dynamographdeploymentrequests.nvidia.com
spec:
group: nvidia.com
names:
kind: DynamoGraphDeploymentRequest
listKind: DynamoGraphDeploymentRequestList
plural: dynamographdeploymentrequests
shortNames:
- dgdr
singular: dynamographdeploymentrequest
scope: Namespaced
versions:
- additionalPrinterColumns:
- jsonPath: .spec.modelName
name: Model
type: string
- jsonPath: .spec.backend
name: Backend
type: string
- jsonPath: .status.state
name: State
type: string
- jsonPath: .status.deployment.state
name: DGD-State
type: string
- jsonPath: .metadata.creationTimestamp
name: Age
type: date
name: v1alpha1
schema:
openAPIV3Schema:
description: |-
DynamoGraphDeploymentRequest is the Schema for the dynamographdeploymentrequests API.
It serves as the primary interface for users to request model deployments with
specific performance and resource constraints, enabling SLA-driven deployments.
Lifecycle:
1. Initial → Pending: Validates spec and prepares for profiling
2. Pending → Profiling: Creates and runs profiling job (online or AIC)
3. Profiling → Ready/Deploying: Generates DGD spec after profiling completes
4. Deploying → Ready: When autoApply=true, monitors DGD until Ready
5. Ready: Terminal state when DGD is operational or spec is available
6. DeploymentDeleted: Terminal state when auto-created DGD is manually deleted
The spec becomes immutable once profiling starts. Users must delete and recreate
the DGDR to modify configuration after this point.
properties:
apiVersion:
description: |-
APIVersion defines the versioned schema of this representation of an object.
Servers should convert recognized schemas to the latest internal value, and
may reject unrecognized values.
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
type: string
kind:
description: |-
Kind is a string value representing the REST resource this object represents.
Servers may infer this from the endpoint the client submits requests to.
Cannot be updated.
In CamelCase.
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
type: string
metadata:
type: object
spec:
description: Spec defines the desired state for this deployment request.
properties:
autoApply:
default: false
description: |-
AutoApply indicates whether to automatically create a DynamoGraphDeployment
after profiling completes. If false, only the spec is generated and stored in status.
Users can then manually create a DGD using the generated spec.
type: boolean
backend:
default: trtllm
description: |-
Backend specifies the inference backend framework to use.
Supported values are: "vllm", "sglang", "trtllm".
enum:
- vllm
- sglang
- trtllm
type: string
deploymentOverrides:
description: |-
DeploymentOverrides allows customizing metadata for the auto-created DGD.
Only applicable when AutoApply is true.
properties:
annotations:
additionalProperties:
type: string
description: Annotations are additional annotations to add to the DynamoGraphDeployment metadata.
type: object
labels:
additionalProperties:
type: string
description: |-
Labels are additional labels to add to the DynamoGraphDeployment metadata.
These are merged with auto-generated labels from the profiling process.
type: object
name:
description: |-
Name is the desired name for the created DynamoGraphDeployment.
If not specified, defaults to the DGDR name.
type: string
namespace:
description: |-
Namespace is the desired namespace for the created DynamoGraphDeployment.
If not specified, defaults to the DGDR namespace.
type: string
type: object
gpu:
description: |-
GPU defines optional GPU type and resource specifications.
These constraints guide the profiler to find configurations within specified bounds.
properties:
maxNumGPUsPerEngine:
default: 8
description: |-
MaxNumGPUsPerEngine specifies the maximum number of GPUs per engine for profiling.
The profiler will not consider configurations with more GPUs than this value.
minimum: 1
type: integer
minNumGPUsPerEngine:
default: 1
description: |-
MinNumGPUsPerEngine specifies the minimum number of GPUs per engine for profiling.
The profiler will not consider configurations with fewer GPUs than this value.
minimum: 1
type: integer
type:
description: |-
Type specifies the GPU type to target (e.g., "h200", "h100", "a100").
If specified, profiling will focus on configurations optimized for this GPU type.
type: string
type: object
modelName:
description: |-
ModelName specifies the model to deploy (e.g., "meta/llama3-70b").
This should be a valid model identifier that the profiler can resolve.
type: string
online:
default: false
description: |-
Online indicates whether to use online profiler (true) or AI Configurator (false).
Online profiling uses real deployments for accurate measurements (2-4 hours).
Offline profiling uses AI Configurator for fast simulation-based profiling (20-30 seconds).
type: boolean
profilingConfig:
description: |-
ProfilingConfig provides custom configuration for the profiling job.
Applicable to both online and offline (AIC) profiling modes.
properties:
configMapRef:
description: |-
ConfigMapRef is a reference to a ConfigMap containing profiling configuration.
The ConfigMap should contain a key (default: "disagg.yaml") with the configuration file.
This configuration is used by both online and offline (AIC) profiling modes.
properties:
key:
default: disagg.yaml
description: Key in the ConfigMap to select. If not specified, defaults to "disagg.yaml".
type: string
name:
description: Name of the ConfigMap containing the desired data.
type: string
required:
- name
type: object
type: object
sla:
description: |-
SLA defines the Service Level Agreement profiling targets.
The profiler uses these targets to find an optimal deployment configuration.
properties:
isl:
default: 3000
description: |-
ISL is the Input Sequence Length for profiling.
Defines the length of input sequences to use during profiling tests.
minimum: 1
type: integer
itl:
default: 10
description: |-
ITL is the target Inter-Token Latency in milliseconds.
This represents the maximum time allowed between consecutive tokens in the output.
type: integer
osl:
default: 500
description: |-
OSL is the Output Sequence Length for profiling.
Defines the expected length of output sequences to generate during profiling tests.
minimum: 1
type: integer
ttft:
default: 50
description: |-
TTFT is the target Time To First Token in milliseconds.
This represents the maximum time allowed from request submission to receiving the first token.
type: integer
type: object
required:
- modelName
- sla
type: object
status:
description: Status reflects the current observed state of this deployment request.
properties:
conditions:
description: |-
Conditions contains the latest observed conditions of the deployment request.
Standard condition types include: Validation, Profiling, SpecGenerated, DeploymentReady.
Conditions are merged by type on patch updates.
items:
description: Condition contains details for one aspect of the current state of this API Resource.
properties:
lastTransitionTime:
description: |-
lastTransitionTime is the last time the condition transitioned from one status to another.
This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
format: date-time
type: string
message:
description: |-
message is a human readable message indicating details about the transition.
This may be an empty string.
maxLength: 32768
type: string
observedGeneration:
description: |-
observedGeneration represents the .metadata.generation that the condition was set based upon.
For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
with respect to the current state of the instance.
format: int64
minimum: 0
type: integer
reason:
description: |-
reason contains a programmatic identifier indicating the reason for the condition's last transition.
Producers of specific condition types may define expected values and meanings for this field,
and whether the values are considered a guaranteed API.
The value should be a CamelCase string.
This field may not be empty.
maxLength: 1024
minLength: 1
pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
type: string
status:
description: status of the condition, one of True, False, Unknown.
enum:
- "True"
- "False"
- Unknown
type: string
type:
description: type of condition in CamelCase or in foo.example.com/CamelCase.
maxLength: 316
pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
type: string
required:
- lastTransitionTime
- message
- reason
- status
- type
type: object
type: array
deployment:
description: |-
Deployment tracks the auto-created DGD when AutoApply is true.
Contains name, namespace, state, and creation status of the managed DGD.
properties:
created:
description: |-
Created indicates whether the DGD has been successfully created.
Used to prevent recreation if the DGD is manually deleted by users.
type: boolean
name:
description: Name is the name of the created DynamoGraphDeployment.
type: string
namespace:
description: Namespace is the namespace of the created DynamoGraphDeployment.
type: string
state:
description: |-
State is the current state of the DynamoGraphDeployment.
This value is mirrored from the DGD's status.state field.
type: string
type: object
generatedDeployment:
description: |-
GeneratedDeployment contains the full generated DynamoGraphDeployment specification
including metadata, based on profiling results. Users can extract this to create
a DGD manually, or it's used automatically when autoApply is true.
Stored as RawExtension to preserve all fields including metadata.
type: object
x-kubernetes-embedded-resource: true
x-kubernetes-preserve-unknown-fields: true
observedGeneration:
description: |-
ObservedGeneration reflects the generation of the most recently observed spec.
Used to detect spec changes and enforce immutability after profiling starts.
format: int64
type: integer
profilingResults:
description: |-
ProfilingResults contains a reference to the ConfigMap holding profiling data.
Format: "configmap/<name>"
type: string
state:
description: |-
State is a high-level textual status of the deployment request lifecycle.
Possible values: "", "Pending", "Profiling", "Deploying", "Ready", "DeploymentDeleted", "Failed"
Empty string ("") represents the initial state before initialization.
type: string
type: object
type: object
served: true
storage: true
subresources:
status: {}
......@@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
......@@ -74,6 +73,18 @@ rules:
- patch
- update
- watch
- apiGroups:
- batch
resources:
- jobs
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- coordination.k8s.io
resources:
......@@ -160,6 +171,7 @@ rules:
- nvidia.com
resources:
- dynamocomponentdeployments
- dynamographdeploymentrequests
- dynamographdeployments
verbs:
- create
......@@ -173,6 +185,7 @@ rules:
- nvidia.com
resources:
- dynamocomponentdeployments/finalizers
- dynamographdeploymentrequests/finalizers
- dynamographdeployments/finalizers
verbs:
- update
......@@ -180,6 +193,7 @@ rules:
- nvidia.com
resources:
- dynamocomponentdeployments/status
- dynamographdeploymentrequests/status
- dynamographdeployments/status
verbs:
- get
......
......@@ -18,4 +18,5 @@ resources:
- nvidia.com_v1alpha1_dynamocomponentdeployment.yaml
- nvidia.com_v1alpha1_dynamocomponent.yaml
- nvidia.com_v1alpha1_dynamographdeployment.yaml
- nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml
#+kubebuilder:scaffold:manifestskustomizesamples
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeploymentRequest
metadata:
name: example-llm-sla
spec:
modelName: "meta/llama3-70b"
backend: trtllm # enum: [vllm, sglang, trtllm]; default is trtllm
sla: # SLA profiling targets (all fields optional with defaults)
itl: 10 # Inter-Token Latency target in milliseconds (default: 10)
ttft: 50 # Time To First Token target in milliseconds (default: 50)
isl: 3000 # Input Sequence Length (default: 3000)
osl: 500 # Output Sequence Length (default: 500)
gpu: # optional
type: h200_sxm
minNumGPUsPerEngine: 1 # default is 1
maxNumGPUsPerEngine: 8 # default is 8
online: false # true for online profiler, false for AIC profiler
# Optional: Automatically create DynamoGraphDeployment after profiling
autoApply: true # default is false
# Optional: Override metadata for auto-created DGD (only used when autoApply: true)
# deploymentOverrides:
# name: my-custom-dgd-name
# namespace: production
# labels:
# team: ml-platform
# annotations:
# description: "Auto-generated from DGDR"
# Currently required for both online and offline (AIC) profiling; this requirement will be lifted in a future release
profilingConfig:
configMapRef:
name: my-profiling-config
key: disagg.yaml # default is "disagg.yaml"
......@@ -80,7 +80,7 @@ var _ = BeforeSuite(func() {
filepath.Join(".", "testing", "run.ai"),
filepath.Join(".", "testing", "nvidia"),
},
ErrorIfCRDPathMissing: true,
ErrorIfCRDPathMissing: false,
// The BinaryAssetsDirectory is only required if you want to run the tests directly
// without calling the makefile target test. If not set, it will look for the
......
......@@ -74,6 +74,8 @@ type Config struct {
// RBACConfig holds the names of the ClusterRoles referenced by the operator.
// Both fields are documented as applying to cluster-wide mode only.
type RBACConfig struct {
// PlannerClusterRoleName is the name of the ClusterRole for planner (cluster-wide mode only)
PlannerClusterRoleName string
// DGDRProfilingClusterRoleName is the name of the ClusterRole for DGDR profiling jobs (cluster-wide mode only)
DGDRProfilingClusterRoleName string
}
type IngressConfig struct {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment