/* * SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package v1alpha1 import ( autoscalingv2 "k8s.io/api/autoscaling/v2" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" ) // +kubebuilder:validation:XValidation:rule="!has(self.create) || self.create == false || (has(self.size) && has(self.storageClass) && has(self.volumeAccessMode))",message="When create is true, size, storageClass, and volumeAccessMode are required" type PVC struct { // Create indicates to create a new PVC Create *bool `json:"create,omitempty"` // Name is the name of the PVC // +kubebuilder:validation:Required Name *string `json:"name,omitempty"` // StorageClass to be used for PVC creation. Required when create is true. StorageClass string `json:"storageClass,omitempty"` // Size of the volume in Gi, used during PVC creation. Required when create is true. Size resource.Quantity `json:"size,omitempty"` // VolumeAccessMode is the volume access mode of the PVC. Required when create is true. VolumeAccessMode corev1.PersistentVolumeAccessMode `json:"volumeAccessMode,omitempty"` } // VolumeMount references a PVC defined at the top level for volumes to be mounted by the component type VolumeMount struct { // Name references a PVC name defined in the top-level PVCs map // +kubebuilder:validation:Required Name string `json:"name,omitempty"` // MountPoint specifies where to mount the volume. // If useAsCompilationCache is true and mountPoint is not specified, // a backend-specific default will be used. MountPoint string `json:"mountPoint,omitempty"` // UseAsCompilationCache indicates this volume should be used as a compilation cache. // When true, backend-specific environment variables will be set and default mount points may be used. // +kubebuilder:default=false UseAsCompilationCache bool `json:"useAsCompilationCache,omitempty"` } // Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter // with HPA, KEDA, or Planner for autoscaling instead. See docs/pages/kubernetes/autoscaling.md // for migration guidance. This field will be removed in a future API version. type Autoscaling struct { // Deprecated: This field is ignored. Enabled bool `json:"enabled,omitempty"` // Deprecated: This field is ignored. MinReplicas int `json:"minReplicas,omitempty"` // Deprecated: This field is ignored. MaxReplicas int `json:"maxReplicas,omitempty"` // Deprecated: This field is ignored. Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"` // Deprecated: This field is ignored. Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"` } type SharedMemorySpec struct { Disabled bool `json:"disabled,omitempty"` Size resource.Quantity `json:"size,omitempty"` } type ResourceItem struct { // CPU specifies the CPU resource request/limit (e.g., "1000m", "2") CPU string `json:"cpu,omitempty"` // Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi") Memory string `json:"memory,omitempty"` // GPU indicates the number of GPUs to request. // Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment. GPU string `json:"gpu,omitempty"` // GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe" // By default if not specified, the GPU type is "nvidia.com/gpu" GPUType string `json:"gpuType,omitempty"` // Custom specifies additional custom resource requests/limits Custom map[string]string `json:"custom,omitempty"` } // Resources defines requested and limits for a component, including CPU, memory, // GPUs/devices, and any runtime-specific resources. type Resources struct { // Requests specifies the minimum resources required by the component Requests *ResourceItem `json:"requests,omitempty"` // Limits specifies the maximum resources allowed for the component Limits *ResourceItem `json:"limits,omitempty"` // Claims specifies resource claims for dynamic resource allocation Claims []corev1.ResourceClaim `json:"claims,omitempty"` } type DeploymentTargetHPAConf struct { CPU *int32 `json:"cpu,omitempty"` GPU *int32 `json:"gpu,omitempty"` Memory *string `json:"memory,omitempty"` QPS *int64 `json:"qps,omitempty"` MinReplicas *int32 `json:"min_replicas,omitempty"` MaxReplicas *int32 `json:"max_replicas,omitempty"` } type LabelItemSchema struct { Key string `json:"key"` Value string `json:"value"` } type ExtraPodMetadata struct { Annotations map[string]string `json:"annotations,omitempty"` Labels map[string]string `json:"labels,omitempty"` } type ExtraPodSpec struct { *corev1.PodSpec `json:",inline"` MainContainer *corev1.Container `json:"mainContainer,omitempty"` } // ScalingAdapter configures whether a service uses the DynamoGraphDeploymentScalingAdapter // for replica management. When enabled, the DGDSA owns the replicas field and // external autoscalers (HPA, KEDA, Planner) can control scaling via the Scale subresource. type ScalingAdapter struct { // Enabled indicates whether the ScalingAdapter should be enabled for this service. // When true, a DGDSA is created and owns the replicas field. // When false (default), no DGDSA is created and replicas can be modified directly in the DGD. // +optional // +kubebuilder:default=false Enabled bool `json:"enabled,omitempty"` } // CheckpointMode defines how checkpoint creation is handled // +kubebuilder:validation:Enum=Auto;Manual type CheckpointMode string const ( // CheckpointModeAuto means the DGD controller will automatically create a Checkpoint CR CheckpointModeAuto CheckpointMode = "Auto" // CheckpointModeManual means the user must create the Checkpoint CR themselves CheckpointModeManual CheckpointMode = "Manual" ) // ServiceCheckpointConfig configures checkpointing for a DGD service // +kubebuilder:validation:XValidation:rule="!self.enabled || (has(self.checkpointRef) && size(self.checkpointRef) > 0) || (has(self.identity) && has(self.identity.model) && has(self.identity.backendFramework))",message="When enabled, either checkpointRef or both identity.model and identity.backendFramework must be specified" type ServiceCheckpointConfig struct { // Enabled indicates whether checkpointing is enabled for this service // +optional // +kubebuilder:default=false Enabled bool `json:"enabled,omitempty"` // Mode defines how checkpoint creation is handled // - Auto: DGD controller creates Checkpoint CR automatically // - Manual: User must create Checkpoint CR // +optional // +kubebuilder:default=Auto Mode CheckpointMode `json:"mode,omitempty"` // CheckpointRef references an existing Checkpoint CR to use // If specified, Identity is ignored and this checkpoint is used directly // +optional CheckpointRef *string `json:"checkpointRef,omitempty"` // Identity defines the checkpoint identity for hash computation // Used when Mode is Auto or when looking up existing checkpoints // Required when checkpointRef is not specified // +optional Identity *DynamoCheckpointIdentity `json:"identity,omitempty"` }