/*
 * SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package v1alpha1

import (
	"encoding/json"

	autoscalingv2 "k8s.io/api/autoscaling/v2"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// +kubebuilder:validation:XValidation:rule="!has(self.create) || self.create == false || (has(self.size) && has(self.storageClass) && has(self.volumeAccessMode))",message="When create is true, size, storageClass, and volumeAccessMode are required"
29
30
31
32
type PVC struct {
	// Create indicates to create a new PVC
	Create *bool `json:"create,omitempty"`
	// Name is the name of the PVC
33
	// +kubebuilder:validation:Required
34
	Name *string `json:"name,omitempty"`
35
	// StorageClass to be used for PVC creation. Required when create is true.
36
	StorageClass string `json:"storageClass,omitempty"`
37
	// Size of the volume in Gi, used during PVC creation. Required when create is true.
38
	Size resource.Quantity `json:"size,omitempty"`
39
	// VolumeAccessMode is the volume access mode of the PVC. Required when create is true.
40
	VolumeAccessMode corev1.PersistentVolumeAccessMode `json:"volumeAccessMode,omitempty"`
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
}

// VolumeMount references a PVC defined at the top level so that the volume is
// mounted by the component.
type VolumeMount struct {
	// Name references a PVC name defined in the top-level PVCs map.
	// +kubebuilder:validation:Required
	Name string `json:"name,omitempty"`
	// MountPoint specifies where to mount the volume.
	// If useAsCompilationCache is true and mountPoint is not specified,
	// a backend-specific default will be used.
	MountPoint string `json:"mountPoint,omitempty"`
	// UseAsCompilationCache indicates this volume should be used as a compilation cache.
	// When true, backend-specific environment variables will be set and default mount points may be used.
	// +kubebuilder:default=false
	UseAsCompilationCache bool `json:"useAsCompilationCache,omitempty"`
}

// Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter
59
// with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md
60
// for migration guidance. This field will be removed in a future API version.
61
type Autoscaling struct {
62
63
64
65
66
67
68
69
70
71
	// Deprecated: This field is ignored.
	Enabled bool `json:"enabled,omitempty"`
	// Deprecated: This field is ignored.
	MinReplicas int `json:"minReplicas,omitempty"`
	// Deprecated: This field is ignored.
	MaxReplicas int `json:"maxReplicas,omitempty"`
	// Deprecated: This field is ignored.
	Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
	// Deprecated: This field is ignored.
	Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"`
72
}
// +kubebuilder:validation:XValidation:rule="!(has(self.disabled) && self.disabled && has(self.size))",message="sharedMemory.size must not be set when sharedMemory.disabled is true"
75
76
77
78
type SharedMemorySpec struct {
	Disabled bool              `json:"disabled,omitempty"`
	Size     resource.Quantity `json:"size,omitempty"`
}

// ResourceItem is one set of resource quantities (CPU, memory, GPUs and any
// custom resources), with every value written as a string. Resources uses it
// for both its Requests and its Limits.
type ResourceItem struct {
	// CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
	CPU string `json:"cpu,omitempty"`
	// Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
	Memory string `json:"memory,omitempty"`
	// GPU indicates the number of GPUs to request.
	// Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
	GPU string `json:"gpu,omitempty"`
	// GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
	// By default if not specified, the GPU type is "nvidia.com/gpu"
	GPUType string `json:"gpuType,omitempty"`
	// Custom specifies additional custom resource requests/limits
	Custom map[string]string `json:"custom,omitempty"`
}

// Resources defines the resource requests and limits for a component, including
// CPU, memory, GPUs/devices, and any runtime-specific resources, plus optional
// resource claims.
type Resources struct {
	// Requests specifies the minimum resources required by the component
	Requests *ResourceItem `json:"requests,omitempty"`
	// Limits specifies the maximum resources allowed for the component
	Limits *ResourceItem `json:"limits,omitempty"`
	// Claims specifies resource claims for dynamic resource allocation
	Claims []corev1.ResourceClaim `json:"claims,omitempty"`
}

// DeploymentTargetHPAConf groups optional autoscaling settings: values for CPU,
// GPU, memory and QPS plus minimum and maximum replica counts. Every field is a
// pointer, so an unset field is omitted from the JSON. The replica bounds use
// snake_case JSON keys (min_replicas, max_replicas).
// NOTE(review): the units of CPU, GPU, Memory and QPS (target utilization vs.
// absolute value) are not visible in this file; confirm against the consumer.
type DeploymentTargetHPAConf struct {
	CPU         *int32  `json:"cpu,omitempty"`
	GPU         *int32  `json:"gpu,omitempty"`
	Memory      *string `json:"memory,omitempty"`
	QPS         *int64  `json:"qps,omitempty"`
	MinReplicas *int32  `json:"min_replicas,omitempty"`
	MaxReplicas *int32  `json:"max_replicas,omitempty"`
}

// LabelItemSchema is a single key/value pair. Neither field uses omitempty, so
// both keys are always present in the JSON, even when empty.
type LabelItemSchema struct {
	Key   string `json:"key"`
	Value string `json:"value"`
}

// ExtraPodMetadata holds additional annotations and labels.
// NOTE(review): going by the name these are applied to pod metadata; confirm
// against the code that consumes this type.
type ExtraPodMetadata struct {
	Annotations map[string]string `json:"annotations,omitempty"`
	Labels      map[string]string `json:"labels,omitempty"`
}

// ExtraPodSpec embeds an optional corev1.PodSpec, whose fields appear at the
// top level of the JSON object, and adds an optional MainContainer.
// Encoding goes through the type's MarshalJSON method, which omits a nil
// Containers slice instead of emitting "containers": null.
type ExtraPodSpec struct {
	*corev1.PodSpec `json:",inline"`
	MainContainer   *corev1.Container `json:"mainContainer,omitempty"`
}
// MarshalJSON implements json.Marshaler for ExtraPodSpec.
//
// corev1.PodSpec declares Containers without omitempty, so a nil slice would
// be encoded as "containers": null, which the CRD structural schema (type:
// array) rejects. To avoid that, the value is encoded through a wire struct
// whose own omitempty-tagged Containers field shadows the embedded one, so
// nil or empty Containers disappear from the output.
func (e ExtraPodSpec) MarshalJSON() ([]byte, error) {
	// PlainPodSpec is a distinct defined type with PodSpec's fields and JSON
	// tags but none of its methods, so encoding it cannot recurse through a
	// MarshalJSON declared on PodSpec.
	type PlainPodSpec corev1.PodSpec

	// wire is the shape actually handed to encoding/json. Its top-level
	// Containers field wins over the one promoted from the embedded struct.
	type wire struct {
		*PlainPodSpec `json:",inline"`
		Containers    []corev1.Container `json:"containers,omitempty"`
		MainContainer *corev1.Container  `json:"mainContainer,omitempty"`
	}

	out := wire{MainContainer: e.MainContainer}
	if spec := e.PodSpec; spec != nil {
		// Both types share an underlying struct, so the pointer converts directly.
		out.PlainPodSpec = (*PlainPodSpec)(spec)
		out.Containers = spec.Containers
	}

	return json.Marshal(out)
}

// ScalingAdapter configures whether a service uses the DynamoGraphDeploymentScalingAdapter
// for replica management. When enabled, the DGDSA owns the replicas field and
// external autoscalers (HPA, KEDA, Planner) can control scaling via the Scale subresource.
type ScalingAdapter struct {
	// Enabled indicates whether the ScalingAdapter should be enabled for this service.
	// When true, a DGDSA is created and owns the replicas field.
	// When false (default), no DGDSA is created and replicas can be modified directly in the DGD.
	// +optional
	// +kubebuilder:default=false
	Enabled bool `json:"enabled,omitempty"`
}

// CheckpointMode defines how checkpoint creation is handled. The only values
// admitted by validation are "Auto" and "Manual".
// +kubebuilder:validation:Enum=Auto;Manual
type CheckpointMode string

const (
	// CheckpointModeAuto means the DGD controller will automatically create a Checkpoint CR.
	CheckpointModeAuto CheckpointMode = "Auto"
	// CheckpointModeManual means the user must create the Checkpoint CR themselves.
	CheckpointModeManual CheckpointMode = "Manual"
)

// ServiceCheckpointConfig configures checkpointing for a DGD service
// +kubebuilder:validation:XValidation:rule="!self.enabled || (has(self.checkpointRef) && size(self.checkpointRef) > 0) || (has(self.identity) && has(self.identity.model) && has(self.identity.backendFramework))",message="When enabled, either checkpointRef or both identity.model and identity.backendFramework must be specified"
type ServiceCheckpointConfig struct {
	// Enabled indicates whether checkpointing is enabled for this service
	// +optional
	// +kubebuilder:default=false
	Enabled bool `json:"enabled,omitempty"`

	// Mode defines how checkpoint creation is handled
	// - Auto: DGD controller creates Checkpoint CR automatically
	// - Manual: User must create Checkpoint CR
	// +optional
	// +kubebuilder:default=Auto
	Mode CheckpointMode `json:"mode,omitempty"`

196
197
	// CheckpointRef references an existing DynamoCheckpoint CR by metadata.name.
	// If specified, this service's Identity is ignored and the referenced checkpoint is used directly.
198
199
200
201
202
203
204
205
206
	// +optional
	CheckpointRef *string `json:"checkpointRef,omitempty"`

	// Identity defines the checkpoint identity for hash computation
	// Used when Mode is Auto or when looking up existing checkpoints
	// Required when checkpointRef is not specified
	// +optional
	Identity *DynamoCheckpointIdentity `json:"identity,omitempty"`
}