common.go 11.5 KB
Newer Older
/*
 * SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package v1alpha1

import (
21
22
	"encoding/json"

23
24
25
26
27
	autoscalingv2 "k8s.io/api/autoscaling/v2"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

28
// +kubebuilder:validation:XValidation:rule="!has(self.create) || self.create == false || (has(self.size) && has(self.storageClass) && has(self.volumeAccessMode))",message="When create is true, size, storageClass, and volumeAccessMode are required"
29
30
31
32
type PVC struct {
	// Create indicates to create a new PVC
	Create *bool `json:"create,omitempty"`
	// Name is the name of the PVC
33
	// +kubebuilder:validation:Required
34
	Name *string `json:"name,omitempty"`
35
	// StorageClass to be used for PVC creation. Required when create is true.
36
	StorageClass string `json:"storageClass,omitempty"`
37
	// Size of the volume in Gi, used during PVC creation. Required when create is true.
38
	Size resource.Quantity `json:"size,omitempty"`
39
	// VolumeAccessMode is the volume access mode of the PVC. Required when create is true.
40
	VolumeAccessMode corev1.PersistentVolumeAccessMode `json:"volumeAccessMode,omitempty"`
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
}

// VolumeMount references a PVC defined at the top level so that the component
// can mount it as a volume.
type VolumeMount struct {
	// Name references a PVC name defined in the top-level PVCs map.
	// +kubebuilder:validation:Required
	Name string `json:"name,omitempty"`
	// MountPoint specifies where to mount the volume.
	// If useAsCompilationCache is true and mountPoint is not specified,
	// a backend-specific default will be used.
	MountPoint string `json:"mountPoint,omitempty"`
	// UseAsCompilationCache indicates this volume should be used as a compilation cache.
	// When true, backend-specific environment variables will be set and default mount points may be used.
	// +kubebuilder:default=false
	UseAsCompilationCache bool `json:"useAsCompilationCache,omitempty"`
}

58
// Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter
59
// with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md
60
// for migration guidance. This field will be removed in a future API version.
61
type Autoscaling struct {
62
63
64
65
66
67
68
69
70
71
	// Deprecated: This field is ignored.
	Enabled bool `json:"enabled,omitempty"`
	// Deprecated: This field is ignored.
	MinReplicas int `json:"minReplicas,omitempty"`
	// Deprecated: This field is ignored.
	MaxReplicas int `json:"maxReplicas,omitempty"`
	// Deprecated: This field is ignored.
	Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
	// Deprecated: This field is ignored.
	Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"`
72
}
73

atchernych's avatar
atchernych committed
74
// +kubebuilder:validation:XValidation:rule="!(has(self.disabled) && self.disabled && has(self.size))",message="sharedMemory.size must not be set when sharedMemory.disabled is true"
75
76
77
78
type SharedMemorySpec struct {
	Disabled bool              `json:"disabled,omitempty"`
	Size     resource.Quantity `json:"size,omitempty"`
}
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128

// ResourceItem is one set of resource quantities (CPU, memory, GPU and custom
// resources), each given as a string. It is used for both the Requests and the
// Limits of Resources. Every field is optional and omitted from JSON when empty.
type ResourceItem struct {
	// CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
	CPU string `json:"cpu,omitempty"`
	// Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
	Memory string `json:"memory,omitempty"`
	// GPU indicates the number of GPUs to request.
	// Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
	GPU string `json:"gpu,omitempty"`
	// GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
	// By default if not specified, the GPU type is "nvidia.com/gpu"
	GPUType string `json:"gpuType,omitempty"`
	// Custom specifies additional custom resource requests/limits,
	// as a map of strings.
	Custom map[string]string `json:"custom,omitempty"`
}

// Resources defines the resource requests and limits for a component, including
// CPU, memory, GPUs/devices, and any runtime-specific resources.
type Resources struct {
	// Requests specifies the minimum resources required by the component
	Requests *ResourceItem `json:"requests,omitempty"`
	// Limits specifies the maximum resources allowed for the component
	Limits *ResourceItem `json:"limits,omitempty"`
	// Claims specifies resource claims for dynamic resource allocation
	Claims []corev1.ResourceClaim `json:"claims,omitempty"`
}

// DeploymentTargetHPAConf holds optional HPA-style settings: per-resource values
// (cpu, gpu, memory, qps) and replica bounds. Every field is a pointer, so an
// unset value is omitted from JSON. Note that the replica bounds serialize with
// snake_case keys (min_replicas, max_replicas), unlike the camelCase keys used
// by the other types in this file.
type DeploymentTargetHPAConf struct {
	CPU         *int32  `json:"cpu,omitempty"`
	GPU         *int32  `json:"gpu,omitempty"`
	Memory      *string `json:"memory,omitempty"`
	QPS         *int64  `json:"qps,omitempty"`
	MinReplicas *int32  `json:"min_replicas,omitempty"`
	MaxReplicas *int32  `json:"max_replicas,omitempty"`
}

// LabelItemSchema is a single key/value pair. Both fields are always present in
// the JSON encoding because neither tag uses omitempty.
type LabelItemSchema struct {
	Key   string `json:"key"`
	Value string `json:"value"`
}

// ExtraPodMetadata holds extra annotations and labels as string maps. Both maps
// are optional and are omitted from JSON when empty.
type ExtraPodMetadata struct {
	Annotations map[string]string `json:"annotations,omitempty"`
	Labels      map[string]string `json:"labels,omitempty"`
}

// ExtraPodSpec carries extra pod-level settings: an embedded, optional
// corev1.PodSpec whose fields appear inline in the JSON object, plus a separate
// MainContainer container definition. JSON encoding is customized by the
// MarshalJSON method on this type.
type ExtraPodSpec struct {
	*corev1.PodSpec `json:",inline"`
	MainContainer   *corev1.Container `json:"mainContainer,omitempty"`
}
129

130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
// MarshalJSON implements json.Marshaler for ExtraPodSpec.
//
// corev1.PodSpec.Containers is declared without omitempty, so a nil slice
// serializes as "containers": null. The CRD structural schema defines
// containers as type: array and rejects null. This custom marshaller shadows
// the Containers field with an omitempty-tagged copy so that nil/empty
// Containers are omitted from the JSON output entirely.
//
// It returns the encoded JSON object, or the error reported by json.Marshal.
func (e ExtraPodSpec) MarshalJSON() ([]byte, error) {
	// PodSpecAlias is a defined type (not a true alias): it shares the
	// underlying struct of corev1.PodSpec but has an empty method set, which
	// prevents infinite recursion through any MarshalJSON defined on PodSpec.
	type PodSpecAlias corev1.PodSpec

	// The outer Containers field sits at a shallower depth than the one
	// promoted from the embedded PodSpecAlias, so encoding/json picks the
	// outer field and its omitempty tag applies.
	aux := struct {
		*PodSpecAlias `json:",inline"`
		Containers    []corev1.Container `json:"containers,omitempty"`
		MainContainer *corev1.Container  `json:"mainContainer,omitempty"`
	}{
		MainContainer: e.MainContainer,
	}

	if e.PodSpec != nil {
		// Convert the pointer rather than the value: the two types have
		// identical underlying types, so this avoids copying the whole
		// PodSpec on every marshal. json.Marshal only reads through it.
		aux.PodSpecAlias = (*PodSpecAlias)(e.PodSpec)
		aux.Containers = e.PodSpec.Containers
	}

	return json.Marshal(aux)
}

158
159
160
161
162
163
// GPUMemoryServiceMode selects the GMS deployment topology.
type GPUMemoryServiceMode string

const (
	// GMSModeIntraPod runs GMS as a sidecar within the same pod.
	GMSModeIntraPod GPUMemoryServiceMode = "intraPod"
	// GMSModeInterPod runs GMS as a separate weight server pod and one or more
	// engine pods per rank, sharing GPUs via DRA ResourceClaims and a shared
	// hostPath volume for UDS sockets. Only valid on FailoverSpec; the
	// GPUMemoryServiceSpec sidecar always runs in intraPod mode.
	GMSModeInterPod GPUMemoryServiceMode = "interPod"
)

// GPUMemoryServiceSpec configures the GPU Memory Service (GMS) sidecar for a worker component.
// When enabled, the operator injects a GMS sidecar that provides shared GPU memory access
// via DRA (Dynamic Resource Allocation). The sidecar runs two GMS processes per GPU
// (weights + kv_cache) and communicates with the main container over UDS sockets.
type GPUMemoryServiceSpec struct {
	// Enabled activates the GMS sidecar. GPU resources on the main container
	// are replaced with a DRA ResourceClaim for shared GPU access.
	// The JSON key has no omitempty, so it is always serialized.
	Enabled bool `json:"enabled"`

	// NOTE(review): the Enum marker below admits interPod, but the
	// GMSModeInterPod documentation states that interPod is only valid on
	// FailoverSpec and that this sidecar always runs in intraPod mode.
	// Confirm whether interPod is rejected elsewhere (e.g. by a webhook).

	// Mode selects the GMS deployment topology.
	// +kubebuilder:default=intraPod
	// +kubebuilder:validation:Enum=intraPod;interPod
	// +optional
	Mode GPUMemoryServiceMode `json:"mode,omitempty"`
	// DeviceClassName is the DRA DeviceClass to request GPUs from.
	// +kubebuilder:default="gpu.nvidia.com"
	// +optional
	DeviceClassName string `json:"deviceClassName,omitempty"`
}

190
// FailoverSpec configures active-passive failover for a worker component.
191
192
193
194
// For intraPod mode: requires gpuMemoryService.enabled; the main container is cloned
// into engine containers (active + standby) within the same pod.
// For interPod mode: the operator creates a dedicated GMS weight server pod and
// multiple engine pods per rank that share GPUs via DRA resource claims.
195
type FailoverSpec struct {
196
	// Enabled activates failover mode.
197
	Enabled bool `json:"enabled"`
198
199
200
	// Mode selects the failover deployment topology.
	// intraPod: engine containers run within the same pod (requires gpuMemoryService.enabled).
	// interPod: a dedicated GMS weight server pod + engine pods per rank (requires Grove).
201
202
203
204
	// +kubebuilder:default=intraPod
	// +kubebuilder:validation:Enum=intraPod;interPod
	// +optional
	Mode GPUMemoryServiceMode `json:"mode,omitempty"`
205
206
207
208
209
210
	// NumShadows is the number of shadow (standby) engine pods per rank.
	// Total engine pods per rank = NumShadows + 1 (1 primary + NumShadows shadows).
	//
	// NumShadows is only meaningful for mode=interPod; intraPod uses a fixed
	// 1 primary + 1 shadow sidecar layout and any value other than 1 is
	// rejected at admission time.
211
212
213
214
215
216
	// +kubebuilder:default=1
	// +kubebuilder:validation:Minimum=1
	// +optional
	NumShadows int32 `json:"numShadows,omitempty"`
}

217
// ScalingAdapter configures whether a service uses the DynamoGraphDeploymentScalingAdapter
// (DGDSA) for replica management. When enabled, the DGDSA owns the replicas field and
// external autoscalers (HPA, KEDA, Planner) can control scaling via the Scale subresource.
type ScalingAdapter struct {
	// Enabled indicates whether the ScalingAdapter should be enabled for this service.
	// When true, a DGDSA is created and owns the replicas field.
	// When false (default), no DGDSA is created and replicas can be modified directly in the DGD.
	// +optional
	// +kubebuilder:default=false
	Enabled bool `json:"enabled,omitempty"`
}
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254

// CheckpointMode defines how checkpoint creation is handled.
// The Enum marker restricts it to the values Auto and Manual.
// +kubebuilder:validation:Enum=Auto;Manual
type CheckpointMode string

const (
	// CheckpointModeAuto means the DGD controller will automatically create a Checkpoint CR.
	CheckpointModeAuto CheckpointMode = "Auto"
	// CheckpointModeManual means the user must create the Checkpoint CR themselves.
	CheckpointModeManual CheckpointMode = "Manual"
)

// ServiceCheckpointConfig configures checkpointing for a DGD service
// +kubebuilder:validation:XValidation:rule="!self.enabled || (has(self.checkpointRef) && size(self.checkpointRef) > 0) || (has(self.identity) && has(self.identity.model) && has(self.identity.backendFramework))",message="When enabled, either checkpointRef or both identity.model and identity.backendFramework must be specified"
type ServiceCheckpointConfig struct {
	// Enabled indicates whether checkpointing is enabled for this service
	// +optional
	// +kubebuilder:default=false
	Enabled bool `json:"enabled,omitempty"`

	// Mode defines how checkpoint creation is handled
	// - Auto: DGD controller creates Checkpoint CR automatically
	// - Manual: User must create Checkpoint CR
	// +optional
	// +kubebuilder:default=Auto
	Mode CheckpointMode `json:"mode,omitempty"`

255
256
	// CheckpointRef references an existing DynamoCheckpoint CR by metadata.name.
	// If specified, this service's Identity is ignored and the referenced checkpoint is used directly.
257
258
259
260
261
262
263
264
265
	// +optional
	CheckpointRef *string `json:"checkpointRef,omitempty"`

	// Identity defines the checkpoint identity for hash computation
	// Used when Mode is Auto or when looking up existing checkpoints
	// Required when checkpointRef is not specified
	// +optional
	Identity *DynamoCheckpointIdentity `json:"identity,omitempty"`
}