/*
 * SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package v1alpha1

import (
	autoscalingv2 "k8s.io/api/autoscaling/v2"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

26
// +kubebuilder:validation:XValidation:rule="!has(self.create) || self.create == false || (has(self.size) && has(self.storageClass) && has(self.volumeAccessMode))",message="When create is true, size, storageClass, and volumeAccessMode are required"
27
28
29
30
type PVC struct {
	// Create indicates to create a new PVC
	Create *bool `json:"create,omitempty"`
	// Name is the name of the PVC
31
	// +kubebuilder:validation:Required
32
	Name *string `json:"name,omitempty"`
33
	// StorageClass to be used for PVC creation. Required when create is true.
34
	StorageClass string `json:"storageClass,omitempty"`
35
	// Size of the volume in Gi, used during PVC creation. Required when create is true.
36
	Size resource.Quantity `json:"size,omitempty"`
37
	// VolumeAccessMode is the volume access mode of the PVC. Required when create is true.
38
	VolumeAccessMode corev1.PersistentVolumeAccessMode `json:"volumeAccessMode,omitempty"`
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
}

// VolumeMount references a PVC defined at the top level so that the component
// can mount it as a volume.
type VolumeMount struct {
	// Name references a PVC name defined in the top-level PVCs map
	// +kubebuilder:validation:Required
	Name string `json:"name,omitempty"`
	// MountPoint specifies where to mount the volume.
	// If useAsCompilationCache is true and mountPoint is not specified,
	// a backend-specific default will be used.
	MountPoint string `json:"mountPoint,omitempty"`
	// UseAsCompilationCache indicates this volume should be used as a compilation cache.
	// When true, backend-specific environment variables will be set and default mount points may be used.
	// +kubebuilder:default=false
	UseAsCompilationCache bool `json:"useAsCompilationCache,omitempty"`
}

56
57
58
// Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter
// with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md
// for migration guidance. This field will be removed in a future API version.
59
type Autoscaling struct {
60
61
62
63
64
65
66
67
68
69
	// Deprecated: This field is ignored.
	Enabled bool `json:"enabled,omitempty"`
	// Deprecated: This field is ignored.
	MinReplicas int `json:"minReplicas,omitempty"`
	// Deprecated: This field is ignored.
	MaxReplicas int `json:"maxReplicas,omitempty"`
	// Deprecated: This field is ignored.
	Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
	// Deprecated: This field is ignored.
	Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"`
70
}
71
72
73
74
75

type SharedMemorySpec struct {
	Disabled bool              `json:"disabled,omitempty"`
	Size     resource.Quantity `json:"size,omitempty"`
}
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125

// ResourceItem is one set of resource quantities (CPU, memory, GPU, and custom
// resources), each carried as a string. It is used for both the requests and
// the limits side of Resources.
type ResourceItem struct {
	// CPU is the CPU quantity to request or limit, e.g. "1000m" or "2".
	CPU string `json:"cpu,omitempty"`
	// Memory is the memory quantity to request or limit, e.g. "4Gi" or "8Gi".
	Memory string `json:"memory,omitempty"`
	// GPU is the number of GPUs to request.
	// For a multinode deployment the total GPU count is NumberOfNodes * GPU.
	GPU string `json:"gpu,omitempty"`
	// GPUType selects a custom GPU resource type, e.g. "gpu.intel.com/xe".
	// When left unset, the GPU type defaults to "nvidia.com/gpu".
	GPUType string `json:"gpuType,omitempty"`
	// Custom lists any additional custom resource requests/limits.
	Custom map[string]string `json:"custom,omitempty"`
}

// Resources defines the resource requests and limits for a component, covering
// CPU, memory, GPUs/devices, and any runtime-specific resources.
type Resources struct {
	// Requests is the minimum set of resources the component requires.
	Requests *ResourceItem `json:"requests,omitempty"`
	// Limits is the maximum set of resources the component is allowed.
	Limits *ResourceItem `json:"limits,omitempty"`
	// Claims lists the resource claims used for dynamic resource allocation.
	Claims []corev1.ResourceClaim `json:"claims,omitempty"`
}

// DeploymentTargetHPAConf holds optional autoscaling targets and replica bounds.
// Every field is a pointer, so an unset value is distinguishable from a zero one.
// NOTE(review): the units of CPU, GPU, Memory, and QPS are not defined in this
// file — confirm them against the code that consumes this type.
type DeploymentTargetHPAConf struct {
	// CPU is the optional CPU target.
	CPU *int32 `json:"cpu,omitempty"`
	// GPU is the optional GPU target.
	GPU *int32 `json:"gpu,omitempty"`
	// Memory is the optional memory target, carried as a string.
	Memory *string `json:"memory,omitempty"`
	// QPS is the optional QPS target.
	QPS *int64 `json:"qps,omitempty"`
	// MinReplicas is the optional lower replica bound (serialized as "min_replicas").
	MinReplicas *int32 `json:"min_replicas,omitempty"`
	// MaxReplicas is the optional upper replica bound (serialized as "max_replicas").
	MaxReplicas *int32 `json:"max_replicas,omitempty"`
}

// LabelItemSchema is a single key/value label pair. Both fields are always
// serialized, since neither JSON tag carries omitempty.
type LabelItemSchema struct {
	// Key is the label key.
	Key string `json:"key"`
	// Value is the label value.
	Value string `json:"value"`
}

// ExtraPodMetadata carries additional annotations and labels for a pod.
type ExtraPodMetadata struct {
	// Annotations holds the extra annotation key/value pairs.
	Annotations map[string]string `json:"annotations,omitempty"`
	// Labels holds the extra label key/value pairs.
	Labels map[string]string `json:"labels,omitempty"`
}

type ExtraPodSpec struct {
	*corev1.PodSpec `json:",inline"`
	MainContainer   *corev1.Container `json:"mainContainer,omitempty"`
}
126
127
128
129
130
131
132
133
134
135
136
137

// ScalingAdapter controls whether a service's replicas are managed through a
// DynamoGraphDeploymentScalingAdapter (DGDSA). With the adapter enabled, which
// is the default, the DGDSA owns the replicas field and external autoscalers
// (HPA, KEDA, Planner) drive scaling via the Scale subresource.
type ScalingAdapter struct {
	// Disable turns the ScalingAdapter off for this service.
	// false (default): a DGDSA is created and owns the replicas field.
	// true: no DGDSA is created and replicas can be modified directly in the DGD.
	// +optional
	// +kubebuilder:default=false
	Disable bool `json:"disable,omitempty"`
}