consts.go 5 KB
Newer Older
1
2
package consts

3
4
5
6
7
import (
	"time"

	"k8s.io/apimachinery/pkg/runtime/schema"
)
8

9
10
11
12
// Shared constants: default identifiers, port numbers and names, Kubernetes
// label/annotation keys, environment variable names, component types, and
// resource type/state strings.
const (
	// Default user and organization identifiers.
	DefaultUserId = "default"
	DefaultOrgId  = "default"

	// Main service port and the names given to the service/container port.
	DynamoServicePort       = 8000
	DynamoServicePortName   = "http"
	DynamoContainerPortName = "http"

	// Planner metrics port and the metrics port name.
	DynamoPlannerMetricsPort = 9085
	DynamoMetricsPortName    = "metrics"

	// System port and its port name.
	DynamoSystemPort     = 9090
	DynamoSystemPortName = "system"

	// SSH port used for mpirun.
	MpiRunSshPort = 2222

	// Default security context values
	// These provide secure defaults for running containers as non-root
	// Users can override these via extraPodSpec.securityContext in their DynamoGraphDeployment
	DefaultSecurityContextFSGroup = 1000

	// Environment variable name for the service port.
	EnvDynamoServicePort = "DYNAMO_PORT"

	// Selector label key.
	KubeLabelDynamoSelector = "nvidia.com/selector"

	// Annotation key for enabling Grove.
	KubeAnnotationEnableGrove = "nvidia.com/enable-grove"

	// Annotation keys for image pull secret discovery and the discovery backend.
	KubeAnnotationDisableImagePullSecretDiscovery = "nvidia.com/disable-image-pull-secret-discovery"
	KubeAnnotationDynamoDiscoveryBackend          = "nvidia.com/dynamo-discovery-backend"

	// Label keys under the "nvidia.com/" prefix.
	// KubeAnnotationDynamoBaseModel uses the same key string as
	// KubeLabelDynamoBaseModel, and KubeLabelDynamoDiscoveryBackend uses the
	// same key string as KubeAnnotationDynamoDiscoveryBackend.
	KubeLabelDynamoGraphDeploymentName  = "nvidia.com/dynamo-graph-deployment-name"
	KubeLabelDynamoComponent            = "nvidia.com/dynamo-component"
	KubeLabelDynamoNamespace            = "nvidia.com/dynamo-namespace"
	KubeLabelDynamoDeploymentTargetType = "nvidia.com/dynamo-deployment-target-type"
	KubeLabelDynamoComponentType        = "nvidia.com/dynamo-component-type"
	KubeLabelDynamoSubComponentType     = "nvidia.com/dynamo-sub-component-type"
	KubeLabelDynamoBaseModel            = "nvidia.com/dynamo-base-model"
	KubeLabelDynamoBaseModelHash        = "nvidia.com/dynamo-base-model-hash"
	KubeAnnotationDynamoBaseModel       = "nvidia.com/dynamo-base-model"
	KubeLabelDynamoDiscoveryBackend     = "nvidia.com/dynamo-discovery-backend"
	KubeLabelDynamoDiscoveryEnabled     = "nvidia.com/dynamo-discovery-enabled"

	// String forms of boolean label values.
	KubeLabelValueFalse = "false"
	KubeLabelValueTrue  = "true"

	// Component pod label key.
	KubeLabelDynamoComponentPod = "nvidia.com/dynamo-component-pod"

	// Resource name for NVIDIA GPUs.
	KubeResourceGPUNvidia = "nvidia.com/gpu"

	// Environment variable names with the "DYN_" prefix.
	DynamoDeploymentConfigEnvVar = "DYN_DEPLOYMENT_CONFIG"
	DynamoNamespaceEnvVar        = "DYN_NAMESPACE"
	DynamoComponentEnvVar        = "DYN_COMPONENT"
	DynamoDiscoveryBackendEnvVar = "DYN_DISCOVERY_BACKEND"

	// Name of the global Dynamo namespace.
	GlobalDynamoNamespace = "dynamo"

	// Component type values and the planner service account name.
	ComponentTypePlanner      = "planner"
	ComponentTypeFrontend     = "frontend"
	ComponentTypeWorker       = "worker"
	ComponentTypePrefill      = "prefill"
	ComponentTypeDecode       = "decode"
	ComponentTypeDefault      = "default"
	PlannerServiceAccountName = "planner-serviceaccount"

	// Default ingress suffix.
	DefaultIngressSuffix = "local"

	// Default Grove termination delay.
	DefaultGroveTerminationDelay = 15 * time.Minute

	// Metrics related constants
	KubeAnnotationEnableMetrics = "nvidia.com/enable-metrics"  // User-provided annotation to control metrics
	KubeLabelMetricsEnabled     = "nvidia.com/metrics-enabled" // Controller-managed label for pod selection

	// Shared memory defaults: name, mount path, and size.
	KubeValueNameSharedMemory    = "shared-memory"
	DefaultSharedMemoryMountPath = "/dev/shm"
	DefaultSharedMemorySize      = "8Gi"

	// Compilation cache default mount points
	DefaultVLLMCacheMountPoint = "/root/.cache/vllm"

	// Kai-scheduler related constants
	KubeAnnotationKaiSchedulerQueue = "nvidia.com/kai-scheduler-queue" // User-provided annotation to specify queue name
	KubeLabelKaiSchedulerQueue      = "kai.scheduler/queue"            // Label injected into pods for kai-scheduler
	KaiSchedulerName                = "kai-scheduler"                  // Scheduler name for kai-scheduler
	DefaultKaiSchedulerQueue        = "dynamo"                         // Default queue name when none specified

	// Grove multinode role suffixes
	GroveRoleSuffixLeader = "ldr"
	GroveRoleSuffixWorker = "wkr"

	// Name of the main container.
	MainContainerName = "main"

	// Annotation key for restarts.
	RestartAnnotation = "nvidia.com/restartAt"

	// Resource type constants - match Kubernetes Kind names
	// Used consistently across controllers, webhooks, and metrics
	ResourceTypeDynamoGraphDeployment               = "DynamoGraphDeployment"
	ResourceTypeDynamoComponentDeployment           = "DynamoComponentDeployment"
	ResourceTypeDynamoModel                         = "DynamoModel"
	ResourceTypeDynamoGraphDeploymentRequest        = "DynamoGraphDeploymentRequest"
	ResourceTypeDynamoGraphDeploymentScalingAdapter = "DynamoGraphDeploymentScalingAdapter"

	// Resource state constants - used in status reporting and metrics
	ResourceStateReady    = "ready"
	ResourceStateNotReady = "not_ready"
	ResourceStateUnknown  = "unknown"
)

// MultinodeDeploymentType is a string-typed identifier for a multinode
// deployment type.
type MultinodeDeploymentType string

// Known MultinodeDeploymentType values.
const (
	// MultinodeDeploymentTypeGrove is the "grove" deployment type.
	MultinodeDeploymentTypeGrove MultinodeDeploymentType = "grove"
	// MultinodeDeploymentTypeLWS is the "lws" deployment type.
	MultinodeDeploymentTypeLWS MultinodeDeploymentType = "lws"
)
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142

// GroupVersionResource identifiers for APIs defined outside this project.
var (
	// PodCliqueGVR is the Grove "podcliques" resource in grove.io/v1alpha1,
	// used for scaling operations.
	PodCliqueGVR = schema.GroupVersionResource{Group: "grove.io", Version: "v1alpha1", Resource: "podcliques"}

	// PodCliqueScalingGroupGVR is the Grove "podcliquescalinggroups" resource
	// in grove.io/v1alpha1, used for scaling operations.
	PodCliqueScalingGroupGVR = schema.GroupVersionResource{Group: "grove.io", Version: "v1alpha1", Resource: "podcliquescalinggroups"}

	// QueueGVR is the KAI-Scheduler "queues" resource in scheduling.run.ai/v2,
	// used for queue validation.
	QueueGVR = schema.GroupVersionResource{Group: "scheduling.run.ai", Version: "v2", Resource: "queues"}
)