"lib/kvbm-physical/src/vscode:/vscode.git/clone" did not exist on "b685291960550b4a7d6f1d21e471226f09f8b771"
consts.go 3.07 KB
Newer Older
1
2
package consts

3
4
5
6
7
import (
	"time"

	"k8s.io/apimachinery/pkg/runtime/schema"
)
8

9
10
11
12
13
14
// Shared constants: default values, port numbers and names, Kubernetes
// label/annotation keys, component type names and scheduler settings.
const (
	// HPACPUDefaultAverageUtilization is the default average CPU utilization
	// value. NOTE(review): presumably a percentage used as an HPA target;
	// confirm against callers.
	HPACPUDefaultAverageUtilization = 80

	// Default user and organization identifiers.
	DefaultUserId = "default"
	DefaultOrgId  = "default"

	// Service port number and the names given to the service and container ports.
	DynamoServicePort       = 8000
	DynamoServicePortName   = "http"
	DynamoContainerPortName = "http"

	// System port number and its port name.
	DynamoSystemPort     = 9090
	DynamoSystemPortName = "system"

	// MpiRunSshPort is the SSH port number associated with mpirun.
	MpiRunSshPort = 2222

	// EnvDynamoServicePort is the name of the environment variable for the
	// service port.
	EnvDynamoServicePort = "DYNAMO_PORT"

	// KubeLabelDynamoSelector is the selector label key.
	KubeLabelDynamoSelector = "nvidia.com/selector"

	// KubeAnnotationEnableGrove is the annotation key for enabling Grove.
	KubeAnnotationEnableGrove = "nvidia.com/enable-grove"

	// Label keys describing a Dynamo graph deployment and its components.
	KubeLabelDynamoGraphDeploymentName  = "nvidia.com/dynamo-graph-deployment-name"
	KubeLabelDynamoComponent            = "nvidia.com/dynamo-component"
	KubeLabelDynamoNamespace            = "nvidia.com/dynamo-namespace"
	KubeLabelDynamoDeploymentTargetType = "nvidia.com/dynamo-deployment-target-type"
	KubeLabelDynamoComponentType        = "nvidia.com/dynamo-component-type"

	// String forms of boolean label values.
	KubeLabelValueFalse = "false"
	KubeLabelValueTrue  = "true"

	// KubeLabelDynamoComponentPod is the label key for component pods.
	KubeLabelDynamoComponentPod = "nvidia.com/dynamo-component-pod"

	// KubeResourceGPUNvidia is the resource name for NVIDIA GPUs.
	KubeResourceGPUNvidia = "nvidia.com/gpu"

	// DynamoDeploymentConfigEnvVar is the name of the environment variable
	// for the deployment config.
	DynamoDeploymentConfigEnvVar = "DYN_DEPLOYMENT_CONFIG"

	// Component type names and the planner's service account name.
	ComponentTypePlanner      = "planner"
	ComponentTypeFrontend     = "frontend"
	ComponentTypeWorker       = "worker"
	ComponentTypeDefault      = "default"
	PlannerServiceAccountName = "planner-serviceaccount"

	// DefaultIngressSuffix is the default ingress suffix.
	DefaultIngressSuffix = "local"

	// DefaultGroveTerminationDelay is the default Grove termination delay
	// (15 minutes).
	DefaultGroveTerminationDelay = 15 * time.Minute

	// Metrics related constants
	KubeAnnotationEnableMetrics = "nvidia.com/enable-metrics"  // User-provided annotation to control metrics
	KubeLabelMetricsEnabled     = "nvidia.com/metrics-enabled" // Controller-managed label for pod selection

	// Shared-memory name, default mount path and default size.
	KubeValueNameSharedMemory    = "shared-memory"
	DefaultSharedMemoryMountPath = "/dev/shm"
	DefaultSharedMemorySize      = "8Gi"

	// Kai-scheduler related constants
	KubeAnnotationKaiSchedulerQueue = "nvidia.com/kai-scheduler-queue" // User-provided annotation to specify queue name
	KubeLabelKaiSchedulerQueue      = "kai.scheduler/queue"            // Label injected into pods for kai-scheduler
	KaiSchedulerName                = "kai-scheduler"                  // Scheduler name for kai-scheduler
	DefaultKaiSchedulerQueue        = "dynamo"                         // Default queue name when none specified

	// Grove multinode role suffixes
	GroveRoleSuffixLeader = "ldr"
	GroveRoleSuffixWorker = "wkr"

	// MpiRunSshSecretName is the name of the secret associated with mpirun SSH.
	MpiRunSshSecretName = "mpi-run-ssh-secret"
)

// MultinodeDeploymentType is a string-backed type naming a multinode
// deployment kind.
type MultinodeDeploymentType string

// Defined MultinodeDeploymentType values.
const (
	// MultinodeDeploymentTypeGrove has the string value "grove".
	MultinodeDeploymentTypeGrove MultinodeDeploymentType = "grove"
	// MultinodeDeploymentTypeLWS has the string value "lws".
	MultinodeDeploymentTypeLWS MultinodeDeploymentType = "lws"
)
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102

// GroupVersionResources for external APIs.
// Each value names a resource by API group, version and plural resource name.
var (
	// Grove GroupVersionResources for scaling operations

	// PodCliqueGVR identifies the "podcliques" resource in group "grove.io",
	// version "v1alpha1".
	PodCliqueGVR = schema.GroupVersionResource{
		Group:    "grove.io",
		Version:  "v1alpha1",
		Resource: "podcliques",
	}
	// PodCliqueScalingGroupGVR identifies the "podcliquescalinggroups"
	// resource in group "grove.io", version "v1alpha1".
	PodCliqueScalingGroupGVR = schema.GroupVersionResource{
		Group:    "grove.io",
		Version:  "v1alpha1",
		Resource: "podcliquescalinggroups",
	}

	// KAI-Scheduler GroupVersionResource for queue validation

	// QueueGVR identifies the "queues" resource in group "scheduling.run.ai",
	// version "v2".
	QueueGVR = schema.GroupVersionResource{
		Group:    "scheduling.run.ai",
		Version:  "v2",
		Resource: "queues",
	}
)