Unverified commit 56e99232, authored by julienmancuso, committed by GitHub
Browse files

fix: do not set default for resources (#2471)

parent 94876e58
...@@ -11,13 +11,6 @@ spec: ...@@ -11,13 +11,6 @@ spec:
dynamoNamespace: sglang-agg dynamoNamespace: sglang-agg
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
resources:
requests:
cpu: "10"
memory: "10Gi"
limits:
cpu: "32"
memory: "40Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
...@@ -31,13 +24,7 @@ spec: ...@@ -31,13 +24,7 @@ spec:
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits: limits:
cpu: "32"
memory: "80Gi"
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
......
...@@ -11,13 +11,6 @@ spec: ...@@ -11,13 +11,6 @@ spec:
dynamoNamespace: sglang-agg-router dynamoNamespace: sglang-agg-router
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
resources:
requests:
cpu: "10"
memory: "10Gi"
limits:
cpu: "32"
memory: "40Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
...@@ -31,13 +24,7 @@ spec: ...@@ -31,13 +24,7 @@ spec:
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits: limits:
cpu: "32"
memory: "80Gi"
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
......
...@@ -35,12 +35,7 @@ spec: ...@@ -35,12 +35,7 @@ spec:
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
requests:
cpu: "10"
memory: "40Gi"
limits: limits:
cpu: "10"
memory: "40Gi"
gpu: "4" gpu: "4"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
...@@ -67,12 +62,7 @@ spec: ...@@ -67,12 +62,7 @@ spec:
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
requests:
cpu: "10"
memory: "40Gi"
limits: limits:
cpu: "10"
memory: "40Gi"
gpu: "4" gpu: "4"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
......
...@@ -11,13 +11,6 @@ spec: ...@@ -11,13 +11,6 @@ spec:
dynamoNamespace: sglang-disagg dynamoNamespace: sglang-disagg
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
resources:
requests:
cpu: "10"
memory: "10Gi"
limits:
cpu: "32"
memory: "40Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
...@@ -31,13 +24,7 @@ spec: ...@@ -31,13 +24,7 @@ spec:
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits: limits:
cpu: "32"
memory: "80Gi"
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
...@@ -64,13 +51,7 @@ spec: ...@@ -64,13 +51,7 @@ spec:
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits: limits:
cpu: "32"
memory: "80Gi"
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
......
...@@ -18,13 +18,6 @@ spec: ...@@ -18,13 +18,6 @@ spec:
dynamoNamespace: dynamo dynamoNamespace: dynamo
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
resources:
requests:
cpu: "10"
memory: "10Gi"
limits:
cpu: "32"
memory: "40Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1 image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
...@@ -56,13 +49,6 @@ spec: ...@@ -56,13 +49,6 @@ spec:
periodSeconds: 60 periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 10
resources:
requests:
cpu: "2"
memory: "2Gi"
limits:
cpu: "8"
memory: "16Gi"
pvc: pvc:
create: false create: false
name: profiling-pvc # Must be pre-created before deployment and SLA profiler must have been run name: profiling-pvc # Must be pre-created before deployment and SLA profiler must have been run
...@@ -107,13 +93,6 @@ spec: ...@@ -107,13 +93,6 @@ spec:
periodSeconds: 60 periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 10
resources:
requests:
cpu: "2"
memory: "2Gi"
limits:
cpu: "8"
memory: "16Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1 image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
...@@ -129,13 +108,7 @@ spec: ...@@ -129,13 +108,7 @@ spec:
componentType: worker componentType: worker
replicas: 2 replicas: 2
resources: resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits: limits:
cpu: "32"
memory: "80Gi"
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
...@@ -161,13 +134,7 @@ spec: ...@@ -161,13 +134,7 @@ spec:
componentType: worker componentType: worker
replicas: 2 replicas: 2
resources: resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits: limits:
cpu: "32"
memory: "80Gi"
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
......
...@@ -11,13 +11,6 @@ spec: ...@@ -11,13 +11,6 @@ spec:
dynamoNamespace: trtllm-agg dynamoNamespace: trtllm-agg
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
resources:
requests:
cpu: "5"
memory: "10Gi"
limits:
cpu: "5"
memory: "10Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
...@@ -33,13 +26,7 @@ spec: ...@@ -33,13 +26,7 @@ spec:
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits: limits:
cpu: "10"
memory: "20Gi"
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
......
...@@ -26,13 +26,7 @@ spec: ...@@ -26,13 +26,7 @@ spec:
componentType: worker componentType: worker
replicas: 2 replicas: 2
resources: resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits: limits:
cpu: "10"
memory: "20Gi"
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
......
...@@ -11,13 +11,6 @@ spec: ...@@ -11,13 +11,6 @@ spec:
dynamoNamespace: trtllm-disagg dynamoNamespace: trtllm-disagg
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
resources:
requests:
cpu: "5"
memory: "10Gi"
limits:
cpu: "5"
memory: "10Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
...@@ -33,13 +26,7 @@ spec: ...@@ -33,13 +26,7 @@ spec:
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits: limits:
cpu: "10"
memory: "20Gi"
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
...@@ -56,13 +43,7 @@ spec: ...@@ -56,13 +43,7 @@ spec:
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits: limits:
cpu: "10"
memory: "20Gi"
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
......
...@@ -11,13 +11,6 @@ spec: ...@@ -11,13 +11,6 @@ spec:
dynamoNamespace: trtllm-v1-disagg-router dynamoNamespace: trtllm-v1-disagg-router
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
resources:
requests:
cpu: "5"
memory: "10Gi"
limits:
cpu: "5"
memory: "10Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
...@@ -33,13 +26,7 @@ spec: ...@@ -33,13 +26,7 @@ spec:
componentType: worker componentType: worker
replicas: 2 replicas: 2
resources: resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits: limits:
cpu: "10"
memory: "20Gi"
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
...@@ -56,13 +43,7 @@ spec: ...@@ -56,13 +43,7 @@ spec:
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits: limits:
cpu: "10"
memory: "20Gi"
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
......
...@@ -25,6 +25,9 @@ spec: ...@@ -25,6 +25,9 @@ spec:
dynamoNamespace: vllm-agg dynamoNamespace: vllm-agg
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources:
limits:
gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
......
...@@ -27,6 +27,9 @@ spec: ...@@ -27,6 +27,9 @@ spec:
replicas: 2 replicas: 2
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
resources:
limits:
gpu: "1"
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
......
...@@ -11,13 +11,6 @@ spec: ...@@ -11,13 +11,6 @@ spec:
dynamoNamespace: vllm-disagg dynamoNamespace: vllm-disagg
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
resources:
requests:
cpu: "32"
memory: "10Gi"
limits:
cpu: "32"
memory: "10Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
...@@ -33,13 +26,7 @@ spec: ...@@ -33,13 +26,7 @@ spec:
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
requests:
cpu: "32"
memory: "40Gi"
gpu: "1"
limits: limits:
cpu: "32"
memory: "40Gi"
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
...@@ -56,13 +43,7 @@ spec: ...@@ -56,13 +43,7 @@ spec:
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
requests:
cpu: "32"
memory: "40Gi"
gpu: "1"
limits: limits:
cpu: "32"
memory: "40Gi"
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
......
...@@ -18,13 +18,6 @@ spec: ...@@ -18,13 +18,6 @@ spec:
dynamoNamespace: vllm-disagg-planner dynamoNamespace: vllm-disagg-planner
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
resources:
requests:
cpu: "32"
memory: "10Gi"
limits:
cpu: "32"
memory: "10Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0814-02 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0814-02
...@@ -58,13 +51,6 @@ spec: ...@@ -58,13 +51,6 @@ spec:
periodSeconds: 60 periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 10
resources:
requests:
cpu: "2"
memory: "2Gi"
limits:
cpu: "2"
memory: "2Gi"
pvc: pvc:
create: false create: false
name: profiling-pvc # Must be pre-created before deployment and SLA profiler must have been run name: profiling-pvc # Must be pre-created before deployment and SLA profiler must have been run
...@@ -113,13 +99,6 @@ spec: ...@@ -113,13 +99,6 @@ spec:
periodSeconds: 60 periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 10
resources:
requests:
cpu: "2"
memory: "2Gi"
limits:
cpu: "2"
memory: "2Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0814-02 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0814-02
...@@ -135,13 +114,7 @@ spec: ...@@ -135,13 +114,7 @@ spec:
componentType: worker componentType: worker
replicas: 2 replicas: 2
resources: resources:
requests:
cpu: "8"
memory: "16Gi"
gpu: "1"
limits: limits:
cpu: "8"
memory: "16Gi"
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
...@@ -164,13 +137,7 @@ spec: ...@@ -164,13 +137,7 @@ spec:
componentType: worker componentType: worker
replicas: 2 replicas: 2
resources: resources:
requests:
cpu: "8"
memory: "16Gi"
gpu: "1"
limits: limits:
cpu: "8"
memory: "16Gi"
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
......
...@@ -25,6 +25,9 @@ spec: ...@@ -25,6 +25,9 @@ spec:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
replicas: 2 replicas: 2
resources:
limits:
gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
...@@ -39,6 +42,9 @@ spec: ...@@ -39,6 +42,9 @@ spec:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources:
limits:
gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
......
...@@ -706,6 +706,8 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -706,6 +706,8 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
}, },
Limits: &common.ResourceItem{ Limits: &common.ResourceItem{
GPU: "1", GPU: "1",
Memory: "20Gi",
CPU: "10",
}, },
}, },
ExtraPodMetadata: &common.ExtraPodMetadata{ ExtraPodMetadata: &common.ExtraPodMetadata{
......
...@@ -10,7 +10,6 @@ import ( ...@@ -10,7 +10,6 @@ import (
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts" commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/intstr"
) )
...@@ -66,17 +65,6 @@ func (f *FrontendDefaults) GetBaseContainer(context ComponentContext) (corev1.Co ...@@ -66,17 +65,6 @@ func (f *FrontendDefaults) GetBaseContainer(context ComponentContext) (corev1.Co
FailureThreshold: 10, FailureThreshold: 10,
} }
container.Resources = corev1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("1"),
corev1.ResourceMemory: resource.MustParse("2Gi"),
},
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("1"),
corev1.ResourceMemory: resource.MustParse("2Gi"),
},
}
// Add standard environment variables // Add standard environment variables
container.Env = append(container.Env, []corev1.EnvVar{ container.Env = append(container.Env, []corev1.EnvVar{
{ {
......
...@@ -8,7 +8,6 @@ package dynamo ...@@ -8,7 +8,6 @@ package dynamo
import ( import (
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts" commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
) )
// PlannerDefaults implements ComponentDefaults for Planner components // PlannerDefaults implements ComponentDefaults for Planner components
...@@ -22,19 +21,6 @@ func NewPlannerDefaults() *PlannerDefaults { ...@@ -22,19 +21,6 @@ func NewPlannerDefaults() *PlannerDefaults {
func (p *PlannerDefaults) GetBaseContainer(context ComponentContext) (corev1.Container, error) { func (p *PlannerDefaults) GetBaseContainer(context ComponentContext) (corev1.Container, error) {
container := p.getCommonContainer(context) container := p.getCommonContainer(context)
// Add planner-specific defaults
container.Resources = corev1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("2"),
corev1.ResourceMemory: resource.MustParse("2Gi"),
},
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("2"),
corev1.ResourceMemory: resource.MustParse("2Gi"),
},
}
return container, nil return container, nil
} }
......
...@@ -10,7 +10,6 @@ import ( ...@@ -10,7 +10,6 @@ import (
"github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
) )
func TestPlannerDefaults_GetBaseContainer(t *testing.T) { func TestPlannerDefaults_GetBaseContainer(t *testing.T) {
...@@ -47,16 +46,6 @@ func TestPlannerDefaults_GetBaseContainer(t *testing.T) { ...@@ -47,16 +46,6 @@ func TestPlannerDefaults_GetBaseContainer(t *testing.T) {
"/bin/sh", "/bin/sh",
"-c", "-c",
}, },
Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("2"),
corev1.ResourceMemory: resource.MustParse("2Gi"),
},
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("2"),
corev1.ResourceMemory: resource.MustParse("2Gi"),
},
},
Env: []corev1.EnvVar{ Env: []corev1.EnvVar{
{Name: "DYN_NAMESPACE", Value: "dynamo-namespace"}, {Name: "DYN_NAMESPACE", Value: "dynamo-namespace"},
{Name: "DYN_PARENT_DGD_K8S_NAME", Value: "name"}, {Name: "DYN_PARENT_DGD_K8S_NAME", Value: "name"},
......
...@@ -10,7 +10,6 @@ import ( ...@@ -10,7 +10,6 @@ import (
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts" commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/intstr"
) )
...@@ -35,19 +34,6 @@ func (w *WorkerDefaults) GetBaseContainer(context ComponentContext) (corev1.Cont ...@@ -35,19 +34,6 @@ func (w *WorkerDefaults) GetBaseContainer(context ComponentContext) (corev1.Cont
}, },
} }
// Add worker base defaults
container.Resources = corev1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("10"),
corev1.ResourceMemory: resource.MustParse("20Gi"),
},
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("10"),
corev1.ResourceMemory: resource.MustParse("20Gi"),
"nvidia.com/gpu": resource.MustParse("1"),
},
}
container.LivenessProbe = &corev1.Probe{ container.LivenessProbe = &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{ ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{ HTTPGet: &corev1.HTTPGetAction{
......
...@@ -24,7 +24,7 @@ Dynamo supports multinode deployments through the `multinode` section in resourc ...@@ -24,7 +24,7 @@ Dynamo supports multinode deployments through the `multinode` section in resourc
For sophisticated multinode deployments, Dynamo integrates with advanced Kubernetes orchestration systems: For sophisticated multinode deployments, Dynamo integrates with advanced Kubernetes orchestration systems:
- **[Grove](https://github.com/NVIDIA/grove)**: Network topology-aware gang scheduling and auto-scaling for AI workloads - **[Grove](https://github.com/NVIDIA/grove/blob/main/docs/getting-started.md)**: Network topology-aware gang scheduling and auto-scaling for AI workloads
- (optional) **[KAI-Scheduler](https://github.com/NVIDIA/KAI-Scheduler)**: Kubernetes native scheduler optimized for AI workloads at scale - (optional) **[KAI-Scheduler](https://github.com/NVIDIA/KAI-Scheduler)**: Kubernetes native scheduler optimized for AI workloads at scale
These systems provide enhanced scheduling capabilities including topology-aware placement, gang scheduling, and coordinated auto-scaling across multiple nodes. These systems provide enhanced scheduling capabilities including topology-aware placement, gang scheduling, and coordinated auto-scaling across multiple nodes.
...@@ -66,12 +66,7 @@ The `multinode` section in a resource specification defines how many physical no ...@@ -66,12 +66,7 @@ The `multinode` section in a resource specification defines how many physical no
multinode: multinode:
nodeCount: 2 nodeCount: 2
resources: resources:
requests:
cpu: "10"
memory: "40Gi"
limits: limits:
cpu: "10"
memory: "40Gi"
gpu: "2" # 2 GPUs per node gpu: "2" # 2 GPUs per node
``` ```
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment