Unverified commit 56e99232, authored by julienmancuso, committed by GitHub
Browse files

fix: do not set default for resources (#2471)

parent 94876e58
......@@ -11,13 +11,6 @@ spec:
dynamoNamespace: sglang-agg
componentType: frontend
replicas: 1
resources:
requests:
cpu: "10"
memory: "10Gi"
limits:
cpu: "32"
memory: "40Gi"
extraPodSpec:
mainContainer:
image: my-registry/sglang-runtime:my-tag
......@@ -31,13 +24,7 @@ spec:
componentType: worker
replicas: 1
resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits:
cpu: "32"
memory: "80Gi"
gpu: "1"
extraPodSpec:
mainContainer:
......
......@@ -11,13 +11,6 @@ spec:
dynamoNamespace: sglang-agg-router
componentType: frontend
replicas: 1
resources:
requests:
cpu: "10"
memory: "10Gi"
limits:
cpu: "32"
memory: "40Gi"
extraPodSpec:
mainContainer:
image: my-registry/sglang-runtime:my-tag
......@@ -31,13 +24,7 @@ spec:
componentType: worker
replicas: 1
resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits:
cpu: "32"
memory: "80Gi"
gpu: "1"
extraPodSpec:
mainContainer:
......
......@@ -35,12 +35,7 @@ spec:
componentType: worker
replicas: 1
resources:
requests:
cpu: "10"
memory: "40Gi"
limits:
cpu: "10"
memory: "40Gi"
gpu: "4"
extraPodSpec:
mainContainer:
......@@ -67,12 +62,7 @@ spec:
componentType: worker
replicas: 1
resources:
requests:
cpu: "10"
memory: "40Gi"
limits:
cpu: "10"
memory: "40Gi"
gpu: "4"
extraPodSpec:
mainContainer:
......
......@@ -11,13 +11,6 @@ spec:
dynamoNamespace: sglang-disagg
componentType: frontend
replicas: 1
resources:
requests:
cpu: "10"
memory: "10Gi"
limits:
cpu: "32"
memory: "40Gi"
extraPodSpec:
mainContainer:
image: my-registry/sglang-runtime:my-tag
......@@ -31,13 +24,7 @@ spec:
componentType: worker
replicas: 1
resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits:
cpu: "32"
memory: "80Gi"
gpu: "1"
extraPodSpec:
mainContainer:
......@@ -64,13 +51,7 @@ spec:
componentType: worker
replicas: 1
resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits:
cpu: "32"
memory: "80Gi"
gpu: "1"
extraPodSpec:
mainContainer:
......
......@@ -18,13 +18,6 @@ spec:
dynamoNamespace: dynamo
componentType: frontend
replicas: 1
resources:
requests:
cpu: "10"
memory: "10Gi"
limits:
cpu: "32"
memory: "40Gi"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
......@@ -56,13 +49,6 @@ spec:
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
resources:
requests:
cpu: "2"
memory: "2Gi"
limits:
cpu: "8"
memory: "16Gi"
pvc:
create: false
name: profiling-pvc # Must be pre-created before deployment and SLA profiler must have been run
......@@ -107,13 +93,6 @@ spec:
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
resources:
requests:
cpu: "2"
memory: "2Gi"
limits:
cpu: "8"
memory: "16Gi"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
......@@ -129,13 +108,7 @@ spec:
componentType: worker
replicas: 2
resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits:
cpu: "32"
memory: "80Gi"
gpu: "1"
extraPodSpec:
mainContainer:
......@@ -161,13 +134,7 @@ spec:
componentType: worker
replicas: 2
resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits:
cpu: "32"
memory: "80Gi"
gpu: "1"
extraPodSpec:
mainContainer:
......
......@@ -11,13 +11,6 @@ spec:
dynamoNamespace: trtllm-agg
componentType: frontend
replicas: 1
resources:
requests:
cpu: "5"
memory: "10Gi"
limits:
cpu: "5"
memory: "10Gi"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
......@@ -33,13 +26,7 @@ spec:
componentType: worker
replicas: 1
resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits:
cpu: "10"
memory: "20Gi"
gpu: "1"
extraPodSpec:
mainContainer:
......
......@@ -26,13 +26,7 @@ spec:
componentType: worker
replicas: 2
resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits:
cpu: "10"
memory: "20Gi"
gpu: "1"
extraPodSpec:
mainContainer:
......
......@@ -11,13 +11,6 @@ spec:
dynamoNamespace: trtllm-disagg
componentType: frontend
replicas: 1
resources:
requests:
cpu: "5"
memory: "10Gi"
limits:
cpu: "5"
memory: "10Gi"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
......@@ -33,13 +26,7 @@ spec:
componentType: worker
replicas: 1
resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits:
cpu: "10"
memory: "20Gi"
gpu: "1"
extraPodSpec:
mainContainer:
......@@ -56,13 +43,7 @@ spec:
componentType: worker
replicas: 1
resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits:
cpu: "10"
memory: "20Gi"
gpu: "1"
extraPodSpec:
mainContainer:
......
......@@ -11,13 +11,6 @@ spec:
dynamoNamespace: trtllm-v1-disagg-router
componentType: frontend
replicas: 1
resources:
requests:
cpu: "5"
memory: "10Gi"
limits:
cpu: "5"
memory: "10Gi"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
......@@ -33,13 +26,7 @@ spec:
componentType: worker
replicas: 2
resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits:
cpu: "10"
memory: "20Gi"
gpu: "1"
extraPodSpec:
mainContainer:
......@@ -56,13 +43,7 @@ spec:
componentType: worker
replicas: 1
resources:
requests:
cpu: "10"
memory: "20Gi"
gpu: "1"
limits:
cpu: "10"
memory: "20Gi"
gpu: "1"
extraPodSpec:
mainContainer:
......
......@@ -25,6 +25,9 @@ spec:
dynamoNamespace: vllm-agg
componentType: worker
replicas: 1
resources:
limits:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
......
......@@ -27,6 +27,9 @@ spec:
replicas: 2
extraPodSpec:
mainContainer:
resources:
limits:
gpu: "1"
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm
command:
......
......@@ -11,13 +11,6 @@ spec:
dynamoNamespace: vllm-disagg
componentType: frontend
replicas: 1
resources:
requests:
cpu: "32"
memory: "10Gi"
limits:
cpu: "32"
memory: "10Gi"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
......@@ -33,13 +26,7 @@ spec:
componentType: worker
replicas: 1
resources:
requests:
cpu: "32"
memory: "40Gi"
gpu: "1"
limits:
cpu: "32"
memory: "40Gi"
gpu: "1"
extraPodSpec:
mainContainer:
......@@ -56,13 +43,7 @@ spec:
componentType: worker
replicas: 1
resources:
requests:
cpu: "32"
memory: "40Gi"
gpu: "1"
limits:
cpu: "32"
memory: "40Gi"
gpu: "1"
extraPodSpec:
mainContainer:
......
......@@ -18,13 +18,6 @@ spec:
dynamoNamespace: vllm-disagg-planner
componentType: frontend
replicas: 1
resources:
requests:
cpu: "32"
memory: "10Gi"
limits:
cpu: "32"
memory: "10Gi"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0814-02
......@@ -58,13 +51,6 @@ spec:
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
resources:
requests:
cpu: "2"
memory: "2Gi"
limits:
cpu: "2"
memory: "2Gi"
pvc:
create: false
name: profiling-pvc # Must be pre-created before deployment and SLA profiler must have been run
......@@ -113,13 +99,6 @@ spec:
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
resources:
requests:
cpu: "2"
memory: "2Gi"
limits:
cpu: "2"
memory: "2Gi"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0814-02
......@@ -135,13 +114,7 @@ spec:
componentType: worker
replicas: 2
resources:
requests:
cpu: "8"
memory: "16Gi"
gpu: "1"
limits:
cpu: "8"
memory: "16Gi"
gpu: "1"
extraPodSpec:
mainContainer:
......@@ -164,13 +137,7 @@ spec:
componentType: worker
replicas: 2
resources:
requests:
cpu: "8"
memory: "16Gi"
gpu: "1"
limits:
cpu: "8"
memory: "16Gi"
gpu: "1"
extraPodSpec:
mainContainer:
......
......@@ -25,6 +25,9 @@ spec:
envFromSecret: hf-token-secret
componentType: worker
replicas: 2
resources:
limits:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
......@@ -39,6 +42,9 @@ spec:
envFromSecret: hf-token-secret
componentType: worker
replicas: 1
resources:
limits:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
......
......@@ -705,7 +705,9 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Memory: "500Mi",
},
Limits: &common.ResourceItem{
GPU: "1",
GPU: "1",
Memory: "20Gi",
CPU: "10",
},
},
ExtraPodMetadata: &common.ExtraPodMetadata{
......
......@@ -10,7 +10,6 @@ import (
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/intstr"
)
......@@ -66,17 +65,6 @@ func (f *FrontendDefaults) GetBaseContainer(context ComponentContext) (corev1.Co
FailureThreshold: 10,
}
container.Resources = corev1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("1"),
corev1.ResourceMemory: resource.MustParse("2Gi"),
},
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("1"),
corev1.ResourceMemory: resource.MustParse("2Gi"),
},
}
// Add standard environment variables
container.Env = append(container.Env, []corev1.EnvVar{
{
......
......@@ -8,7 +8,6 @@ package dynamo
import (
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
)
// PlannerDefaults implements ComponentDefaults for Planner components
......@@ -22,19 +21,6 @@ func NewPlannerDefaults() *PlannerDefaults {
func (p *PlannerDefaults) GetBaseContainer(context ComponentContext) (corev1.Container, error) {
container := p.getCommonContainer(context)
// Add planner-specific defaults
container.Resources = corev1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("2"),
corev1.ResourceMemory: resource.MustParse("2Gi"),
},
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("2"),
corev1.ResourceMemory: resource.MustParse("2Gi"),
},
}
return container, nil
}
......
......@@ -10,7 +10,6 @@ import (
"github.com/google/go-cmp/cmp"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
)
func TestPlannerDefaults_GetBaseContainer(t *testing.T) {
......@@ -47,16 +46,6 @@ func TestPlannerDefaults_GetBaseContainer(t *testing.T) {
"/bin/sh",
"-c",
},
Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("2"),
corev1.ResourceMemory: resource.MustParse("2Gi"),
},
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("2"),
corev1.ResourceMemory: resource.MustParse("2Gi"),
},
},
Env: []corev1.EnvVar{
{Name: "DYN_NAMESPACE", Value: "dynamo-namespace"},
{Name: "DYN_PARENT_DGD_K8S_NAME", Value: "name"},
......
......@@ -10,7 +10,6 @@ import (
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/intstr"
)
......@@ -35,19 +34,6 @@ func (w *WorkerDefaults) GetBaseContainer(context ComponentContext) (corev1.Cont
},
}
// Add worker base defaults
container.Resources = corev1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("10"),
corev1.ResourceMemory: resource.MustParse("20Gi"),
},
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("10"),
corev1.ResourceMemory: resource.MustParse("20Gi"),
"nvidia.com/gpu": resource.MustParse("1"),
},
}
container.LivenessProbe = &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
......
......@@ -24,7 +24,7 @@ Dynamo supports multinode deployments through the `multinode` section in resourc
For sophisticated multinode deployments, Dynamo integrates with advanced Kubernetes orchestration systems:
- **[Grove](https://github.com/NVIDIA/grove)**: Network topology-aware gang scheduling and auto-scaling for AI workloads
- **[Grove](https://github.com/NVIDIA/grove/blob/main/docs/getting-started.md)**: Network topology-aware gang scheduling and auto-scaling for AI workloads
- (optional) **[KAI-Scheduler](https://github.com/NVIDIA/KAI-Scheduler)**: Kubernetes native scheduler optimized for AI workloads at scale
These systems provide enhanced scheduling capabilities including topology-aware placement, gang scheduling, and coordinated auto-scaling across multiple nodes.
......@@ -66,12 +66,7 @@ The `multinode` section in a resource specification defines how many physical no
multinode:
nodeCount: 2
resources:
requests:
cpu: "10"
memory: "40Gi"
limits:
cpu: "10"
memory: "40Gi"
gpu: "2" # 2 GPUs per node
```
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment