Unverified Commit c183aa31 authored by Biswa Panda's avatar Biswa Panda Committed by GitHub
Browse files

fix: fix gpu resource spec in llm deployments (#1812)

parent 24bede9b
...@@ -79,11 +79,11 @@ spec: ...@@ -79,11 +79,11 @@ spec:
requests: requests:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
limits: limits:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest
......
...@@ -104,11 +104,11 @@ spec: ...@@ -104,11 +104,11 @@ spec:
requests: requests:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
limits: limits:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest
......
...@@ -79,11 +79,11 @@ spec: ...@@ -79,11 +79,11 @@ spec:
requests: requests:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
limits: limits:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest
...@@ -106,11 +106,11 @@ spec: ...@@ -106,11 +106,11 @@ spec:
requests: requests:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
limits: limits:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest
......
...@@ -104,11 +104,11 @@ spec: ...@@ -104,11 +104,11 @@ spec:
requests: requests:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
limits: limits:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest
...@@ -131,11 +131,11 @@ spec: ...@@ -131,11 +131,11 @@ spec:
requests: requests:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
limits: limits:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest
......
...@@ -54,11 +54,11 @@ spec: ...@@ -54,11 +54,11 @@ spec:
requests: requests:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
limits: limits:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest
......
...@@ -54,11 +54,11 @@ spec: ...@@ -54,11 +54,11 @@ spec:
requests: requests:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
limits: limits:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest
...@@ -81,11 +81,11 @@ spec: ...@@ -81,11 +81,11 @@ spec:
requests: requests:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
limits: limits:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest
......
...@@ -55,11 +55,11 @@ spec: ...@@ -55,11 +55,11 @@ spec:
requests: requests:
cpu: "20" cpu: "20"
memory: "40Gi" memory: "40Gi"
nvidia.com/gpu: "2" gpu: "2"
limits: limits:
cpu: "20" cpu: "20"
memory: "40Gi" memory: "40Gi"
nvidia.com/gpu: "2" gpu: "2"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest
...@@ -83,11 +83,11 @@ spec: ...@@ -83,11 +83,11 @@ spec:
requests: requests:
cpu: "20" cpu: "20"
memory: "40Gi" memory: "40Gi"
nvidia.com/gpu: "2" gpu: "2"
limits: limits:
cpu: "20" cpu: "20"
memory: "40Gi" memory: "40Gi"
nvidia.com/gpu: "2" gpu: "2"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest
......
...@@ -79,11 +79,11 @@ spec: ...@@ -79,11 +79,11 @@ spec:
requests: requests:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
limits: limits:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest
......
...@@ -79,11 +79,11 @@ spec: ...@@ -79,11 +79,11 @@ spec:
requests: requests:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
limits: limits:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest
...@@ -106,11 +106,11 @@ spec: ...@@ -106,11 +106,11 @@ spec:
requests: requests:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
limits: limits:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
nvidia.com/gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest
......
...@@ -81,11 +81,11 @@ spec: ...@@ -81,11 +81,11 @@ spec:
requests: requests:
cpu: "20" cpu: "20"
memory: "40Gi" memory: "40Gi"
nvidia.com/gpu: "2" gpu: "2"
limits: limits:
cpu: "20" cpu: "20"
memory: "40Gi" memory: "40Gi"
nvidia.com/gpu: "2" gpu: "2"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest
...@@ -109,11 +109,11 @@ spec: ...@@ -109,11 +109,11 @@ spec:
requests: requests:
cpu: "20" cpu: "20"
memory: "40Gi" memory: "40Gi"
nvidia.com/gpu: "2" gpu: "2"
limits: limits:
cpu: "20" cpu: "20"
memory: "40Gi" memory: "40Gi"
nvidia.com/gpu: "2" gpu: "2"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:latest
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment