Unverified commit b8461b6c, authored by Neelay Shah and committed by GitHub
Browse files

chore: updated health checks to use new probes (#2124)

parent 222245e2
...@@ -48,24 +48,19 @@ spec: ...@@ -48,24 +48,19 @@ spec:
VllmDecodeWorker: VllmDecodeWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
livenessProbe: livenessProbe:
exec: httpGet:
command: path: /live
- /bin/sh port: 9090
- -c periodSeconds: 5
- "exit 0"
periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 1
readinessProbe: readinessProbe:
exec: httpGet:
command: path: /health
- /bin/sh port: 9090
- -c periodSeconds: 10
- 'grep "VllmWorker.*has been initialized" /tmp/vllm.log'
initialDelaySeconds: 60
periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 60
dynamoNamespace: vllm-agg dynamoNamespace: vllm-agg
componentType: worker componentType: worker
replicas: 1 replicas: 1
...@@ -78,8 +73,21 @@ spec: ...@@ -78,8 +73,21 @@ spec:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
gpu: "1" gpu: "1"
envs:
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
value: "[\"generate\"]"
- name: DYN_SYSTEM_PORT
value: "9090"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
......
...@@ -48,24 +48,19 @@ spec: ...@@ -48,24 +48,19 @@ spec:
VllmDecodeWorker: VllmDecodeWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
livenessProbe: livenessProbe:
exec: httpGet:
command: path: /live
- /bin/sh port: 9090
- -c periodSeconds: 5
- "exit 0"
periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 1
readinessProbe: readinessProbe:
exec: httpGet:
command: path: /health
- /bin/sh port: 9090
- -c periodSeconds: 10
- 'grep "VllmWorker.*has been initialized" /tmp/vllm.log'
initialDelaySeconds: 60
periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 60
dynamoNamespace: vllm-agg-router dynamoNamespace: vllm-agg-router
componentType: worker componentType: worker
replicas: 2 replicas: 2
...@@ -78,8 +73,21 @@ spec: ...@@ -78,8 +73,21 @@ spec:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
gpu: "1" gpu: "1"
envs:
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
value: "[\"generate\"]"
- name: DYN_SYSTEM_PORT
value: "9090"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
......
...@@ -51,24 +51,19 @@ spec: ...@@ -51,24 +51,19 @@ spec:
componentType: worker componentType: worker
replicas: 1 replicas: 1
livenessProbe: livenessProbe:
exec: httpGet:
command: path: /live
- /bin/sh port: 9090
- -c periodSeconds: 5
- "exit 0"
periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 1
readinessProbe: readinessProbe:
exec: httpGet:
command: path: /health
- /bin/sh port: 9090
- -c periodSeconds: 10
- 'grep "VllmWorker.*has been initialized" /tmp/vllm.log'
initialDelaySeconds: 60
periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 60
resources: resources:
requests: requests:
cpu: "32" cpu: "32"
...@@ -78,8 +73,21 @@ spec: ...@@ -78,8 +73,21 @@ spec:
cpu: "32" cpu: "32"
memory: "40Gi" memory: "40Gi"
gpu: "1" gpu: "1"
envs:
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
value: "[\"generate\"]"
- name: DYN_SYSTEM_PORT
value: "9090"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
...@@ -93,24 +101,19 @@ spec: ...@@ -93,24 +101,19 @@ spec:
componentType: worker componentType: worker
replicas: 1 replicas: 1
livenessProbe: livenessProbe:
exec: httpGet:
command: path: /live
- /bin/sh port: 9090
- -c periodSeconds: 5
- "exit 0"
periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 1
readinessProbe: readinessProbe:
exec: httpGet:
command: path: /health
- /bin/sh port: 9090
- -c periodSeconds: 10
- 'grep "VllmWorker.*has been initialized" /tmp/vllm.log'
initialDelaySeconds: 60
periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 60
resources: resources:
requests: requests:
cpu: "32" cpu: "32"
...@@ -120,8 +123,21 @@ spec: ...@@ -120,8 +123,21 @@ spec:
cpu: "32" cpu: "32"
memory: "40Gi" memory: "40Gi"
gpu: "1" gpu: "1"
envs:
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
value: "[\"generate\"]"
- name: DYN_SYSTEM_PORT
value: "9090"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
......
...@@ -51,24 +51,19 @@ spec: ...@@ -51,24 +51,19 @@ spec:
componentType: worker componentType: worker
replicas: 1 replicas: 1
livenessProbe: livenessProbe:
exec: httpGet:
command: path: /live
- /bin/sh port: 9090
- -c periodSeconds: 5
- "exit 0"
periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 1
readinessProbe: readinessProbe:
exec: httpGet:
command: path: /health
- /bin/sh port: 9090
- -c periodSeconds: 10
- 'grep "VllmWorker.*has been initialized" /tmp/vllm.log'
initialDelaySeconds: 60
periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 60
resources: resources:
requests: requests:
cpu: "10" cpu: "10"
...@@ -78,8 +73,21 @@ spec: ...@@ -78,8 +73,21 @@ spec:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
gpu: "1" gpu: "1"
envs:
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
value: "[\"generate\"]"
- name: DYN_SYSTEM_PORT
value: "9090"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
...@@ -93,24 +101,19 @@ spec: ...@@ -93,24 +101,19 @@ spec:
componentType: worker componentType: worker
replicas: 1 replicas: 1
livenessProbe: livenessProbe:
exec: httpGet:
command: path: /health
- /bin/sh port: 9090
- -c periodSeconds: 5
- "exit 0"
periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 1
readinessProbe: readinessProbe:
exec: httpGet:
command: path: /health
- /bin/sh port: 9090
- -c periodSeconds: 10
- 'grep "VllmWorker.*has been initialized" /tmp/vllm.log'
initialDelaySeconds: 60
periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 60
resources: resources:
requests: requests:
cpu: "10" cpu: "10"
...@@ -120,8 +123,21 @@ spec: ...@@ -120,8 +123,21 @@ spec:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
gpu: "1" gpu: "1"
envs:
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
value: "[\"generate\"]"
- name: DYN_SYSTEM_PORT
value: "9090"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
......
...@@ -51,24 +51,19 @@ spec: ...@@ -51,24 +51,19 @@ spec:
componentType: worker componentType: worker
replicas: 2 replicas: 2
livenessProbe: livenessProbe:
exec: httpGet:
command: path: /live
- /bin/sh port: 9090
- -c periodSeconds: 5
- "exit 0"
periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 1
readinessProbe: readinessProbe:
exec: httpGet:
command: path: /health
- /bin/sh port: 9090
- -c periodSeconds: 10
- 'grep "VllmWorker.*has been initialized" /tmp/vllm.log'
initialDelaySeconds: 60
periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 60
resources: resources:
requests: requests:
cpu: "10" cpu: "10"
...@@ -78,8 +73,19 @@ spec: ...@@ -78,8 +73,19 @@ spec:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
gpu: "1" gpu: "1"
envs:
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
value: "[\"generate\"]"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
...@@ -93,24 +99,19 @@ spec: ...@@ -93,24 +99,19 @@ spec:
componentType: worker componentType: worker
replicas: 1 replicas: 1
livenessProbe: livenessProbe:
exec: httpGet:
command: path: /live
- /bin/sh port: 9090
- -c periodSeconds: 5
- "exit 0"
periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 1
readinessProbe: readinessProbe:
exec: httpGet:
command: path: /health
- /bin/sh port: 9090
- -c periodSeconds: 10
- 'grep "VllmWorker.*has been initialized" /tmp/vllm.log'
initialDelaySeconds: 60
periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 60
resources: resources:
requests: requests:
cpu: "10" cpu: "10"
...@@ -120,8 +121,21 @@ spec: ...@@ -120,8 +121,21 @@ spec:
cpu: "10" cpu: "10"
memory: "20Gi" memory: "20Gi"
gpu: "1" gpu: "1"
envs:
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
value: "[\"generate\"]"
- name: DYN_SYSTEM_PORT
value: "9090"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment