Unverified commit b8461b6c, authored by Neelay Shah and committed by GitHub
Browse files

chore: updated health checks to use new probes (#2124)

parent 222245e2
......@@ -48,24 +48,19 @@ spec:
VllmDecodeWorker:
envFromSecret: hf-token-secret
livenessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
periodSeconds: 60
httpGet:
path: /live
port: 9090
periodSeconds: 5
timeoutSeconds: 30
failureThreshold: 10
failureThreshold: 1
readinessProbe:
exec:
command:
- /bin/sh
- -c
- 'grep "VllmWorker.*has been initialized" /tmp/vllm.log'
initialDelaySeconds: 60
periodSeconds: 60
httpGet:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 30
failureThreshold: 10
failureThreshold: 60
dynamoNamespace: vllm-agg
componentType: worker
replicas: 1
......@@ -78,8 +73,21 @@ spec:
cpu: "10"
memory: "20Gi"
gpu: "1"
envs:
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
value: "[\"generate\"]"
- name: DYN_SYSTEM_PORT
value: "9090"
extraPodSpec:
mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm
command:
......
......@@ -48,24 +48,19 @@ spec:
VllmDecodeWorker:
envFromSecret: hf-token-secret
livenessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
periodSeconds: 60
httpGet:
path: /live
port: 9090
periodSeconds: 5
timeoutSeconds: 30
failureThreshold: 10
failureThreshold: 1
readinessProbe:
exec:
command:
- /bin/sh
- -c
- 'grep "VllmWorker.*has been initialized" /tmp/vllm.log'
initialDelaySeconds: 60
periodSeconds: 60
httpGet:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 30
failureThreshold: 10
failureThreshold: 60
dynamoNamespace: vllm-agg-router
componentType: worker
replicas: 2
......@@ -78,8 +73,21 @@ spec:
cpu: "10"
memory: "20Gi"
gpu: "1"
envs:
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
value: "[\"generate\"]"
- name: DYN_SYSTEM_PORT
value: "9090"
extraPodSpec:
mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm
command:
......
......@@ -51,24 +51,19 @@ spec:
componentType: worker
replicas: 1
livenessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
periodSeconds: 60
httpGet:
path: /live
port: 9090
periodSeconds: 5
timeoutSeconds: 30
failureThreshold: 10
failureThreshold: 1
readinessProbe:
exec:
command:
- /bin/sh
- -c
- 'grep "VllmWorker.*has been initialized" /tmp/vllm.log'
initialDelaySeconds: 60
periodSeconds: 60
httpGet:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 30
failureThreshold: 10
failureThreshold: 60
resources:
requests:
cpu: "32"
......@@ -78,8 +73,21 @@ spec:
cpu: "32"
memory: "40Gi"
gpu: "1"
envs:
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
value: "[\"generate\"]"
- name: DYN_SYSTEM_PORT
value: "9090"
extraPodSpec:
mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm
command:
......@@ -93,24 +101,19 @@ spec:
componentType: worker
replicas: 1
livenessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
periodSeconds: 60
httpGet:
path: /live
port: 9090
periodSeconds: 5
timeoutSeconds: 30
failureThreshold: 10
failureThreshold: 1
readinessProbe:
exec:
command:
- /bin/sh
- -c
- 'grep "VllmWorker.*has been initialized" /tmp/vllm.log'
initialDelaySeconds: 60
periodSeconds: 60
httpGet:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 30
failureThreshold: 10
failureThreshold: 60
resources:
requests:
cpu: "32"
......@@ -120,8 +123,21 @@ spec:
cpu: "32"
memory: "40Gi"
gpu: "1"
envs:
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
value: "[\"generate\"]"
- name: DYN_SYSTEM_PORT
value: "9090"
extraPodSpec:
mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm
command:
......
......@@ -51,24 +51,19 @@ spec:
componentType: worker
replicas: 1
livenessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
periodSeconds: 60
httpGet:
path: /live
port: 9090
periodSeconds: 5
timeoutSeconds: 30
failureThreshold: 10
failureThreshold: 1
readinessProbe:
exec:
command:
- /bin/sh
- -c
- 'grep "VllmWorker.*has been initialized" /tmp/vllm.log'
initialDelaySeconds: 60
periodSeconds: 60
httpGet:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 30
failureThreshold: 10
failureThreshold: 60
resources:
requests:
cpu: "10"
......@@ -78,8 +73,21 @@ spec:
cpu: "10"
memory: "20Gi"
gpu: "1"
envs:
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
value: "[\"generate\"]"
- name: DYN_SYSTEM_PORT
value: "9090"
extraPodSpec:
mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm
command:
......@@ -93,24 +101,19 @@ spec:
componentType: worker
replicas: 1
livenessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
periodSeconds: 60
# Liveness polls /live, like the other worker liveness probes in this change;
# /health is the path the readiness and startup probes poll.
httpGet:
path: /live
port: 9090
periodSeconds: 5
timeoutSeconds: 30
failureThreshold: 10
failureThreshold: 1
readinessProbe:
exec:
command:
- /bin/sh
- -c
- 'grep "VllmWorker.*has been initialized" /tmp/vllm.log'
initialDelaySeconds: 60
periodSeconds: 60
httpGet:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 30
failureThreshold: 10
failureThreshold: 60
resources:
requests:
cpu: "10"
......@@ -120,8 +123,21 @@ spec:
cpu: "10"
memory: "20Gi"
gpu: "1"
envs:
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
value: "[\"generate\"]"
- name: DYN_SYSTEM_PORT
value: "9090"
extraPodSpec:
mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm
command:
......
......@@ -51,24 +51,19 @@ spec:
componentType: worker
replicas: 2
livenessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
periodSeconds: 60
httpGet:
path: /live
port: 9090
periodSeconds: 5
timeoutSeconds: 30
failureThreshold: 10
failureThreshold: 1
readinessProbe:
exec:
command:
- /bin/sh
- -c
- 'grep "VllmWorker.*has been initialized" /tmp/vllm.log'
initialDelaySeconds: 60
periodSeconds: 60
httpGet:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 30
failureThreshold: 10
failureThreshold: 60
resources:
requests:
cpu: "10"
......@@ -78,8 +73,19 @@ spec:
cpu: "10"
memory: "20Gi"
gpu: "1"
envs:
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
value: "[\"generate\"]"
# Set explicitly, as the other workers in this change do, so it agrees with
# port 9090 used by this worker's probes.
# NOTE(review): presumably this is the port serving /live and /health — confirm.
- name: DYN_SYSTEM_PORT
value: "9090"
extraPodSpec:
mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm
command:
......@@ -93,24 +99,19 @@ spec:
componentType: worker
replicas: 1
livenessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
periodSeconds: 60
httpGet:
path: /live
port: 9090
periodSeconds: 5
timeoutSeconds: 30
failureThreshold: 10
failureThreshold: 1
readinessProbe:
exec:
command:
- /bin/sh
- -c
- 'grep "VllmWorker.*has been initialized" /tmp/vllm.log'
initialDelaySeconds: 60
periodSeconds: 60
httpGet:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 30
failureThreshold: 10
failureThreshold: 60
resources:
requests:
cpu: "10"
......@@ -120,8 +121,21 @@ spec:
cpu: "10"
memory: "20Gi"
gpu: "1"
envs:
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
value: "[\"generate\"]"
- name: DYN_SYSTEM_PORT
value: "9090"
extraPodSpec:
mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm
command:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment