Unverified commit 7b325ee8, authored by Biswa Panda and committed by GitHub
Browse files

fix: vllm router examples (#1942)

parent a50be1ad
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
apiVersion: nvidia.com/v1alpha1 apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment kind: DynamoGraphDeployment
metadata: metadata:
name: vllm-v1-agg name: vllm-v1-agg-router
spec: spec:
services: services:
Frontend: Frontend:
...@@ -37,7 +37,7 @@ spec: ...@@ -37,7 +37,7 @@ spec:
periodSeconds: 60 periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 10
dynamoNamespace: vllm-v1-agg dynamoNamespace: vllm-v1-agg-router
componentType: main componentType: main
replicas: 1 replicas: 1
resources: resources:
...@@ -58,6 +58,8 @@ spec: ...@@ -58,6 +58,8 @@ spec:
- out=dyn - out=dyn
- --http-port - --http-port
- "8000" - "8000"
- --router-mode
- kv
VllmDecodeWorker: VllmDecodeWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
livenessProbe: livenessProbe:
...@@ -79,7 +81,7 @@ spec: ...@@ -79,7 +81,7 @@ spec:
periodSeconds: 60 periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 10
dynamoNamespace: vllm-v1-agg dynamoNamespace: vllm-v1-agg-router
componentType: worker componentType: worker
replicas: 2 replicas: 2
resources: resources:
......
...@@ -58,6 +58,8 @@ spec: ...@@ -58,6 +58,8 @@ spec:
- out=dyn - out=dyn
- --http-port - --http-port
- "8000" - "8000"
- --router-mode
- kv
VllmDecodeWorker: VllmDecodeWorker:
dynamoNamespace: vllm-v1-disagg-router dynamoNamespace: vllm-v1-disagg-router
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment