Unverified commit 2c642fd0, authored by Biswa Panda, committed by GitHub
Browse files

fix: vllm deployment examples (#2062)

parent 1958b3aa
...@@ -80,4 +80,4 @@ spec: ...@@ -80,4 +80,4 @@ spec:
image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4 image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
args: args:
- "python3 components/main.py --model Qwen/Qwen3-0.6B --enforce-eager 2>&1 | tee /tmp/vllm.log" - "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager 2>&1 | tee /tmp/vllm.log"
...@@ -80,7 +80,7 @@ spec: ...@@ -80,7 +80,7 @@ spec:
image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4 image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
args: args:
- "python3 components/main.py --model Qwen/Qwen3-0.6B --enforce-eager 2>&1 | tee /tmp/vllm.log" - "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager 2>&1 | tee /tmp/vllm.log"
VllmPrefillWorker: VllmPrefillWorker:
dynamoNamespace: vllm-v1-disagg dynamoNamespace: vllm-v1-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
...@@ -119,4 +119,4 @@ spec: ...@@ -119,4 +119,4 @@ spec:
image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4 image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
args: args:
- "python3 components/main.py --model Qwen/Qwen3-0.6B --enforce-eager --is-prefill-worker 2>&1 | tee /tmp/vllm.log" - "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager --is-prefill-worker 2>&1 | tee /tmp/vllm.log"
...@@ -80,7 +80,7 @@ spec: ...@@ -80,7 +80,7 @@ spec:
image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4 image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
args: args:
- "python3 components/main.py --model Qwen/Qwen3-0.6B --enforce-eager 2>&1 | tee /tmp/vllm.log" - "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager 2>&1 | tee /tmp/vllm.log"
VllmPrefillWorker: VllmPrefillWorker:
dynamoNamespace: vllm-v1-disagg-planner dynamoNamespace: vllm-v1-disagg-planner
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
...@@ -119,4 +119,4 @@ spec: ...@@ -119,4 +119,4 @@ spec:
image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4 image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
args: args:
- "python3 components/main.py --model Qwen/Qwen3-0.6B --enforce-eager --is-prefill-worker 2>&1 | tee /tmp/vllm.log" - "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager --is-prefill-worker 2>&1 | tee /tmp/vllm.log"
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment