Unverified commit 9f3cc6e9, authored by ptarasiewiczNV, committed by GitHub
Browse files

fix: Use fixed vLLM DSR1 checkpoint path (#5721)


Signed-off-by: Piotr Tarasiewicz <ptarasiewicz@nvidia.com>
parent 584020f4
...@@ -7,9 +7,6 @@ metadata: ...@@ -7,9 +7,6 @@ metadata:
name: vllm-dsr1 name: vllm-dsr1
spec: spec:
backendFramework: vllm backendFramework: vllm
envs:
- name: HF_HOME
value: /model-cache
pvcs: pvcs:
- name: model-cache - name: model-cache
create: false create: false
...@@ -17,6 +14,9 @@ spec: ...@@ -17,6 +14,9 @@ spec:
Frontend: Frontend:
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
volumeMounts:
- name: model-cache
mountPoint: /model-cache
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
startupProbe: startupProbe:
...@@ -73,7 +73,7 @@ spec: ...@@ -73,7 +73,7 @@ spec:
args: args:
- | - |
exec python3 -m dynamo.vllm \ exec python3 -m dynamo.vllm \
--model deepseek-ai/DeepSeek-R1 \ --model /model-cache/deepseek-r1 \
--served-model-name deepseek-ai/DeepSeek-R1 \ --served-model-name deepseek-ai/DeepSeek-R1 \
--all2all-backend deepep_low_latency \ --all2all-backend deepep_low_latency \
--data-parallel-hybrid-lb \ --data-parallel-hybrid-lb \
...@@ -132,7 +132,7 @@ spec: ...@@ -132,7 +132,7 @@ spec:
args: args:
- | - |
exec python3 -m dynamo.vllm \ exec python3 -m dynamo.vllm \
--model deepseek-ai/DeepSeek-R1 \ --model /model-cache/deepseek-r1 \
--is-prefill-worker \ --is-prefill-worker \
--served-model-name deepseek-ai/DeepSeek-R1 \ --served-model-name deepseek-ai/DeepSeek-R1 \
--all2all-backend deepep_high_throughput \ --all2all-backend deepep_high_throughput \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment