"lib/vscode:/vscode.git/clone" did not exist on "dc75cf18a681e4007de01d93d9e1f77009debf6c"
Unverified commit f3f764eb, authored by Hongkuan Zhou and committed by GitHub
Browse files

fix: use hf id in dsr1 recipe to support DGDR (#4481)


Signed-off-by: hongkuanz <hongkuanz@nvidia.com>
parent 473cb57e
......@@ -6,8 +6,11 @@ kind: DynamoGraphDeployment
metadata:
name: sgl-dsr1-16gpu
spec:
envs:
- name: HF_HOME
value: /opt/model
pvcs:
- name: model-cache-pvc
- name: model-cache
create: false
services:
Frontend:
......@@ -16,13 +19,6 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
startupProbe:
httpGet:
path: /health
port: 8000
periodSeconds: 10
timeoutSeconds: 1800
failureThreshold: 60
image: my-registry/sglang-runtime:my-tag
decode:
dynamoNamespace: sgl-dsr1-16gpu
......@@ -34,19 +30,12 @@ spec:
limits:
gpu: "8"
volumeMounts:
- name: model-cache-pvc
mountPoint: /model-cache
- name: model-cache
mountPoint: /opt/model
sharedMemory:
size: 80Gi
extraPodSpec:
mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 10
failureThreshold: 600
image: my-registry/sglang-runtime:my-tag
workingDir: /sgl-workspace/dynamo
command:
......@@ -55,7 +44,7 @@ spec:
- dynamo.sglang
args:
- --model-path
- /model-cache/deepseek-r1
- deepseek-ai/DeepSeek-R1
- --served-model-name
- deepseek-ai/DeepSeek-R1
- --tp
......@@ -86,19 +75,12 @@ spec:
limits:
gpu: "8"
volumeMounts:
- name: model-cache-pvc
mountPoint: /model-cache
- name: model-cache
mountPoint: /opt/model
sharedMemory:
size: 80Gi
extraPodSpec:
mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 10
failureThreshold: 600
image: my-registry/sglang-runtime:my-tag
workingDir: /sgl-workspace/dynamo
command:
......@@ -107,7 +89,7 @@ spec:
- dynamo.sglang
args:
- --model-path
- /model-cache/deepseek-r1
- deepseek-ai/DeepSeek-R1
- --served-model-name
- deepseek-ai/DeepSeek-R1
- --tp
......
......@@ -6,8 +6,11 @@ kind: DynamoGraphDeployment
metadata:
name: sgl-dsr1-8gpu
spec:
envs:
- name: HF_HOME
value: /opt/model
pvcs:
- name: model-cache-pvc
- name: model-cache
create: false
services:
Frontend:
......@@ -16,13 +19,6 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
startupProbe:
httpGet:
path: /health
port: 8000
periodSeconds: 10
timeoutSeconds: 1800
failureThreshold: 60
image: my-registry/sglang-runtime:my-tag
decode:
dynamoNamespace: sgl-dsr1-8gpu
......@@ -32,28 +28,21 @@ spec:
limits:
gpu: "8"
volumeMounts:
- name: model-cache-pvc
mountPoint: /model-cache
- name: model-cache
mountPoint: /opt/model
sharedMemory:
size: 80Gi
extraPodSpec:
mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 10
failureThreshold: 600
image: my-registry/sglang-runtime:my-tag
workingDir: /sgl-workspace/dynamo
workingDir: /workspace
command:
- python3
- -m
- dynamo.sglang
args:
- --model-path
- /model-cache/deepseek-r1
- deepseek-ai/DeepSeek-R1
- --served-model-name
- deepseek-ai/DeepSeek-R1
- --tp
......@@ -64,7 +53,6 @@ spec:
- --ep-size
- "8"
- --trust-remote-code
- --skip-tokenizer-init
- --disaggregation-mode
- decode
- --disaggregation-bootstrap-port
......@@ -80,28 +68,21 @@ spec:
limits:
gpu: "8"
volumeMounts:
- name: model-cache-pvc
mountPoint: /model-cache
- name: model-cache
mountPoint: /opt/model
sharedMemory:
size: 80Gi
extraPodSpec:
mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 10
failureThreshold: 600
image: my-registry/sglang-runtime:my-tag
workingDir: /sgl-workspace/dynamo
workingDir: /workspace
command:
- python3
- -m
- dynamo.sglang
args:
- --model-path
- /model-cache/deepseek-r1
- deepseek-ai/DeepSeek-R1
- --served-model-name
- deepseek-ai/DeepSeek-R1
- --tp
......@@ -109,7 +90,6 @@ spec:
- --ep-size
- "8"
- --trust-remote-code
- --skip-tokenizer-init
- --disaggregation-mode
- prefill
- --disaggregation-bootstrap-port
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment