Unverified Commit f3f764eb authored by Hongkuan Zhou, committed by GitHub
Browse files

fix: use hf id in dsr1 recipe to support DGDR (#4481)


Signed-off-by: hongkuanz <hongkuanz@nvidia.com>
parent 473cb57e
...@@ -6,8 +6,11 @@ kind: DynamoGraphDeployment ...@@ -6,8 +6,11 @@ kind: DynamoGraphDeployment
metadata: metadata:
name: sgl-dsr1-16gpu name: sgl-dsr1-16gpu
spec: spec:
envs:
- name: HF_HOME
value: /opt/model
pvcs: pvcs:
- name: model-cache-pvc - name: model-cache
create: false create: false
services: services:
Frontend: Frontend:
...@@ -16,13 +19,6 @@ spec: ...@@ -16,13 +19,6 @@ spec:
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
startupProbe:
httpGet:
path: /health
port: 8000
periodSeconds: 10
timeoutSeconds: 1800
failureThreshold: 60
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
decode: decode:
dynamoNamespace: sgl-dsr1-16gpu dynamoNamespace: sgl-dsr1-16gpu
...@@ -34,19 +30,12 @@ spec: ...@@ -34,19 +30,12 @@ spec:
limits: limits:
gpu: "8" gpu: "8"
volumeMounts: volumeMounts:
- name: model-cache-pvc - name: model-cache
mountPoint: /model-cache mountPoint: /opt/model
sharedMemory: sharedMemory:
size: 80Gi size: 80Gi
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 10
failureThreshold: 600
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
workingDir: /sgl-workspace/dynamo workingDir: /sgl-workspace/dynamo
command: command:
...@@ -55,7 +44,7 @@ spec: ...@@ -55,7 +44,7 @@ spec:
- dynamo.sglang - dynamo.sglang
args: args:
- --model-path - --model-path
- /model-cache/deepseek-r1 - deepseek-ai/DeepSeek-R1
- --served-model-name - --served-model-name
- deepseek-ai/DeepSeek-R1 - deepseek-ai/DeepSeek-R1
- --tp - --tp
...@@ -86,19 +75,12 @@ spec: ...@@ -86,19 +75,12 @@ spec:
limits: limits:
gpu: "8" gpu: "8"
volumeMounts: volumeMounts:
- name: model-cache-pvc - name: model-cache
mountPoint: /model-cache mountPoint: /opt/model
sharedMemory: sharedMemory:
size: 80Gi size: 80Gi
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 10
failureThreshold: 600
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
workingDir: /sgl-workspace/dynamo workingDir: /sgl-workspace/dynamo
command: command:
...@@ -107,7 +89,7 @@ spec: ...@@ -107,7 +89,7 @@ spec:
- dynamo.sglang - dynamo.sglang
args: args:
- --model-path - --model-path
- /model-cache/deepseek-r1 - deepseek-ai/DeepSeek-R1
- --served-model-name - --served-model-name
- deepseek-ai/DeepSeek-R1 - deepseek-ai/DeepSeek-R1
- --tp - --tp
......
...@@ -6,8 +6,11 @@ kind: DynamoGraphDeployment ...@@ -6,8 +6,11 @@ kind: DynamoGraphDeployment
metadata: metadata:
name: sgl-dsr1-8gpu name: sgl-dsr1-8gpu
spec: spec:
envs:
- name: HF_HOME
value: /opt/model
pvcs: pvcs:
- name: model-cache-pvc - name: model-cache
create: false create: false
services: services:
Frontend: Frontend:
...@@ -16,13 +19,6 @@ spec: ...@@ -16,13 +19,6 @@ spec:
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
startupProbe:
httpGet:
path: /health
port: 8000
periodSeconds: 10
timeoutSeconds: 1800
failureThreshold: 60
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
decode: decode:
dynamoNamespace: sgl-dsr1-8gpu dynamoNamespace: sgl-dsr1-8gpu
...@@ -32,28 +28,21 @@ spec: ...@@ -32,28 +28,21 @@ spec:
limits: limits:
gpu: "8" gpu: "8"
volumeMounts: volumeMounts:
- name: model-cache-pvc - name: model-cache
mountPoint: /model-cache mountPoint: /opt/model
sharedMemory: sharedMemory:
size: 80Gi size: 80Gi
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 10
failureThreshold: 600
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
workingDir: /sgl-workspace/dynamo workingDir: /workspace
command: command:
- python3 - python3
- -m - -m
- dynamo.sglang - dynamo.sglang
args: args:
- --model-path - --model-path
- /model-cache/deepseek-r1 - deepseek-ai/DeepSeek-R1
- --served-model-name - --served-model-name
- deepseek-ai/DeepSeek-R1 - deepseek-ai/DeepSeek-R1
- --tp - --tp
...@@ -64,7 +53,6 @@ spec: ...@@ -64,7 +53,6 @@ spec:
- --ep-size - --ep-size
- "8" - "8"
- --trust-remote-code - --trust-remote-code
- --skip-tokenizer-init
- --disaggregation-mode - --disaggregation-mode
- decode - decode
- --disaggregation-bootstrap-port - --disaggregation-bootstrap-port
...@@ -80,28 +68,21 @@ spec: ...@@ -80,28 +68,21 @@ spec:
limits: limits:
gpu: "8" gpu: "8"
volumeMounts: volumeMounts:
- name: model-cache-pvc - name: model-cache
mountPoint: /model-cache mountPoint: /opt/model
sharedMemory: sharedMemory:
size: 80Gi size: 80Gi
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
startupProbe:
httpGet:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 10
failureThreshold: 600
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
workingDir: /sgl-workspace/dynamo workingDir: /workspace
command: command:
- python3 - python3
- -m - -m
- dynamo.sglang - dynamo.sglang
args: args:
- --model-path - --model-path
- /model-cache/deepseek-r1 - deepseek-ai/DeepSeek-R1
- --served-model-name - --served-model-name
- deepseek-ai/DeepSeek-R1 - deepseek-ai/DeepSeek-R1
- --tp - --tp
...@@ -109,7 +90,6 @@ spec: ...@@ -109,7 +90,6 @@ spec:
- --ep-size - --ep-size
- "8" - "8"
- --trust-remote-code - --trust-remote-code
- --skip-tokenizer-init
- --disaggregation-mode - --disaggregation-mode
- prefill - prefill
- --disaggregation-bootstrap-port - --disaggregation-bootstrap-port
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment