Unverified commit 640c2d30, authored by Hongkuan Zhou and committed by GitHub
Browse files

fix: sglang dsr1 recipes (#3850)


Signed-off-by: hongkuanz <hongkuanz@nvidia.com>
parent 8de469cf
...@@ -45,29 +45,38 @@ spec: ...@@ -45,29 +45,38 @@ spec:
path: /health path: /health
port: 9090 port: 9090
periodSeconds: 10 periodSeconds: 10
timeoutSeconds: 1800 timeoutSeconds: 10
failureThreshold: 60 failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang workingDir: /workspace/components/backends/sglang
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.sglang
args: args:
- >- - --model-path
exec python3 -m dynamo.sglang - deepseek-ai/DeepSeek-R1
--model-path deepseek-ai/DeepSeek-R1 - --served-model-name
--served-model-name deepseek-ai/DeepSeek-R1 - deepseek-ai/DeepSeek-R1
--tp 16 - --tp
--dp 16 - "16"
--enable-dp-attention - --dp
--ep-size 16 - "16"
--trust-remote-code - --enable-dp-attention
--skip-tokenizer-init - --ep-size
--disaggregation-mode decode - "16"
--disaggregation-transfer-backend nixl - --trust-remote-code
--disaggregation-bootstrap-port 30001 - --skip-tokenizer-init
--mem-fraction-static 0.8 - --disaggregation-mode
--host 0.0.0.0 - decode
- --disaggregation-transfer-backend
- nixl
- --disaggregation-bootstrap-port
- "30001"
- --mem-fraction-static
- "0.75"
- --host
- 0.0.0.0
prefill: prefill:
dynamoNamespace: sgl-dsr1-16gpu dynamoNamespace: sgl-dsr1-16gpu
componentType: worker componentType: worker
...@@ -89,24 +98,32 @@ spec: ...@@ -89,24 +98,32 @@ spec:
path: /health path: /health
port: 9090 port: 9090
periodSeconds: 10 periodSeconds: 10
timeoutSeconds: 1800 timeoutSeconds: 10
failureThreshold: 60 failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang workingDir: /workspace/components/backends/sglang
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.sglang
args: args:
- >- - --model-path
exec python3 -m dynamo.sglang - deepseek-ai/DeepSeek-R1
--model-path deepseek-ai/DeepSeek-R1 - --served-model-name
--served-model-name deepseek-ai/DeepSeek-R1 - deepseek-ai/DeepSeek-R1
--tp 16 - --tp
--ep-size 16 - "16"
--trust-remote-code - --ep-size
--skip-tokenizer-init - "16"
--disaggregation-mode prefill - --trust-remote-code
--disaggregation-transfer-backend nixl - --skip-tokenizer-init
--disaggregation-bootstrap-port 30001 - --disaggregation-mode
--mem-fraction-static 0.8 - prefill
--host 0.0.0.0 - --disaggregation-transfer-backend
\ No newline at end of file - nixl
- --disaggregation-bootstrap-port
- "30001"
- --mem-fraction-static
- "0.75"
- --host
- 0.0.0.0
\ No newline at end of file
...@@ -43,28 +43,36 @@ spec: ...@@ -43,28 +43,36 @@ spec:
path: /health path: /health
port: 9090 port: 9090
periodSeconds: 10 periodSeconds: 10
timeoutSeconds: 1800 timeoutSeconds: 10
failureThreshold: 60 failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang workingDir: /workspace/components/backends/sglang
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.sglang
args: args:
- >- - --model-path
exec python3 -m dynamo.sglang - deepseek-ai/DeepSeek-R1
--model-path deepseek-ai/DeepSeek-R1 - --served-model-name
--served-model-name deepseek-ai/DeepSeek-R1 - deepseek-ai/DeepSeek-R1
--tp 8 - --tp
--dp 8 - "8"
--enable-dp-attention - --dp
--ep-size 8 - "8"
--trust-remote-code - --enable-dp-attention
--skip-tokenizer-init - --ep-size
--disaggregation-mode decode - "8"
--disaggregation-transfer-backend nixl - --trust-remote-code
--disaggregation-bootstrap-port 30001 - --skip-tokenizer-init
--host 0.0.0.0 - --disaggregation-mode
- decode
- --disaggregation-transfer-backend
- nixl
- --disaggregation-bootstrap-port
- "30001"
- --host
- 0.0.0.0
prefill: prefill:
dynamoNamespace: sgl-dsr1-8gpu dynamoNamespace: sgl-dsr1-8gpu
componentType: worker componentType: worker
...@@ -84,23 +92,30 @@ spec: ...@@ -84,23 +92,30 @@ spec:
path: /health path: /health
port: 9090 port: 9090
periodSeconds: 10 periodSeconds: 10
timeoutSeconds: 1800 timeoutSeconds: 10
failureThreshold: 60 failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang workingDir: /workspace/components/backends/sglang
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.sglang
args: args:
- >- - --model-path
exec python3 -m dynamo.sglang - deepseek-ai/DeepSeek-R1
--model-path deepseek-ai/DeepSeek-R1 - --served-model-name
--served-model-name deepseek-ai/DeepSeek-R1 - deepseek-ai/DeepSeek-R1
--tp 8 - --tp
--ep-size 8 - "8"
--trust-remote-code - --ep-size
--skip-tokenizer-init - "8"
--disaggregation-mode prefill - --trust-remote-code
--disaggregation-transfer-backend nixl - --skip-tokenizer-init
--disaggregation-bootstrap-port 30001 - --disaggregation-mode
--host 0.0.0.0 - prefill
\ No newline at end of file - --disaggregation-transfer-backend
- nixl
- --disaggregation-bootstrap-port
- "30001"
- --host
- 0.0.0.0
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment