"...ssh:/git@developer.sourcefind.cn:2222/OpenDAS/dynamo.git" did not exist on "76c70f4155cd8246509e594b4aa9aafda8666442"
Unverified commit 640c2d30, authored by Hongkuan Zhou and committed by GitHub
Browse files

fix: sglang dsr1 recipes (#3850)


Signed-off-by: hongkuanz <hongkuanz@nvidia.com>
parent 8de469cf
......@@ -45,29 +45,38 @@ spec:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 1800
failureThreshold: 60
timeoutSeconds: 10
failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.sglang
args:
- >-
exec python3 -m dynamo.sglang
--model-path deepseek-ai/DeepSeek-R1
--served-model-name deepseek-ai/DeepSeek-R1
--tp 16
--dp 16
--enable-dp-attention
--ep-size 16
--trust-remote-code
--skip-tokenizer-init
--disaggregation-mode decode
--disaggregation-transfer-backend nixl
--disaggregation-bootstrap-port 30001
--mem-fraction-static 0.8
--host 0.0.0.0
- --model-path
- deepseek-ai/DeepSeek-R1
- --served-model-name
- deepseek-ai/DeepSeek-R1
- --tp
- "16"
- --dp
- "16"
- --enable-dp-attention
- --ep-size
- "16"
- --trust-remote-code
- --skip-tokenizer-init
- --disaggregation-mode
- decode
- --disaggregation-transfer-backend
- nixl
- --disaggregation-bootstrap-port
- "30001"
- --mem-fraction-static
- "0.75"
- --host
- 0.0.0.0
prefill:
dynamoNamespace: sgl-dsr1-16gpu
componentType: worker
......@@ -89,24 +98,32 @@ spec:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 1800
failureThreshold: 60
timeoutSeconds: 10
failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.sglang
args:
- >-
exec python3 -m dynamo.sglang
--model-path deepseek-ai/DeepSeek-R1
--served-model-name deepseek-ai/DeepSeek-R1
--tp 16
--ep-size 16
--trust-remote-code
--skip-tokenizer-init
--disaggregation-mode prefill
--disaggregation-transfer-backend nixl
--disaggregation-bootstrap-port 30001
--mem-fraction-static 0.8
--host 0.0.0.0
\ No newline at end of file
- --model-path
- deepseek-ai/DeepSeek-R1
- --served-model-name
- deepseek-ai/DeepSeek-R1
- --tp
- "16"
- --ep-size
- "16"
- --trust-remote-code
- --skip-tokenizer-init
- --disaggregation-mode
- prefill
- --disaggregation-transfer-backend
- nixl
- --disaggregation-bootstrap-port
- "30001"
- --mem-fraction-static
- "0.75"
- --host
- 0.0.0.0
\ No newline at end of file
......@@ -43,28 +43,36 @@ spec:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 1800
failureThreshold: 60
timeoutSeconds: 10
failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.sglang
args:
- >-
exec python3 -m dynamo.sglang
--model-path deepseek-ai/DeepSeek-R1
--served-model-name deepseek-ai/DeepSeek-R1
--tp 8
--dp 8
--enable-dp-attention
--ep-size 8
--trust-remote-code
--skip-tokenizer-init
--disaggregation-mode decode
--disaggregation-transfer-backend nixl
--disaggregation-bootstrap-port 30001
--host 0.0.0.0
- --model-path
- deepseek-ai/DeepSeek-R1
- --served-model-name
- deepseek-ai/DeepSeek-R1
- --tp
- "8"
- --dp
- "8"
- --enable-dp-attention
- --ep-size
- "8"
- --trust-remote-code
- --skip-tokenizer-init
- --disaggregation-mode
- decode
- --disaggregation-transfer-backend
- nixl
- --disaggregation-bootstrap-port
- "30001"
- --host
- 0.0.0.0
prefill:
dynamoNamespace: sgl-dsr1-8gpu
componentType: worker
......@@ -84,23 +92,30 @@ spec:
path: /health
port: 9090
periodSeconds: 10
timeoutSeconds: 1800
failureThreshold: 60
timeoutSeconds: 10
failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.sglang
args:
- >-
exec python3 -m dynamo.sglang
--model-path deepseek-ai/DeepSeek-R1
--served-model-name deepseek-ai/DeepSeek-R1
--tp 8
--ep-size 8
--trust-remote-code
--skip-tokenizer-init
--disaggregation-mode prefill
--disaggregation-transfer-backend nixl
--disaggregation-bootstrap-port 30001
--host 0.0.0.0
\ No newline at end of file
- --model-path
- deepseek-ai/DeepSeek-R1
- --served-model-name
- deepseek-ai/DeepSeek-R1
- --tp
- "8"
- --ep-size
- "8"
- --trust-remote-code
- --skip-tokenizer-init
- --disaggregation-mode
- prefill
- --disaggregation-transfer-backend
- nixl
- --disaggregation-bootstrap-port
- "30001"
- --host
- 0.0.0.0
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment