Unverified commit 04a6eb6c, authored by Biswa Panda, committed by GitHub
Browse files

fix: add prefill load balance method args for deepseek-r1 (#4051)


Co-authored-by: dagil-nvidia <dagil@nvidia.com>
parent c837b5ba
......@@ -48,7 +48,7 @@ spec:
timeoutSeconds: 10
failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/examples/backends/sglang
workingDir: /sgl-workspace/dynamo
command:
- python3
- -m
......@@ -77,6 +77,7 @@ spec:
- "0.75"
- --host
- 0.0.0.0
- --prefill-round-robin-balance
prefill:
dynamoNamespace: sgl-dsr1-16gpu
componentType: worker
......@@ -101,7 +102,7 @@ spec:
timeoutSeconds: 10
failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/examples/backends/sglang
workingDir: /sgl-workspace/dynamo
command:
- python3
- -m
......@@ -126,4 +127,6 @@ spec:
- --mem-fraction-static
- "0.75"
- --host
- 0.0.0.0
\ No newline at end of file
- 0.0.0.0
- --load-balance-method
- round_robin
\ No newline at end of file
......@@ -46,7 +46,7 @@ spec:
timeoutSeconds: 10
failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/examples/backends/sglang
workingDir: /sgl-workspace/dynamo
command:
- python3
- -m
......@@ -73,6 +73,7 @@ spec:
- "30001"
- --host
- 0.0.0.0
- --prefill-round-robin-balance
prefill:
dynamoNamespace: sgl-dsr1-8gpu
componentType: worker
......@@ -95,7 +96,7 @@ spec:
timeoutSeconds: 10
failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/examples/backends/sglang
workingDir: /sgl-workspace/dynamo
command:
- python3
- -m
......@@ -118,4 +119,6 @@ spec:
- --disaggregation-bootstrap-port
- "30001"
- --host
- 0.0.0.0
\ No newline at end of file
- 0.0.0.0
- --load-balance-method
- round_robin
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment