Unverified commit 04a6eb6c, authored by Biswa Panda, committed by GitHub
Browse files

fix: add prefill load balance method args for deepseek-r1 (#4051)


Co-authored-by: dagil-nvidia <dagil@nvidia.com>
parent c837b5ba
...@@ -48,7 +48,7 @@ spec: ...@@ -48,7 +48,7 @@ spec:
timeoutSeconds: 10 timeoutSeconds: 10
failureThreshold: 600 failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/examples/backends/sglang workingDir: /sgl-workspace/dynamo
command: command:
- python3 - python3
- -m - -m
...@@ -77,6 +77,7 @@ spec: ...@@ -77,6 +77,7 @@ spec:
- "0.75" - "0.75"
- --host - --host
- 0.0.0.0 - 0.0.0.0
- --prefill-round-robin-balance
prefill: prefill:
dynamoNamespace: sgl-dsr1-16gpu dynamoNamespace: sgl-dsr1-16gpu
componentType: worker componentType: worker
...@@ -101,7 +102,7 @@ spec: ...@@ -101,7 +102,7 @@ spec:
timeoutSeconds: 10 timeoutSeconds: 10
failureThreshold: 600 failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/examples/backends/sglang workingDir: /sgl-workspace/dynamo
command: command:
- python3 - python3
- -m - -m
...@@ -126,4 +127,6 @@ spec: ...@@ -126,4 +127,6 @@ spec:
- --mem-fraction-static - --mem-fraction-static
- "0.75" - "0.75"
- --host - --host
- 0.0.0.0 - 0.0.0.0
\ No newline at end of file - --load-balance-method
- round_robin
\ No newline at end of file
...@@ -46,7 +46,7 @@ spec: ...@@ -46,7 +46,7 @@ spec:
timeoutSeconds: 10 timeoutSeconds: 10
failureThreshold: 600 failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/examples/backends/sglang workingDir: /sgl-workspace/dynamo
command: command:
- python3 - python3
- -m - -m
...@@ -73,6 +73,7 @@ spec: ...@@ -73,6 +73,7 @@ spec:
- "30001" - "30001"
- --host - --host
- 0.0.0.0 - 0.0.0.0
- --prefill-round-robin-balance
prefill: prefill:
dynamoNamespace: sgl-dsr1-8gpu dynamoNamespace: sgl-dsr1-8gpu
componentType: worker componentType: worker
...@@ -95,7 +96,7 @@ spec: ...@@ -95,7 +96,7 @@ spec:
timeoutSeconds: 10 timeoutSeconds: 10
failureThreshold: 600 failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/examples/backends/sglang workingDir: /sgl-workspace/dynamo
command: command:
- python3 - python3
- -m - -m
...@@ -118,4 +119,6 @@ spec: ...@@ -118,4 +119,6 @@ spec:
- --disaggregation-bootstrap-port - --disaggregation-bootstrap-port
- "30001" - "30001"
- --host - --host
- 0.0.0.0 - 0.0.0.0
\ No newline at end of file - --load-balance-method
- round_robin
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment