Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
9c8b2c2a
Unverified
Commit
9c8b2c2a
authored
Jul 25, 2025
by
Nick Hill
Committed by
GitHub
Jul 24, 2025
Browse files
[DP] Support api-server-count > 0 in hybrid DP LB mode (#21510)
Signed-off-by:
Nick Hill
<
nhill@redhat.com
>
parent
2212cd6c
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
5 additions
and
9 deletions
+5
-9
tests/v1/test_hybrid_lb_dp.py
tests/v1/test_hybrid_lb_dp.py
+1
-1
vllm/entrypoints/cli/serve.py
vllm/entrypoints/cli/serve.py
+4
-8
No files found.
tests/v1/test_hybrid_lb_dp.py
View file @
9c8b2c2a
...
...
@@ -147,7 +147,7 @@ def default_server_args():
]
@
pytest
.
fixture
(
scope
=
"module"
,
params
=
[
1
])
# Only 1 API server for now
@
pytest
.
fixture
(
scope
=
"module"
,
params
=
[
1
,
4
])
def
servers
(
request
,
default_server_args
):
api_server_count
=
request
.
param
with
HybridLBServerManager
(
MODEL_NAME
,
DP_SIZE
,
api_server_count
,
...
...
vllm/entrypoints/cli/serve.py
View file @
9c8b2c2a
...
...
@@ -165,18 +165,14 @@ def run_multi_api_server(args: argparse.Namespace):
" api_server_count > 1"
)
model_config
.
disable_mm_preprocessor_cache
=
True
if
vllm_config
.
parallel_config
.
data_parallel_hybrid_lb
:
raise
NotImplementedError
(
"Hybrid load balancing with --api-server-count > 0"
"is not yet supported."
)
executor_class
=
Executor
.
get_class
(
vllm_config
)
log_stats
=
not
engine_args
.
disable_log_stats
parallel_config
=
vllm_config
.
parallel_config
dp_rank
=
parallel_config
.
data_parallel_rank
external_dp_lb
=
parallel_config
.
data_parallel_external_lb
assert
external_dp_lb
or
dp_rank
==
0
hybrid_dp_lb
=
parallel_config
.
data_parallel_hybrid_lb
assert
external_dp_lb
or
hybrid_dp_lb
or
dp_rank
==
0
api_server_manager
:
Optional
[
APIServerProcessManager
]
=
None
...
...
@@ -196,12 +192,12 @@ def run_multi_api_server(args: argparse.Namespace):
stats_update_address
=
coordinator
.
get_stats_publish_address
()
if
coordinator
else
None
)
# For dp ranks > 0 in external DP LB mode, we must delay the
# For dp ranks > 0 in external
/hybrid
DP LB mode
s
, we must delay the
# start of the API servers until the local engine is started
# (after the launcher context manager exits),
# since we get the front-end stats update address from the coordinator
# via the handshake with the local engine.
if
dp_rank
==
0
or
not
external_dp_lb
:
if
dp_rank
==
0
or
not
(
external_dp_lb
or
hybrid_dp_lb
)
:
# Start API servers using the manager.
api_server_manager
=
APIServerProcessManager
(
**
api_server_manager_kwargs
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment