Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d3bd1711
Unverified
Commit
d3bd1711
authored
Sep 29, 2025
by
Zhuohan Li
Committed by
GitHub
Sep 30, 2025
Browse files
[Benchmark] Support benchmark throughput for external launcher DP (#25913)
Signed-off-by:
Zhuohan Li
<
zhuohan123@gmail.com
>
parent
89e4050a
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
27 additions
and
6 deletions
+27
-6
vllm/benchmarks/throughput.py
vllm/benchmarks/throughput.py
+27
-6
No files found.
vllm/benchmarks/throughput.py
View file @
d3bd1711
...
@@ -358,7 +358,23 @@ def get_requests(args, tokenizer):
...
@@ -358,7 +358,23 @@ def get_requests(args, tokenizer):
raise
ValueError
(
f
"Unknown dataset name:
{
args
.
dataset_name
}
"
)
raise
ValueError
(
f
"Unknown dataset name:
{
args
.
dataset_name
}
"
)
# Remove None values
# Remove None values
sample_kwargs
=
{
k
:
v
for
k
,
v
in
sample_kwargs
.
items
()
if
v
is
not
None
}
sample_kwargs
=
{
k
:
v
for
k
,
v
in
sample_kwargs
.
items
()
if
v
is
not
None
}
return
dataset_cls
(
**
common_kwargs
).
sample
(
**
sample_kwargs
)
requests
=
dataset_cls
(
**
common_kwargs
).
sample
(
**
sample_kwargs
)
requests
=
filter_requests_for_dp
(
requests
,
args
.
data_parallel_size
)
return
requests
def filter_requests_for_dp(requests, data_parallel_size):
    """Return the subset of ``requests`` assigned to this data-parallel rank.

    Requests are distributed round-robin by index: the request at position
    ``i`` is kept when ``i % data_parallel_size`` equals this process's
    data-parallel rank.

    Args:
        requests: Sequence of benchmark requests to split.
        data_parallel_size: Number of data-parallel replicas. When it is 1
            the input is returned unchanged and no environment variables
            are read.

    Returns:
        ``requests`` itself when ``data_parallel_size == 1``; otherwise a
        new list holding only this rank's share.

    Raises:
        KeyError: If ``RANK`` or ``WORLD_SIZE`` is missing from the
            environment while ``data_parallel_size != 1``.
        ValueError: If ``WORLD_SIZE`` is not a positive multiple of
            ``data_parallel_size``.
    """
    # Note(zhuohan): The way we get data_parallel_rank is hacky and only
    # works for external launcher mode. Should be cleaned up and deprecated
    # in the future with a better vLLM distributed process design.
    if data_parallel_size == 1:
        return requests

    global_rank = int(os.environ["RANK"])
    world_size = int(os.environ["WORLD_SIZE"])

    # Guard the rank arithmetic below: a world size smaller than the DP size
    # would divide by zero, and a non-multiple would yield ranks that fall
    # outside [0, data_parallel_size) and silently receive no requests.
    if (
        data_parallel_size < 1
        or world_size < data_parallel_size
        or world_size % data_parallel_size != 0
    ):
        raise ValueError(
            f"WORLD_SIZE ({world_size}) must be a positive multiple of "
            f"data_parallel_size ({data_parallel_size})."
        )

    # Each contiguous block of `ranks_per_dp_group` global ranks maps to one
    # data-parallel rank.
    ranks_per_dp_group = world_size // data_parallel_size
    data_parallel_rank = global_rank // ranks_per_dp_group

    return [
        r
        for i, r in enumerate(requests)
        if i % data_parallel_size == data_parallel_rank
    ]
def
validate_args
(
args
):
def
validate_args
(
args
):
...
@@ -454,11 +470,16 @@ def validate_args(args):
...
@@ -454,11 +470,16 @@ def validate_args(args):
raise
ValueError
(
raise
ValueError
(
"Tokenizer must be the same as the model for MII backend."
)
"Tokenizer must be the same as the model for MII backend."
)
# --data-parallel is not supported currently.
if
args
.
data_parallel_size
>
1
and
(
# https://github.com/vllm-project/vllm/issues/16222
args
.
distributed_executor_backend
!=
"external_launcher"
if
args
.
data_parallel_size
>
1
:
or
args
.
async_engine
):
# --data-parallel is not supported fully.
# Old issue: https://github.com/vllm-project/vllm/issues/16222
# Currently we only support data parallel with external launcher
# mode (i.e., launch with torchrun).
raise
ValueError
(
raise
ValueError
(
"Data parallel is not supported in offline benchmark, "
"Data parallel is only supported with external launcher mode "
"with synchronous engine in offline benchmark, "
"please use benchmark serving instead"
"please use benchmark serving instead"
)
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment