Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4d4297e8
Unverified
Commit
4d4297e8
authored
Aug 06, 2025
by
lkchen
Committed by
GitHub
Aug 06, 2025
Browse files
[Bench] Split serve.py:main into sync/async versions (#22405)
Signed-off-by:
Linkun
<
github@lkchen.net
>
parent
2a4c8255
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
58 additions
and
54 deletions
+58
-54
vllm/benchmarks/serve.py
vllm/benchmarks/serve.py
+58
-54
No files found.
vllm/benchmarks/serve.py
View file @
4d4297e8
...
...
@@ -948,7 +948,10 @@ def add_cli_args(parser: argparse.ArgumentParser):
)
def
main
(
args
:
argparse
.
Namespace
):
def
main
(
args
:
argparse
.
Namespace
)
->
dict
[
str
,
Any
]:
return
asyncio
.
run
(
main_async
(
args
))
async
def
main_async
(
args
:
argparse
.
Namespace
)
->
dict
[
str
,
Any
]:
print
(
args
)
random
.
seed
(
args
.
seed
)
np
.
random
.
seed
(
args
.
seed
)
...
...
@@ -1025,8 +1028,7 @@ def main(args: argparse.Namespace):
gc
.
collect
()
gc
.
freeze
()
benchmark_result
=
asyncio
.
run
(
benchmark
(
benchmark_result
=
await
benchmark
(
endpoint_type
=
args
.
endpoint_type
,
api_url
=
api_url
,
base_url
=
base_url
,
...
...
@@ -1052,62 +1054,62 @@ def main(args: argparse.Namespace):
ramp_up_start_rps
=
args
.
ramp_up_start_rps
,
ramp_up_end_rps
=
args
.
ramp_up_end_rps
,
ready_check_timeout_sec
=
args
.
ready_check_timeout_sec
,
)
)
)
# Save config and results to json
if
args
.
save_result
or
args
.
append_result
:
result_json
:
dict
[
str
,
Any
]
=
{}
# Setup
current_dt
=
datetime
.
now
().
strftime
(
"%Y%m%d-%H%M%S"
)
result_json
[
"date"
]
=
current_dt
result_json
[
"endpoint_type"
]
=
args
.
endpoint_type
result_json
[
"label"
]
=
label
result_json
[
"model_id"
]
=
model_id
result_json
[
"tokenizer_id"
]
=
tokenizer_id
result_json
[
"num_prompts"
]
=
args
.
num_prompts
# Metadata
if
args
.
metadata
:
for
item
in
args
.
metadata
:
if
"="
in
item
:
kvstring
=
item
.
split
(
"="
)
result_json
[
kvstring
[
0
].
strip
()]
=
kvstring
[
1
].
strip
()
else
:
raise
ValueError
(
"Invalid metadata format. Please use KEY=VALUE format."
)
# Traffic
result_json
[
"request_rate"
]
=
(
args
.
request_rate
if
args
.
request_rate
<
float
(
"inf"
)
else
"inf"
)
result_json
[
"burstiness"
]
=
args
.
burstiness
result_json
[
"max_concurrency"
]
=
args
.
max_concurrency
result_json
:
dict
[
str
,
Any
]
=
{}
# Setup
current_dt
=
datetime
.
now
().
strftime
(
"%Y%m%d-%H%M%S"
)
result_json
[
"date"
]
=
current_dt
result_json
[
"endpoint_type"
]
=
args
.
endpoint_type
result_json
[
"label"
]
=
label
result_json
[
"model_id"
]
=
model_id
result_json
[
"tokenizer_id"
]
=
tokenizer_id
result_json
[
"num_prompts"
]
=
args
.
num_prompts
# Metadata
if
args
.
metadata
:
for
item
in
args
.
metadata
:
if
"="
in
item
:
kvstring
=
item
.
split
(
"="
)
result_json
[
kvstring
[
0
].
strip
()]
=
kvstring
[
1
].
strip
()
else
:
raise
ValueError
(
"Invalid metadata format. Please use KEY=VALUE format."
)
if
args
.
ramp_up_strategy
is
not
None
:
result_json
[
"ramp_up_strategy"
]
=
args
.
ramp_up_strategy
result_json
[
"ramp_up_start_rps"
]
=
args
.
ramp_up_start_rps
result_json
[
"ramp_up_end_rps"
]
=
args
.
ramp_up_end_rps
# Merge with benchmark result
result_json
=
{
**
result_json
,
**
benchmark_result
}
if
not
args
.
save_detailed
:
# Remove fields with too many data points
for
field
in
[
"input_lens"
,
"output_lens"
,
"ttfts"
,
"itls"
,
"generated_texts"
,
"errors"
,
]:
if
field
in
result_json
:
del
result_json
[
field
]
if
field
in
benchmark_result
:
del
benchmark_result
[
field
]
# Traffic
result_json
[
"request_rate"
]
=
(
args
.
request_rate
if
args
.
request_rate
<
float
(
"inf"
)
else
"inf"
)
result_json
[
"burstiness"
]
=
args
.
burstiness
result_json
[
"max_concurrency"
]
=
args
.
max_concurrency
if
args
.
ramp_up_strategy
is
not
None
:
result_json
[
"ramp_up_strategy"
]
=
args
.
ramp_up_strategy
result_json
[
"ramp_up_start_rps"
]
=
args
.
ramp_up_start_rps
result_json
[
"ramp_up_end_rps"
]
=
args
.
ramp_up_end_rps
# Merge with benchmark result
result_json
=
{
**
result_json
,
**
benchmark_result
}
if
not
args
.
save_detailed
:
# Remove fields with too many data points
for
field
in
[
"input_lens"
,
"output_lens"
,
"ttfts"
,
"itls"
,
"generated_texts"
,
"errors"
,
]:
if
field
in
result_json
:
del
result_json
[
field
]
if
field
in
benchmark_result
:
del
benchmark_result
[
field
]
# Save to file
if
args
.
save_result
or
args
.
append_result
:
base_model_id
=
model_id
.
split
(
"/"
)[
-
1
]
max_concurrency_str
=
(
f
"-concurrency
{
args
.
max_concurrency
}
"
if
args
.
max_concurrency
is
not
None
else
""
)
...
...
@@ -1129,3 +1131,5 @@ def main(args: argparse.Namespace):
outfile
.
write
(
"
\n
"
)
json
.
dump
(
result_json
,
outfile
)
save_to_pytorch_benchmark_format
(
args
,
result_json
,
file_name
)
return
result_json
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment