Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
583a9778
Unverified
Commit
583a9778
authored
Mar 16, 2025
by
Simon Mo
Committed by
GitHub
Mar 16, 2025
Browse files
[Benchmark] Do not save detailed info to json by default (#14879)
Signed-off-by: simon-mo <simon.mo@hey.com>
parent
a73e183e
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing 2 changed files with 19 additions and 1 deletion
+19
-1
benchmarks/backend_request_func.py
benchmarks/backend_request_func.py
+4
-1
benchmarks/benchmark_serving.py
benchmarks/benchmark_serving.py
+15
-0
No files found.
benchmarks/backend_request_func.py
View file @
583a9778
...
...
@@ -14,7 +14,8 @@ from tqdm.asyncio import tqdm
from
transformers
import
(
AutoTokenizer
,
PreTrainedTokenizer
,
PreTrainedTokenizerFast
)
from
vllm.model_executor.model_loader.weight_utils
import
get_lock
# NOTE(simon): do not import vLLM here so the benchmark script
# can run without vLLM installed.
AIOHTTP_TIMEOUT
=
aiohttp
.
ClientTimeout
(
total
=
6
*
60
*
60
)
...
...
@@ -427,6 +428,8 @@ def get_model(pretrained_model_name_or_path: str) -> str:
if
os
.
getenv
(
'VLLM_USE_MODELSCOPE'
,
'False'
).
lower
()
==
'true'
:
from
modelscope
import
snapshot_download
from
vllm.model_executor.model_loader.weight_utils
import
get_lock
# Use file lock to prevent multiple processes from
# downloading the same model weights at the same time.
with
get_lock
(
pretrained_model_name_or_path
):
...
...
benchmarks/benchmark_serving.py
View file @
583a9778
...
...
@@ -684,6 +684,15 @@ def main(args: argparse.Namespace):
"Invalid metadata format. Please use KEY=VALUE format."
)
if
not
args
.
save_detailed
:
# Remove fields with too many data points
for
field
in
[
"input_lens"
,
"output_lens"
,
"ttfts"
,
"itls"
,
"generated_texts"
,
"errors"
]:
if
field
in
result_json
:
del
result_json
[
field
]
# Traffic
result_json
[
"request_rate"
]
=
(
args
.
request_rate
if
args
.
request_rate
<
float
(
"inf"
)
else
"inf"
)
...
...
@@ -828,6 +837,12 @@ if __name__ == "__main__":
action
=
"store_true"
,
help
=
"Specify to save benchmark results to a json file"
,
)
parser
.
add_argument
(
"--save-detailed"
,
action
=
"store_true"
,
help
=
"When saving the results, whether to include per request "
"information such as response, error, ttfs, tpots, etc."
,
)
parser
.
add_argument
(
"--metadata"
,
metavar
=
"KEY=VALUE"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment