Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
533db093
Unverified
Commit
533db093
authored
Jul 30, 2025
by
Peter Pan
Committed by
GitHub
Jul 30, 2025
Browse files
[benchmark] add max-concurrency in result table (#21095)
Signed-off-by:
Peter Pan
<
Peter.Pan@daocloud.io
>
parent
fc91da54
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
14 additions
and
0 deletions
+14
-0
benchmarks/benchmark_serving.py
benchmarks/benchmark_serving.py
+4
-0
benchmarks/benchmark_serving_structured_output.py
benchmarks/benchmark_serving_structured_output.py
+4
-0
vllm/benchmarks/serve.py
vllm/benchmarks/serve.py
+6
-0
No files found.
benchmarks/benchmark_serving.py
View file @
533db093
...
...
@@ -413,6 +413,10 @@ async def benchmark(
print
(
"{s:{c}^{n}}"
.
format
(
s
=
" Serving Benchmark Result "
,
n
=
50
,
c
=
"="
))
print
(
"{:<40} {:<10}"
.
format
(
"Successful requests:"
,
metrics
.
completed
))
if
max_concurrency
is
not
None
:
print
(
"{:<40} {:<10}"
.
format
(
"Maximum request concurrency:"
,
max_concurrency
))
if
request_rate
!=
float
(
"inf"
):
print
(
"{:<40} {:<10.2f}"
.
format
(
"Request rate configured (RPS):"
,
request_rate
))
print
(
"{:<40} {:<10.2f}"
.
format
(
"Benchmark duration (s):"
,
benchmark_duration
))
print
(
"{:<40} {:<10}"
.
format
(
"Total input tokens:"
,
metrics
.
total_input
))
print
(
"{:<40} {:<10}"
.
format
(
"Total generated tokens:"
,
metrics
.
total_output
))
...
...
benchmarks/benchmark_serving_structured_output.py
View file @
533db093
...
...
@@ -555,6 +555,10 @@ async def benchmark(
print
(
"{s:{c}^{n}}"
.
format
(
s
=
" Serving Benchmark Result "
,
n
=
50
,
c
=
"="
))
print
(
"{:<40} {:<10}"
.
format
(
"Successful requests:"
,
metrics
.
completed
))
if
max_concurrency
is
not
None
:
print
(
"{:<40} {:<10}"
.
format
(
"Maximum request concurrency:"
,
max_concurrency
))
if
request_rate
!=
float
(
"inf"
):
print
(
"{:<40} {:<10.2f}"
.
format
(
"Request rate configured (RPS):"
,
request_rate
))
print
(
"{:<40} {:<10.2f}"
.
format
(
"Benchmark duration (s):"
,
benchmark_duration
))
print
(
"{:<40} {:<10}"
.
format
(
"Total input tokens:"
,
metrics
.
total_input
))
print
(
"{:<40} {:<10}"
.
format
(
"Total generated tokens:"
,
metrics
.
total_output
))
...
...
vllm/benchmarks/serve.py
View file @
533db093
...
...
@@ -486,6 +486,12 @@ async def benchmark(
print
(
"{s:{c}^{n}}"
.
format
(
s
=
' Serving Benchmark Result '
,
n
=
50
,
c
=
'='
))
print
(
"{:<40} {:<10}"
.
format
(
"Successful requests:"
,
metrics
.
completed
))
if
max_concurrency
is
not
None
:
print
(
"{:<40} {:<10}"
.
format
(
"Maximum request concurrency:"
,
max_concurrency
))
if
request_rate
!=
float
(
'inf'
):
print
(
"{:<40} {:<10.2f}"
.
format
(
"Request rate configured (RPS):"
,
request_rate
))
print
(
"{:<40} {:<10.2f}"
.
format
(
"Benchmark duration (s):"
,
benchmark_duration
))
print
(
"{:<40} {:<10}"
.
format
(
"Total input tokens:"
,
metrics
.
total_input
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment