Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f17a1a8f
Unverified
Commit f17a1a8f, authored May 25, 2024 by Roger Wang; committed by GitHub, May 25, 2024
Browse files
[Misc] Make Serving Benchmark More User-friendly (#5044)
parent
d5a16977
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing 2 changed files with 32 additions and 3 deletions
+32
-3
benchmarks/backend_request_func.py
benchmarks/backend_request_func.py
+6
-0
benchmarks/benchmark_serving.py
benchmarks/benchmark_serving.py
+26
-3
No files found.
benchmarks/backend_request_func.py
View file @
f17a1a8f
...
...
@@ -89,6 +89,9 @@ async def async_request_tgi(
                    output.latency = most_recent_timestamp - st
                    output.success = True
                    output.generated_text = data["generated_text"]
                else:
                    output.error = response.reason or ""
                    output.success = False
        except Exception:
            output.success = False
            exc_info = sys.exc_info()
...
...
@@ -276,6 +279,9 @@ async def async_request_openai_completions(
                    output.generated_text = generated_text
                    output.success = True
                    output.latency = latency
                else:
                    output.error = response.reason or ""
                    output.success = False
        except Exception:
            output.success = False
            exc_info = sys.exc_info()
...
...
benchmarks/benchmark_serving.py
View file @
f17a1a8f
...
...
@@ -215,6 +215,11 @@ def calculate_metrics(
        else:
            actual_output_lens.append(0)
    if completed == 0:
        warnings.warn(
            "All requests failed. This is likely due to a misconfiguration "
            "on the benchmark arguments.",
            stacklevel=2)
    metrics = BenchmarkMetrics(
        completed=completed,
        total_input=total_input,
...
...
@@ -226,9 +231,9 @@ def calculate_metrics(
         1000,  # ttfts is empty if streaming is not supported by backend
         median_ttft_ms=np.median(ttfts or 0) * 1000,
         p99_ttft_ms=np.percentile(ttfts or 0, 99) * 1000,
-        mean_tpot_ms=np.mean(tpots) * 1000,
-        median_tpot_ms=np.median(tpots) * 1000,
-        p99_tpot_ms=np.percentile(tpots, 99) * 1000,
+        mean_tpot_ms=np.mean(tpots or 0) * 1000,
+        median_tpot_ms=np.median(tpots or 0) * 1000,
+        p99_tpot_ms=np.percentile(tpots or 0, 99) * 1000,
     )
     return metrics, actual_output_lens
...
...
@@ -250,6 +255,24 @@ async def benchmark(
    else:
        raise ValueError(f"Unknown backend: {backend}")
    print("Starting initial single prompt test run...")
    test_prompt, test_prompt_len, test_output_len = input_requests[0]
    test_input = RequestFuncInput(
        model=model_id,
        prompt=test_prompt,
        api_url=api_url,
        prompt_len=test_prompt_len,
        output_len=test_output_len,
        best_of=best_of,
        use_beam_search=use_beam_search,
    )
    test_output = await request_func(request_func_input=test_input)
    if not test_output.success:
        raise ValueError(
            "Initial test run failed - Please make sure benchmark arguments "
            f"are correctly specified. Error: {test_output.error}")
    else:
        print("Initial test run completed. Starting main benchmark run...")
    print(f"Traffic request rate: {request_rate}")
    pbar = None if disable_tqdm else tqdm(total=len(input_requests))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment