Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
02cc3b51
Unverified
Commit
02cc3b51
authored
Jun 05, 2024
by
Tyler Michael Smith
Committed by
GitHub
Jun 05, 2024
Browse files
[misc] benchmark_serving.py -- add ITL results and tweak TPOT results (#5263)
parent
d5b1eb08
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
23 additions
and
2 deletions
+23
-2
.buildkite/run-benchmarks.sh
.buildkite/run-benchmarks.sh
+1
-1
benchmarks/benchmark_serving.py
benchmarks/benchmark_serving.py
+22
-1
No files found.
.buildkite/run-benchmarks.sh
View file @
02cc3b51
...
@@ -50,7 +50,7 @@ echo "### Serving Benchmarks" >> benchmark_results.md
...
@@ -50,7 +50,7 @@ echo "### Serving Benchmarks" >> benchmark_results.md
sed
-n
'1p'
benchmark_serving.txt
>>
benchmark_results.md
# first line
sed
-n
'1p'
benchmark_serving.txt
>>
benchmark_results.md
# first line
echo
""
>>
benchmark_results.md
echo
""
>>
benchmark_results.md
echo
'```'
>>
benchmark_results.md
echo
'```'
>>
benchmark_results.md
tail
-n
2
0
benchmark_serving.txt
>>
benchmark_results.md
# last 20 lines
tail
-n
2
4
benchmark_serving.txt
>>
benchmark_results.md
# last 24 lines
echo
'```'
>>
benchmark_results.md
echo
'```'
>>
benchmark_results.md
# if the agent binary is not found, skip uploading the results, exit 0
# if the agent binary is not found, skip uploading the results, exit 0
...
...
benchmarks/benchmark_serving.py
View file @
02cc3b51
...
@@ -56,6 +56,9 @@ class BenchmarkMetrics:
...
@@ -56,6 +56,9 @@ class BenchmarkMetrics:
mean_tpot_ms
:
float
mean_tpot_ms
:
float
median_tpot_ms
:
float
median_tpot_ms
:
float
p99_tpot_ms
:
float
p99_tpot_ms
:
float
mean_itl_ms
:
float
median_itl_ms
:
float
p99_itl_ms
:
float
def
sample_sharegpt_requests
(
def
sample_sharegpt_requests
(
...
@@ -200,16 +203,24 @@ def calculate_metrics(
...
@@ -200,16 +203,24 @@ def calculate_metrics(
actual_output_lens
=
[]
actual_output_lens
=
[]
total_input
=
0
total_input
=
0
completed
=
0
completed
=
0
itls
=
[]
tpots
=
[]
tpots
=
[]
ttfts
=
[]
ttfts
=
[]
for
i
in
range
(
len
(
outputs
)):
for
i
in
range
(
len
(
outputs
)):
if
outputs
[
i
].
success
:
if
outputs
[
i
].
success
:
output_len
=
len
(
tokenizer
(
outputs
[
i
].
generated_text
).
input_ids
)
# We use the tokenizer to count the number of output tokens for all
# serving backends instead of looking at len(outputs[i].itl) since
# multiple output tokens may be bundled together
# Note: this may inflate the output token count slightly
output_len
=
len
(
tokenizer
(
outputs
[
i
].
generated_text
,
add_special_tokens
=
False
).
input_ids
)
actual_output_lens
.
append
(
output_len
)
actual_output_lens
.
append
(
output_len
)
total_input
+=
input_requests
[
i
][
1
]
total_input
+=
input_requests
[
i
][
1
]
if
output_len
>
1
:
if
output_len
>
1
:
tpots
.
append
(
tpots
.
append
(
(
outputs
[
i
].
latency
-
outputs
[
i
].
ttft
)
/
(
output_len
-
1
))
(
outputs
[
i
].
latency
-
outputs
[
i
].
ttft
)
/
(
output_len
-
1
))
itls
+=
outputs
[
i
].
itl
ttfts
.
append
(
outputs
[
i
].
ttft
)
ttfts
.
append
(
outputs
[
i
].
ttft
)
completed
+=
1
completed
+=
1
else
:
else
:
...
@@ -234,6 +245,9 @@ def calculate_metrics(
...
@@ -234,6 +245,9 @@ def calculate_metrics(
mean_tpot_ms
=
np
.
mean
(
tpots
or
0
)
*
1000
,
mean_tpot_ms
=
np
.
mean
(
tpots
or
0
)
*
1000
,
median_tpot_ms
=
np
.
median
(
tpots
or
0
)
*
1000
,
median_tpot_ms
=
np
.
median
(
tpots
or
0
)
*
1000
,
p99_tpot_ms
=
np
.
percentile
(
tpots
or
0
,
99
)
*
1000
,
p99_tpot_ms
=
np
.
percentile
(
tpots
or
0
,
99
)
*
1000
,
mean_itl_ms
=
np
.
mean
(
itls
or
0
)
*
1000
,
median_itl_ms
=
np
.
median
(
itls
or
0
)
*
1000
,
p99_itl_ms
=
np
.
percentile
(
itls
or
0
,
99
)
*
1000
,
)
)
return
metrics
,
actual_output_lens
return
metrics
,
actual_output_lens
...
@@ -333,6 +347,10 @@ async def benchmark(
...
@@ -333,6 +347,10 @@ async def benchmark(
print
(
"{:<40} {:<10.2f}"
.
format
(
"Median TPOT (ms):"
,
print
(
"{:<40} {:<10.2f}"
.
format
(
"Median TPOT (ms):"
,
metrics
.
median_tpot_ms
))
metrics
.
median_tpot_ms
))
print
(
"{:<40} {:<10.2f}"
.
format
(
"P99 TPOT (ms):"
,
metrics
.
p99_tpot_ms
))
print
(
"{:<40} {:<10.2f}"
.
format
(
"P99 TPOT (ms):"
,
metrics
.
p99_tpot_ms
))
print
(
"{s:{c}^{n}}"
.
format
(
s
=
'Inter-token Latency'
,
n
=
50
,
c
=
'-'
))
print
(
"{:<40} {:<10.2f}"
.
format
(
"Mean ITL (ms):"
,
metrics
.
mean_itl_ms
))
print
(
"{:<40} {:<10.2f}"
.
format
(
"Median ITL (ms):"
,
metrics
.
median_itl_ms
))
print
(
"{:<40} {:<10.2f}"
.
format
(
"P99 ITL (ms):"
,
metrics
.
p99_itl_ms
))
print
(
"="
*
50
)
print
(
"="
*
50
)
result
=
{
result
=
{
...
@@ -349,6 +367,9 @@ async def benchmark(
...
@@ -349,6 +367,9 @@ async def benchmark(
"mean_tpot_ms"
:
metrics
.
mean_tpot_ms
,
"mean_tpot_ms"
:
metrics
.
mean_tpot_ms
,
"median_tpot_ms"
:
metrics
.
median_tpot_ms
,
"median_tpot_ms"
:
metrics
.
median_tpot_ms
,
"p99_tpot_ms"
:
metrics
.
p99_tpot_ms
,
"p99_tpot_ms"
:
metrics
.
p99_tpot_ms
,
"mean_itl_ms"
:
metrics
.
mean_itl_ms
,
"median_itl_ms"
:
metrics
.
median_itl_ms
,
"p99_itl_ms"
:
metrics
.
p99_itl_ms
,
"input_lens"
:
[
output
.
prompt_len
for
output
in
outputs
],
"input_lens"
:
[
output
.
prompt_len
for
output
in
outputs
],
"output_lens"
:
actual_output_lens
,
"output_lens"
:
actual_output_lens
,
"ttfts"
:
[
output
.
ttft
for
output
in
outputs
],
"ttfts"
:
[
output
.
ttft
for
output
in
outputs
],
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment