Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
855e0e6f
Unverified
Commit
855e0e6f
authored
Oct 20, 2024
by
Andy Dai
Committed by
GitHub
Oct 20, 2024
Browse files
[Frontend][Misc] Goodput metric support (#9338)
parent
4fa3e333
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
91 additions
and
2 deletions
+91
-2
benchmarks/benchmark_serving.py
benchmarks/benchmark_serving.py
+91
-2
No files found.
benchmarks/benchmark_serving.py
View file @
855e0e6f
...
...
@@ -53,6 +53,8 @@ try:
except
ImportError
:
from
argparse
import
ArgumentParser
as
FlexibleArgumentParser
MILLISECONDS_TO_SECONDS_CONVERSION
=
1000
@
dataclass
class
BenchmarkMetrics
:
...
...
@@ -60,6 +62,7 @@ class BenchmarkMetrics:
total_input
:
int
total_output
:
int
request_throughput
:
float
request_goodput
:
float
output_throughput
:
float
total_token_throughput
:
float
mean_ttft_ms
:
float
...
...
@@ -316,12 +319,15 @@ def calculate_metrics(
tokenizer
:
PreTrainedTokenizerBase
,
selected_percentile_metrics
:
List
[
str
],
selected_percentiles
:
List
[
float
],
gootput_config_dict
:
Dict
[
str
,
float
],
)
->
Tuple
[
BenchmarkMetrics
,
List
[
int
]]:
actual_output_lens
:
List
[
int
]
=
[]
total_input
=
0
completed
=
0
good_completed
=
0
itls
:
List
[
float
]
=
[]
tpots
:
List
[
float
]
=
[]
all_tpots
:
List
[
float
]
=
[]
ttfts
:
List
[
float
]
=
[]
e2els
:
List
[
float
]
=
[]
for
i
in
range
(
len
(
outputs
)):
...
...
@@ -335,9 +341,13 @@ def calculate_metrics(
add_special_tokens
=
False
).
input_ids
)
actual_output_lens
.
append
(
output_len
)
total_input
+=
input_requests
[
i
][
1
]
tpot
=
0
if
output_len
>
1
:
tpots
.
append
(
(
outputs
[
i
].
latency
-
outputs
[
i
].
ttft
)
/
(
output_len
-
1
))
tpot
=
(
outputs
[
i
].
latency
-
outputs
[
i
].
ttft
)
/
(
output_len
-
1
)
tpots
.
append
(
tpot
)
# Note: if output_len <= 1, we regard tpot as 0 for goodput
all_tpots
.
append
(
tpot
)
itls
+=
outputs
[
i
].
itl
ttfts
.
append
(
outputs
[
i
].
ttft
)
e2els
.
append
(
outputs
[
i
].
latency
)
...
...
@@ -345,6 +355,28 @@ def calculate_metrics(
else
:
actual_output_lens
.
append
(
0
)
if
gootput_config_dict
:
valid_metrics
=
[]
slo_values
=
[]
if
"ttft"
in
gootput_config_dict
:
valid_metrics
.
append
(
ttfts
)
slo_values
.
append
(
gootput_config_dict
[
"ttft"
]
/
MILLISECONDS_TO_SECONDS_CONVERSION
)
if
"tpot"
in
gootput_config_dict
:
valid_metrics
.
append
(
all_tpots
)
slo_values
.
append
(
gootput_config_dict
[
"tpot"
]
/
MILLISECONDS_TO_SECONDS_CONVERSION
)
if
"e2el"
in
gootput_config_dict
:
valid_metrics
.
append
(
e2els
)
slo_values
.
append
(
gootput_config_dict
[
"e2el"
]
/
MILLISECONDS_TO_SECONDS_CONVERSION
)
for
req_metric
in
zip
(
*
valid_metrics
):
is_good_req
=
all
([
s
>=
r
for
s
,
r
in
zip
(
slo_values
,
req_metric
)])
if
is_good_req
:
good_completed
+=
1
if
completed
==
0
:
warnings
.
warn
(
"All requests failed. This is likely due to a misconfiguration "
...
...
@@ -355,6 +387,7 @@ def calculate_metrics(
total_input
=
total_input
,
total_output
=
sum
(
actual_output_lens
),
request_throughput
=
completed
/
dur_s
,
request_goodput
=
good_completed
/
dur_s
,
output_throughput
=
sum
(
actual_output_lens
)
/
dur_s
,
total_token_throughput
=
(
total_input
+
sum
(
actual_output_lens
))
/
dur_s
,
mean_ttft_ms
=
np
.
mean
(
ttfts
or
0
)
*
...
...
@@ -398,6 +431,7 @@ async def benchmark(
selected_percentile_metrics
:
List
[
str
],
selected_percentiles
:
List
[
str
],
ignore_eos
:
bool
,
gootput_config_dict
:
Dict
[
str
,
float
],
max_concurrency
:
Optional
[
int
],
):
if
backend
in
ASYNC_REQUEST_FUNCS
:
...
...
@@ -512,6 +546,7 @@ async def benchmark(
tokenizer
=
tokenizer
,
selected_percentile_metrics
=
selected_percentile_metrics
,
selected_percentiles
=
selected_percentiles
,
gootput_config_dict
=
gootput_config_dict
,
)
print
(
"{s:{c}^{n}}"
.
format
(
s
=
' Serving Benchmark Result '
,
n
=
50
,
c
=
'='
))
...
...
@@ -523,6 +558,9 @@ async def benchmark(
metrics
.
total_output
))
print
(
"{:<40} {:<10.2f}"
.
format
(
"Request throughput (req/s):"
,
metrics
.
request_throughput
))
if
gootput_config_dict
:
print
(
"{:<40} {:<10.2f}"
.
format
(
"Request goodput (req/s):"
,
metrics
.
request_goodput
))
print
(
"{:<40} {:<10.2f}"
.
format
(
"Output token throughput (tok/s):"
,
metrics
.
output_throughput
))
print
(
"{:<40} {:<10.2f}"
.
format
(
"Total Token throughput (tok/s):"
,
...
...
@@ -534,6 +572,8 @@ async def benchmark(
"total_input_tokens"
:
metrics
.
total_input
,
"total_output_tokens"
:
metrics
.
total_output
,
"request_throughput"
:
metrics
.
request_throughput
,
"request_goodput:"
:
metrics
.
request_goodput
if
gootput_config_dict
else
None
,
"output_throughput"
:
metrics
.
output_throughput
,
"total_token_throughput"
:
metrics
.
total_token_throughput
,
"input_lens"
:
[
output
.
prompt_len
for
output
in
outputs
],
...
...
@@ -587,6 +627,41 @@ async def benchmark(
return
result
def check_goodput_args(args):
    """Parse and validate the goodput service level objectives on ``args``.

    Args:
        args: Object exposing a ``goodput`` attribute. When that attribute
            is falsy (``None`` or empty) no objectives are configured;
            otherwise it is handed to ``parse_goodput`` unchanged.

    Returns:
        A dict mapping each metric name to its objective value, or an
        empty dict when ``args.goodput`` is falsy.

    Raises:
        ValueError: If a metric name is not one of ``ttft``, ``tpot`` or
            ``e2el``, or if an objective value is negative.
    """
    # Nothing requested: skip parsing entirely.
    if not args.goodput:
        return {}

    allowed_metrics = ["ttft", "tpot", "e2el"]
    slo_by_metric = parse_goodput(args.goodput)

    for metric, threshold in slo_by_metric.items():
        if metric not in allowed_metrics:
            raise ValueError(
                f"Invalid metric name found, {metric}: {threshold}. "
                "The service level objective name should be one of "
                f"{str(allowed_metrics)}. ")
        if threshold < 0:
            raise ValueError(
                f"Invalid value found, {metric}: {threshold}. "
                "The service level objective value should be "
                "non-negative.")

    return slo_by_metric
def parse_goodput(slo_pairs):
    """Convert ``"KEY:VALUE"`` strings into a ``{key: float(value)}`` dict.

    Args:
        slo_pairs: Iterable of strings, each holding exactly one ``:``
            that separates a metric name from a numeric value.

    Returns:
        A dict mapping each key to its value as a float. If a key is
        repeated, the last occurrence wins.

    Raises:
        argparse.ArgumentTypeError: If a pair does not split into exactly
            two parts on ``:``, or if the value is not a number (this
            includes the literal ``nan``).
    """
    # Function-scope import keeps this block self-contained.
    import math

    goodput_config_dict = {}
    try:
        for slo_pair in slo_pairs:
            # Unpacking raises ValueError for zero or multiple ':'.
            slo_name, slo_val = slo_pair.split(":")
            value = float(slo_val)
            # float() accepts "nan"; NaN compares False against every
            # number, so it would slip past a "< 0" check and then make
            # every "slo >= metric" comparison fail. Treat it as malformed.
            if math.isnan(value):
                raise ValueError(
                    f"value for {slo_name!r} is NaN, not a number")
            goodput_config_dict[slo_name] = value
    except ValueError as err:
        raise argparse.ArgumentTypeError(
            "Invalid format found for service level objectives. "
            "Specify service level objectives for goodput as \"KEY:VALUE\" "
            "pairs, where the key is a metric name, and the value is a "
            "number in milliseconds.") from err
    return goodput_config_dict
def
main
(
args
:
argparse
.
Namespace
):
print
(
args
)
random
.
seed
(
args
.
seed
)
...
...
@@ -681,6 +756,8 @@ def main(args: argparse.Namespace):
else
:
raise
ValueError
(
f
"Unknown dataset:
{
args
.
dataset_name
}
"
)
gootput_config_dict
=
check_goodput_args
(
args
)
benchmark_result
=
asyncio
.
run
(
benchmark
(
backend
=
backend
,
...
...
@@ -699,6 +776,7 @@ def main(args: argparse.Namespace):
float
(
p
)
for
p
in
args
.
metric_percentiles
.
split
(
","
)
],
ignore_eos
=
args
.
ignore_eos
,
gootput_config_dict
=
gootput_config_dict
,
max_concurrency
=
args
.
max_concurrency
,
))
...
...
@@ -915,6 +993,17 @@ if __name__ == "__main__":
"Default value is
\"
99
\"
. "
"Use
\"
--percentile-metrics
\"
to select metrics."
,
)
parser
.
add_argument
(
"--goodput"
,
nargs
=
"+"
,
required
=
False
,
help
=
"Specify service level objectives for goodput as
\"
KEY:VALUE
\"
"
"pairs, where the key is a metric name, and the value is in "
"milliseconds. Multiple
\"
KEY:VALUE
\"
pairs can be provided, "
"separated by spaces. Allowed request level metric names are "
"
\"
ttft
\"
,
\"
tpot
\"
,
\"
e2el
\"
. For more context on the definition of "
"goodput, refer to DistServe paper: https://arxiv.org/pdf/2401.09670 "
"and the blog: https://hao-ai-lab.github.io/blogs/distserve"
)
# group for dataset specific arguments
sonnet_group
=
parser
.
add_argument_group
(
"sonnet dataset options"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment