Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e4be7d70
Unverified
Commit
e4be7d70
authored
Apr 06, 2024
by
youkaichao
Committed by
GitHub
Apr 06, 2024
Browse files
[CI/Benchmark] add more iteration and use median for robust latency benchmark (#3889)
parent
54951ac4
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
12 additions
and
2 deletions
+12
-2
benchmarks/benchmark_latency.py
benchmarks/benchmark_latency.py
+12
-2
No files found.
benchmarks/benchmark_latency.py
View file @
e4be7d70
...
@@ -68,6 +68,7 @@ def main(args: argparse.Namespace):
...
@@ -68,6 +68,7 @@ def main(args: argparse.Namespace):
return
latency
return
latency
print
(
"Warming up..."
)
print
(
"Warming up..."
)
for
_
in
tqdm
(
range
(
args
.
num_iters_warmup
),
desc
=
"Warmup iterations"
):
run_to_completion
(
profile_dir
=
None
)
run_to_completion
(
profile_dir
=
None
)
if
args
.
profile
:
if
args
.
profile
:
...
@@ -84,7 +85,12 @@ def main(args: argparse.Namespace):
...
@@ -84,7 +85,12 @@ def main(args: argparse.Namespace):
latencies
=
[]
latencies
=
[]
for
_
in
tqdm
(
range
(
args
.
num_iters
),
desc
=
"Profiling iterations"
):
for
_
in
tqdm
(
range
(
args
.
num_iters
),
desc
=
"Profiling iterations"
):
latencies
.
append
(
run_to_completion
(
profile_dir
=
None
))
latencies
.
append
(
run_to_completion
(
profile_dir
=
None
))
latencies
=
np
.
array
(
latencies
)
percentages
=
[
10
,
25
,
50
,
75
,
90
]
percentiles
=
np
.
percentile
(
latencies
,
percentages
)
print
(
f
'Avg latency:
{
np
.
mean
(
latencies
)
}
seconds'
)
print
(
f
'Avg latency:
{
np
.
mean
(
latencies
)
}
seconds'
)
for
percentage
,
percentile
in
zip
(
percentages
,
percentiles
):
print
(
f
'
{
percentage
}
% percentile latency:
{
percentile
}
seconds'
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
@@ -106,9 +112,13 @@ if __name__ == '__main__':
...
@@ -106,9 +112,13 @@ if __name__ == '__main__':
default
=
1
,
default
=
1
,
help
=
'Number of generated sequences per prompt.'
)
help
=
'Number of generated sequences per prompt.'
)
parser
.
add_argument
(
'--use-beam-search'
,
action
=
'store_true'
)
parser
.
add_argument
(
'--use-beam-search'
,
action
=
'store_true'
)
parser
.
add_argument
(
'--num-iters-warmup'
,
type
=
int
,
default
=
10
,
help
=
'Number of iterations to run for warmup.'
)
parser
.
add_argument
(
'--num-iters'
,
parser
.
add_argument
(
'--num-iters'
,
type
=
int
,
type
=
int
,
default
=
3
,
default
=
30
,
help
=
'Number of iterations to run.'
)
help
=
'Number of iterations to run.'
)
parser
.
add_argument
(
'--trust-remote-code'
,
parser
.
add_argument
(
'--trust-remote-code'
,
action
=
'store_true'
,
action
=
'store_true'
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment