Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
norm
vllm
Commits
0370afa2
Unverified
Commit
0370afa2
authored
Jun 19, 2023
by
Zhuohan Li
Committed by
GitHub
Jun 19, 2023
Browse files
Remove benchmark_async_llm_server.py (#155)
parent
7e2a913c
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
0 additions
and
60 deletions
+0
-60
benchmarks/benchmark_async_llm_server.py
benchmarks/benchmark_async_llm_server.py
+0
-60
No files found.
benchmarks/benchmark_async_llm_server.py
deleted
100644 → 0
View file @
7e2a913c
import
argparse
import
json
import
threading
import
time
import
requests
def main(args: argparse.Namespace):
    """Benchmark an HTTP ``/generate`` endpoint with concurrent requests.

    One thread per prompt POSTs a streaming generation request to
    ``http://{args.host}:{args.port}/generate``. Once every POST has
    returned, each response stream is drained, its last non-empty
    ``\\0``-delimited chunk is decoded as JSON, and the number of newly
    generated words (space-separated words in ``["text"][0]`` minus those
    in the prompt) is accumulated. Two timing lines are printed to stdout.

    Args:
        args: Parsed command-line options. Reads ``host``, ``port``,
            ``max_tokens`` and ``n_threads``.

    Raises:
        RuntimeError: If a request thread failed to obtain a response, or
            a response stream contained no data.
        requests.HTTPError: If the server answered with an error status.
    """
    # Each prompt embeds a distinct number (i + 1 repeated five times), so
    # no two requests carry exactly the same text.
    prompts = [
        f"Tell me a story with more than {str(i + 1) * 5} words"
        for i in range(args.n_threads)
    ]

    api_url = f"http://{args.host}:{args.port}/generate"
    headers = {"User-Agent": "vLLM Benchmark Client"}
    ploads = [{
        "prompt": p,
        "max_tokens": args.max_tokens,
        "temperature": 0.0,
        "ignore_eos": True,
    } for p in prompts]

    results = [None] * args.n_threads
    # Exceptions raised inside worker threads, indexed like ``results``;
    # they are re-raised from the main thread after the join.
    errors = [None] * args.n_threads

    def send_request(results, i):
        """POST payload ``i`` and store the streaming response in ``results[i]``."""
        try:
            results[i] = requests.post(api_url,
                                       headers=headers,
                                       json=ploads[i],
                                       stream=True)
        except Exception as exc:  # surfaced to the caller after join()
            errors[i] = exc

    # use args.n_threads to prompt the backend
    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments.
    tik = time.perf_counter()
    threads = []
    for i in range(args.n_threads):
        t = threading.Thread(target=send_request, args=(results, i))
        t.start()
        threads.append(t)

    for t in threads:
        t.join()

    print(f"Time (POST): {time.perf_counter() - tik} s")

    n_words = 0
    try:
        for i, exc in enumerate(errors):
            if exc is not None:
                raise RuntimeError(f"request {i} to {api_url} failed") from exc

        for i, response in enumerate(results):
            response.raise_for_status()
            # Drop empty chunks (e.g. the one produced by a trailing
            # delimiter) so the last element is always the final payload.
            chunks = [
                chunk for chunk in response.iter_lines(chunk_size=8192,
                                                       decode_unicode=False,
                                                       delimiter=b"\0")
                if chunk
            ]
            if not chunks:
                raise RuntimeError(f"request {i} returned an empty stream")
            response_new_words = json.loads(chunks[-1].decode("utf-8"))["text"][0]
            n_words += (len(response_new_words.split(" ")) -
                        len(prompts[i].split(" ")))

        time_seconds = time.perf_counter() - tik
    finally:
        # Streaming responses keep their connection open until closed.
        for response in results:
            if response is not None:
                response.close()

    throughput = n_words / time_seconds if time_seconds > 0 else 0.0
    print(f"Time (total): {time_seconds:.3f} s to finish, n_threads: {args.n_threads}, "
          f"throughput: {throughput} words/s.")
if __name__ == "__main__":
    # Script entry point: every option is declared as (flag, type, default)
    # and registered in order, then the parsed namespace is handed to main().
    parser = argparse.ArgumentParser()
    for flag, value_type, fallback in (
        ("--host", str, "localhost"),
        ("--port", int, 8000),
        ("--max-tokens", int, 128),
        ("--n-threads", int, 128),
    ):
        parser.add_argument(flag, type=value_type, default=fallback)
    args = parser.parse_args()
    main(args)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment