Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
58fcc854
Unverified
Commit
58fcc854
authored
Sep 09, 2024
by
Adam Lugowski
Committed by
GitHub
Sep 09, 2024
Browse files
[Frontend] Add progress reporting to run_batch.py (#8060)
Co-authored-by:
Adam Lugowski
<
adam.lugowski@parasail.io
>
parent
08287ef6
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
48 additions
and
6 deletions
+48
-6
vllm/entrypoints/openai/run_batch.py
vllm/entrypoints/openai/run_batch.py
+48
-6
No files found.
vllm/entrypoints/openai/run_batch.py
View file @
58fcc854
import
asyncio
from
io
import
StringIO
from
typing
import
Awaitable
,
Callable
,
List
from
typing
import
Awaitable
,
Callable
,
List
,
Optional
import
aiohttp
import
torch
from
prometheus_client
import
start_http_server
from
tqdm
import
tqdm
from
vllm.engine.arg_utils
import
AsyncEngineArgs
,
nullable_str
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
...
...
@@ -78,6 +80,38 @@ def parse_args():
return
parser
.
parse_args
()
# Explicitly use a plain-text format that ends with a newline.
# This makes it impossible to see the animation in the progress bar,
# but avoids garbled output under ray or multiprocessing, which prefix
# each line of output.
_BAR_FORMAT
=
"{desc}: {percentage:3.0f}% Completed | {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]
\n
"
# noqa: E501
class BatchProgressTracker:
    """Count submitted batch requests and report completions on a tqdm bar.

    Intended call order: ``submitted()`` once per queued request, then
    ``pbar()`` to create the bar sized to the number submitted so far, and
    ``completed()`` each time a request finishes.
    """

    def __init__(self):
        # Number of requests registered through submitted(); used as the
        # bar's total when pbar() is called.
        self._total = 0
        # Progress bar created by pbar(); None until then.
        self._pbar: Optional[tqdm] = None

    def submitted(self):
        """Record that one more request has been queued."""
        self._total += 1

    def completed(self):
        """Advance the progress bar by one step.

        Does nothing if ``pbar()`` has not been called yet.
        """
        # Compare against None explicitly: truth-testing a tqdm object is
        # delegated to tqdm itself (it depends on the bar's total), so it
        # does not answer "has the bar been created?".
        if self._pbar is not None:
            self._pbar.update()

    def pbar(self) -> tqdm:
        """Create, store and return the progress bar.

        The bar's total is the number of requests submitted so far. It is
        enabled only when torch.distributed is not initialized or this
        process has rank 0; otherwise it is created disabled.
        """
        enable_tqdm = (not torch.distributed.is_initialized()
                       or torch.distributed.get_rank() == 0)
        self._pbar = tqdm(total=self._total,
                          unit="req",
                          desc="Running batch",
                          mininterval=5,
                          disable=not enable_tqdm,
                          bar_format=_BAR_FORMAT)
        return self._pbar
async
def
read_file
(
path_or_url
:
str
)
->
str
:
if
path_or_url
.
startswith
(
"http://"
)
or
path_or_url
.
startswith
(
"https://"
):
async
with
aiohttp
.
ClientSession
()
as
session
,
\
...
...
@@ -102,7 +136,8 @@ async def write_file(path_or_url: str, data: str) -> None:
async
def
run_request
(
serving_engine_func
:
Callable
,
request
:
BatchRequestInput
)
->
BatchRequestOutput
:
request
:
BatchRequestInput
,
tracker
:
BatchProgressTracker
)
->
BatchRequestOutput
:
response
=
await
serving_engine_func
(
request
.
body
)
if
isinstance
(
response
,
(
ChatCompletionResponse
,
EmbeddingResponse
)):
...
...
@@ -125,6 +160,7 @@ async def run_request(serving_engine_func: Callable,
else
:
raise
ValueError
(
"Request must not be sent in stream mode"
)
tracker
.
completed
()
return
batch_output
...
...
@@ -164,6 +200,9 @@ async def main(args):
request_logger
=
request_logger
,
)
tracker
=
BatchProgressTracker
()
logger
.
info
(
"Reading batch from %s..."
,
args
.
input_file
)
# Submit all requests in the file to the engine "concurrently".
response_futures
:
List
[
Awaitable
[
BatchRequestOutput
]]
=
[]
for
request_json
in
(
await
read_file
(
args
.
input_file
)).
strip
().
split
(
"
\n
"
):
...
...
@@ -178,15 +217,18 @@ async def main(args):
if
request
.
url
==
"/v1/chat/completions"
:
response_futures
.
append
(
run_request
(
openai_serving_chat
.
create_chat_completion
,
request
))
request
,
tracker
))
tracker
.
submitted
()
elif
request
.
url
==
"/v1/embeddings"
:
response_futures
.
append
(
run_request
(
openai_serving_embedding
.
create_embedding
,
request
))
run_request
(
openai_serving_embedding
.
create_embedding
,
request
,
tracker
))
tracker
.
submitted
()
else
:
raise
ValueError
(
"Only /v1/chat/completions and /v1/embeddings are"
"supported in the batch endpoint."
)
with
tracker
.
pbar
():
responses
=
await
asyncio
.
gather
(
*
response_futures
)
output_buffer
=
StringIO
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment