Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
cea95dfb
Unverified
Commit
cea95dfb
authored
Sep 10, 2024
by
Pooya Davoodi
Committed by
GitHub
Sep 11, 2024
Browse files
[Frontend] Create ErrorResponse instead of raising exceptions in run_batch (#8347)
parent
6a512a00
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
31 additions
and
4 deletions
+31
-4
tests/entrypoints/openai/test_run_batch.py
tests/entrypoints/openai/test_run_batch.py
+3
-1
vllm/entrypoints/openai/run_batch.py
vllm/entrypoints/openai/run_batch.py
+28
-3
No files found.
tests/entrypoints/openai/test_run_batch.py
View file @
cea95dfb
...
...
@@ -8,7 +8,9 @@ from vllm.entrypoints.openai.protocol import BatchRequestOutput
INPUT_BATCH
=
"""{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
{"custom_id": "request-3", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NonExistModel", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}"""
{"custom_id": "request-3", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NonExistModel", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
{"custom_id": "request-4", "method": "POST", "url": "/bad_url", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
{"custom_id": "request-5", "method": "POST", "url": "/v1/chat/completions", "body": {"stream": "True", "model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}"""
INVALID_INPUT_BATCH
=
"""{"invalid_field": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}"""
...
...
vllm/entrypoints/openai/run_batch.py
View file @
cea95dfb
import
asyncio
from
http
import
HTTPStatus
from
io
import
StringIO
from
typing
import
Awaitable
,
Callable
,
List
,
Optional
...
...
@@ -135,6 +136,25 @@ async def write_file(path_or_url: str, data: str) -> None:
f
.
write
(
data
)
def make_error_request_output(request: BatchRequestInput,
                              error_msg: str) -> BatchRequestOutput:
    """Build the batch output record for a request that is being rejected.

    Args:
        request: The batch input line being rejected; its ``custom_id`` is
            copied onto the output.
        error_msg: Text stored in the output's ``error`` field.

    Returns:
        A ``BatchRequestOutput`` whose response carries
        ``HTTPStatus.BAD_REQUEST`` and whose ``id`` / ``request_id`` are
        freshly generated from ``random_uuid()``.
    """
    # The output id is generated before the response's request id, so the
    # two random_uuid() calls happen in the same order as before.
    output_id = f"vllm-{random_uuid()}"
    custom_id = request.custom_id
    error_response = BatchResponseData(
        status_code=HTTPStatus.BAD_REQUEST,
        request_id=f"vllm-batch-{random_uuid()}",
    )
    return BatchRequestOutput(
        id=output_id,
        custom_id=custom_id,
        response=error_response,
        error=error_msg,
    )
async def make_async_error_request_output(
        request: BatchRequestInput, error_msg: str) -> BatchRequestOutput:
    """Awaitable form of ``make_error_request_output``.

    Calling this returns a coroutine, so an error result can be awaited
    or gathered in the same way as any other awaitable.

    Args:
        request: The batch input line being rejected.
        error_msg: Text stored in the output's ``error`` field.

    Returns:
        The ``BatchRequestOutput`` produced by ``make_error_request_output``.
    """
    error_output = make_error_request_output(request, error_msg)
    return error_output
async
def
run_request
(
serving_engine_func
:
Callable
,
request
:
BatchRequestInput
,
tracker
:
BatchProgressTracker
)
->
BatchRequestOutput
:
...
...
@@ -158,7 +178,8 @@ async def run_request(serving_engine_func: Callable,
error
=
response
,
)
else
:
raise
ValueError
(
"Request must not be sent in stream mode"
)
batch_output
=
make_error_request_output
(
request
,
error_msg
=
"Request must not be sent in stream mode"
)
tracker
.
completed
()
return
batch_output
...
...
@@ -225,8 +246,12 @@ async def main(args):
tracker
))
tracker
.
submitted
()
else
:
raise
ValueError
(
"Only /v1/chat/completions and /v1/embeddings are"
"supported in the batch endpoint."
)
response_futures
.
append
(
make_async_error_request_output
(
request
,
error_msg
=
"Only /v1/chat/completions and "
"/v1/embeddings are supported in the batch endpoint."
,
))
with
tracker
.
pbar
():
responses
=
await
asyncio
.
gather
(
*
response_futures
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment