Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3ce2c050
"vllm/vscode:/vscode.git/clone" did not exist on "0fca3cdcf265cd375bca684d951702b6b7adf65a"
Unverified
Commit
3ce2c050
authored
Jun 15, 2024
by
zifeitong
Committed by
GitHub
Jun 15, 2024
Browse files
[Fix] Correct OpenAI batch response format (#5554)
parent
1c0afa13
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
25 additions
and
5 deletions
+25
-5
vllm/entrypoints/openai/protocol.py
vllm/entrypoints/openai/protocol.py
+12
-1
vllm/entrypoints/openai/run_batch.py
vllm/entrypoints/openai/run_batch.py
+13
-4
No files found.
vllm/entrypoints/openai/protocol.py
View file @
3ce2c050
...
...
@@ -672,6 +672,17 @@ class BatchRequestInput(OpenAIBaseModel):
body
:
Union
[
ChatCompletionRequest
,
]
class
BatchResponseData
(
OpenAIBaseModel
):
# HTTP status code of the response.
status_code
:
int
=
200
# A unique identifier for the API request.
request_id
:
str
# The body of the response.
body
:
Union
[
ChatCompletionResponse
,
]
class
BatchRequestOutput
(
OpenAIBaseModel
):
"""
The per-line object of the batch output and error files
...
...
@@ -683,7 +694,7 @@ class BatchRequestOutput(OpenAIBaseModel):
# inputs.
custom_id
:
str
response
:
Optional
[
ChatCompletion
Response
]
response
:
Optional
[
Batch
Response
Data
]
# For requests that failed with a non-HTTP error, this will contain more
# information on the cause of the failure.
...
...
vllm/entrypoints/openai/run_batch.py
View file @
3ce2c050
...
...
@@ -10,7 +10,9 @@ from vllm.engine.arg_utils import AsyncEngineArgs, nullable_str
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
from
vllm.entrypoints.openai.protocol
import
(
BatchRequestInput
,
BatchRequestOutput
,
ChatCompletionResponse
)
BatchResponseData
,
ChatCompletionResponse
,
ErrorResponse
)
from
vllm.entrypoints.openai.serving_chat
import
OpenAIServingChat
from
vllm.logger
import
init_logger
from
vllm.usage.usage_lib
import
UsageContext
...
...
@@ -77,20 +79,27 @@ async def run_request(chat_serving: OpenAIServingChat,
request
:
BatchRequestInput
)
->
BatchRequestOutput
:
chat_request
=
request
.
body
chat_response
=
await
chat_serving
.
create_chat_completion
(
chat_request
)
if
isinstance
(
chat_response
,
ChatCompletionResponse
):
batch_output
=
BatchRequestOutput
(
id
=
f
"vllm-
{
random_uuid
()
}
"
,
custom_id
=
request
.
custom_id
,
response
=
chat_response
,
response
=
BatchResponseData
(
body
=
chat_response
,
request_id
=
f
"vllm-batch-
{
random_uuid
()
}
"
),
error
=
None
,
)
el
se
:
el
if
isinstance
(
chat_response
,
ErrorResponse
)
:
batch_output
=
BatchRequestOutput
(
id
=
f
"vllm-
{
random_uuid
()
}
"
,
custom_id
=
request
.
custom_id
,
response
=
None
,
response
=
BatchResponseData
(
status_code
=
chat_response
.
code
,
request_id
=
f
"vllm-batch-
{
random_uuid
()
}
"
),
error
=
chat_response
,
)
else
:
raise
ValueError
(
"Request must not be sent in stream mode"
)
return
batch_output
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment