Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b95697d7
Unverified
Commit
b95697d7
authored
Aug 20, 2025
by
Chen Zhang
Committed by
GitHub
Aug 20, 2025
Browse files
[Frontend] improve error logging of chat completion (#22957)
Signed-off-by:
Chen Zhang
<
zhangch99@outlook.com
>
parent
582bbe6b
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
57 additions
and
17 deletions
+57
-17
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+57
-17
No files found.
vllm/entrypoints/openai/api_server.py
View file @
b95697d7
...
@@ -600,8 +600,11 @@ async def create_responses(request: ResponsesRequest, raw_request: Request):
...
@@ -600,8 +600,11 @@ async def create_responses(request: ResponsesRequest, raw_request: Request):
if
handler
is
None
:
if
handler
is
None
:
return
base
(
raw_request
).
create_error_response
(
return
base
(
raw_request
).
create_error_response
(
message
=
"The model does not support Responses API"
)
message
=
"The model does not support Responses API"
)
try
:
generator
=
await
handler
.
create_responses
(
request
,
raw_request
)
generator
=
await
handler
.
create_responses
(
request
,
raw_request
)
except
Exception
as
e
:
raise
HTTPException
(
status_code
=
HTTPStatus
.
INTERNAL_SERVER_ERROR
.
value
,
detail
=
str
(
e
))
from
e
if
isinstance
(
generator
,
ErrorResponse
):
if
isinstance
(
generator
,
ErrorResponse
):
return
JSONResponse
(
content
=
generator
.
model_dump
(),
return
JSONResponse
(
content
=
generator
.
model_dump
(),
...
@@ -618,7 +621,11 @@ async def retrieve_responses(response_id: str, raw_request: Request):
...
@@ -618,7 +621,11 @@ async def retrieve_responses(response_id: str, raw_request: Request):
return
base
(
raw_request
).
create_error_response
(
return
base
(
raw_request
).
create_error_response
(
message
=
"The model does not support Responses API"
)
message
=
"The model does not support Responses API"
)
response
=
await
handler
.
retrieve_responses
(
response_id
)
try
:
response
=
await
handler
.
retrieve_responses
(
response_id
)
except
Exception
as
e
:
raise
HTTPException
(
status_code
=
HTTPStatus
.
INTERNAL_SERVER_ERROR
.
value
,
detail
=
str
(
e
))
from
e
if
isinstance
(
response
,
ErrorResponse
):
if
isinstance
(
response
,
ErrorResponse
):
return
JSONResponse
(
content
=
response
.
model_dump
(),
return
JSONResponse
(
content
=
response
.
model_dump
(),
...
@@ -633,7 +640,11 @@ async def cancel_responses(response_id: str, raw_request: Request):
...
@@ -633,7 +640,11 @@ async def cancel_responses(response_id: str, raw_request: Request):
return
base
(
raw_request
).
create_error_response
(
return
base
(
raw_request
).
create_error_response
(
message
=
"The model does not support Responses API"
)
message
=
"The model does not support Responses API"
)
response
=
await
handler
.
cancel_responses
(
response_id
)
try
:
response
=
await
handler
.
cancel_responses
(
response_id
)
except
Exception
as
e
:
raise
HTTPException
(
status_code
=
HTTPStatus
.
INTERNAL_SERVER_ERROR
.
value
,
detail
=
str
(
e
))
from
e
if
isinstance
(
response
,
ErrorResponse
):
if
isinstance
(
response
,
ErrorResponse
):
return
JSONResponse
(
content
=
response
.
model_dump
(),
return
JSONResponse
(
content
=
response
.
model_dump
(),
...
@@ -667,9 +678,11 @@ async def create_chat_completion(request: ChatCompletionRequest,
...
@@ -667,9 +678,11 @@ async def create_chat_completion(request: ChatCompletionRequest,
if
handler
is
None
:
if
handler
is
None
:
return
base
(
raw_request
).
create_error_response
(
return
base
(
raw_request
).
create_error_response
(
message
=
"The model does not support Chat Completions API"
)
message
=
"The model does not support Chat Completions API"
)
try
:
generator
=
await
handler
.
create_chat_completion
(
request
,
raw_request
)
generator
=
await
handler
.
create_chat_completion
(
request
,
raw_request
)
except
Exception
as
e
:
raise
HTTPException
(
status_code
=
HTTPStatus
.
INTERNAL_SERVER_ERROR
.
value
,
detail
=
str
(
e
))
from
e
if
isinstance
(
generator
,
ErrorResponse
):
if
isinstance
(
generator
,
ErrorResponse
):
return
JSONResponse
(
content
=
generator
.
model_dump
(),
return
JSONResponse
(
content
=
generator
.
model_dump
(),
status_code
=
generator
.
error
.
code
)
status_code
=
generator
.
error
.
code
)
...
@@ -742,7 +755,11 @@ async def create_embedding(request: EmbeddingRequest, raw_request: Request):
...
@@ -742,7 +755,11 @@ async def create_embedding(request: EmbeddingRequest, raw_request: Request):
return
base
(
raw_request
).
create_error_response
(
return
base
(
raw_request
).
create_error_response
(
message
=
"The model does not support Embeddings API"
)
message
=
"The model does not support Embeddings API"
)
generator
=
await
handler
.
create_embedding
(
request
,
raw_request
)
try
:
generator
=
await
handler
.
create_embedding
(
request
,
raw_request
)
except
Exception
as
e
:
raise
HTTPException
(
status_code
=
HTTPStatus
.
INTERNAL_SERVER_ERROR
.
value
,
detail
=
str
(
e
))
from
e
if
isinstance
(
generator
,
ErrorResponse
):
if
isinstance
(
generator
,
ErrorResponse
):
return
JSONResponse
(
content
=
generator
.
model_dump
(),
return
JSONResponse
(
content
=
generator
.
model_dump
(),
...
@@ -770,8 +787,11 @@ async def create_pooling(request: PoolingRequest, raw_request: Request):
...
@@ -770,8 +787,11 @@ async def create_pooling(request: PoolingRequest, raw_request: Request):
if
handler
is
None
:
if
handler
is
None
:
return
base
(
raw_request
).
create_error_response
(
return
base
(
raw_request
).
create_error_response
(
message
=
"The model does not support Pooling API"
)
message
=
"The model does not support Pooling API"
)
try
:
generator
=
await
handler
.
create_pooling
(
request
,
raw_request
)
generator
=
await
handler
.
create_pooling
(
request
,
raw_request
)
except
Exception
as
e
:
raise
HTTPException
(
status_code
=
HTTPStatus
.
INTERNAL_SERVER_ERROR
.
value
,
detail
=
str
(
e
))
from
e
if
isinstance
(
generator
,
ErrorResponse
):
if
isinstance
(
generator
,
ErrorResponse
):
return
JSONResponse
(
content
=
generator
.
model_dump
(),
return
JSONResponse
(
content
=
generator
.
model_dump
(),
status_code
=
generator
.
error
.
code
)
status_code
=
generator
.
error
.
code
)
...
@@ -791,7 +811,11 @@ async def create_classify(request: ClassificationRequest,
...
@@ -791,7 +811,11 @@ async def create_classify(request: ClassificationRequest,
return
base
(
raw_request
).
create_error_response
(
return
base
(
raw_request
).
create_error_response
(
message
=
"The model does not support Classification API"
)
message
=
"The model does not support Classification API"
)
generator
=
await
handler
.
create_classify
(
request
,
raw_request
)
try
:
generator
=
await
handler
.
create_classify
(
request
,
raw_request
)
except
Exception
as
e
:
raise
HTTPException
(
status_code
=
HTTPStatus
.
INTERNAL_SERVER_ERROR
.
value
,
detail
=
str
(
e
))
from
e
if
isinstance
(
generator
,
ErrorResponse
):
if
isinstance
(
generator
,
ErrorResponse
):
return
JSONResponse
(
content
=
generator
.
model_dump
(),
return
JSONResponse
(
content
=
generator
.
model_dump
(),
status_code
=
generator
.
error
.
code
)
status_code
=
generator
.
error
.
code
)
...
@@ -820,7 +844,11 @@ async def create_score(request: ScoreRequest, raw_request: Request):
...
@@ -820,7 +844,11 @@ async def create_score(request: ScoreRequest, raw_request: Request):
return
base
(
raw_request
).
create_error_response
(
return
base
(
raw_request
).
create_error_response
(
message
=
"The model does not support Score API"
)
message
=
"The model does not support Score API"
)
generator
=
await
handler
.
create_score
(
request
,
raw_request
)
try
:
generator
=
await
handler
.
create_score
(
request
,
raw_request
)
except
Exception
as
e
:
raise
HTTPException
(
status_code
=
HTTPStatus
.
INTERNAL_SERVER_ERROR
.
value
,
detail
=
str
(
e
))
from
e
if
isinstance
(
generator
,
ErrorResponse
):
if
isinstance
(
generator
,
ErrorResponse
):
return
JSONResponse
(
content
=
generator
.
model_dump
(),
return
JSONResponse
(
content
=
generator
.
model_dump
(),
status_code
=
generator
.
error
.
code
)
status_code
=
generator
.
error
.
code
)
...
@@ -878,8 +906,12 @@ async def create_transcriptions(raw_request: Request,
...
@@ -878,8 +906,12 @@ async def create_transcriptions(raw_request: Request,
message
=
"The model does not support Transcriptions API"
)
message
=
"The model does not support Transcriptions API"
)
audio_data
=
await
request
.
file
.
read
()
audio_data
=
await
request
.
file
.
read
()
generator
=
await
handler
.
create_transcription
(
audio_data
,
request
,
try
:
raw_request
)
generator
=
await
handler
.
create_transcription
(
audio_data
,
request
,
raw_request
)
except
Exception
as
e
:
raise
HTTPException
(
status_code
=
HTTPStatus
.
INTERNAL_SERVER_ERROR
.
value
,
detail
=
str
(
e
))
from
e
if
isinstance
(
generator
,
ErrorResponse
):
if
isinstance
(
generator
,
ErrorResponse
):
return
JSONResponse
(
content
=
generator
.
model_dump
(),
return
JSONResponse
(
content
=
generator
.
model_dump
(),
...
@@ -919,8 +951,12 @@ async def create_translations(request: Annotated[TranslationRequest,
...
@@ -919,8 +951,12 @@ async def create_translations(request: Annotated[TranslationRequest,
message
=
"The model does not support Translations API"
)
message
=
"The model does not support Translations API"
)
audio_data
=
await
request
.
file
.
read
()
audio_data
=
await
request
.
file
.
read
()
generator
=
await
handler
.
create_translation
(
audio_data
,
request
,
try
:
raw_request
)
generator
=
await
handler
.
create_translation
(
audio_data
,
request
,
raw_request
)
except
Exception
as
e
:
raise
HTTPException
(
status_code
=
HTTPStatus
.
INTERNAL_SERVER_ERROR
.
value
,
detail
=
str
(
e
))
from
e
if
isinstance
(
generator
,
ErrorResponse
):
if
isinstance
(
generator
,
ErrorResponse
):
return
JSONResponse
(
content
=
generator
.
model_dump
(),
return
JSONResponse
(
content
=
generator
.
model_dump
(),
...
@@ -949,7 +985,11 @@ async def do_rerank(request: RerankRequest, raw_request: Request):
...
@@ -949,7 +985,11 @@ async def do_rerank(request: RerankRequest, raw_request: Request):
if
handler
is
None
:
if
handler
is
None
:
return
base
(
raw_request
).
create_error_response
(
return
base
(
raw_request
).
create_error_response
(
message
=
"The model does not support Rerank (Score) API"
)
message
=
"The model does not support Rerank (Score) API"
)
generator
=
await
handler
.
do_rerank
(
request
,
raw_request
)
try
:
generator
=
await
handler
.
do_rerank
(
request
,
raw_request
)
except
Exception
as
e
:
raise
HTTPException
(
status_code
=
HTTPStatus
.
INTERNAL_SERVER_ERROR
.
value
,
detail
=
str
(
e
))
from
e
if
isinstance
(
generator
,
ErrorResponse
):
if
isinstance
(
generator
,
ErrorResponse
):
return
JSONResponse
(
content
=
generator
.
model_dump
(),
return
JSONResponse
(
content
=
generator
.
model_dump
(),
status_code
=
generator
.
error
.
code
)
status_code
=
generator
.
error
.
code
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment