Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
40c0461f
Unverified
Commit
40c0461f
authored
Mar 11, 2026
by
Ning Xie
Committed by
GitHub
Mar 11, 2026
Browse files
[openapi] refactor render related openapi [3/N] (#36749)
Signed-off-by:
Andy Xie
<
andy.xning@gmail.com
>
parent
72475968
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
71 additions
and
131 deletions
+71
-131
vllm/entrypoints/serve/render/serving.py
vllm/entrypoints/serve/render/serving.py
+71
-131
No files found.
vllm/entrypoints/serve/render/serving.py
View file @
40c0461f
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
sys
import
traceback
from
collections.abc
import
Callable
,
Sequence
from
http
import
HTTPStatus
from
typing
import
Any
import
jinja2
from
openai_harmony
import
Message
as
OpenAIMessage
from
vllm.config
import
ModelConfig
...
...
@@ -18,7 +15,6 @@ from vllm.entrypoints.logger import RequestLogger
from
vllm.entrypoints.openai.chat_completion.protocol
import
ChatCompletionRequest
from
vllm.entrypoints.openai.completion.protocol
import
CompletionRequest
from
vllm.entrypoints.openai.engine.protocol
import
(
ErrorInfo
,
ErrorResponse
,
ModelCard
,
ModelList
,
...
...
@@ -30,7 +26,7 @@ from vllm.entrypoints.openai.parser.harmony_utils import (
parse_chat_inputs_to_harmony_messages
,
render_for_completion
,
)
from
vllm.entrypoints.utils
import
sanitize_messag
e
from
vllm.entrypoints.utils
import
create_error_respons
e
from
vllm.inputs.data
import
ProcessorInputs
,
PromptType
,
SingletonPrompt
,
TokensPrompt
from
vllm.logger
import
init_logger
from
vllm.parser
import
ParserManager
...
...
@@ -102,7 +98,6 @@ class OpenAIServingRender:
logger
.
error
(
"Error with model %s"
,
error_check_ret
)
return
error_check_ret
try
:
tokenizer
=
self
.
renderer
.
tokenizer
tool_parser
=
self
.
tool_parser
...
...
@@ -142,8 +137,7 @@ class OpenAIServingRender:
)
if
request
.
tools
is
None
or
(
request
.
tool_choice
==
"none"
and
self
.
exclude_tools_when_tool_choice_none
request
.
tool_choice
==
"none"
and
self
.
exclude_tools_when_tool_choice_none
):
tool_dicts
=
None
else
:
...
...
@@ -174,9 +168,6 @@ class OpenAIServingRender:
conversation
,
engine_prompts
=
self
.
_make_request_with_harmony
(
request
,
should_include_tools
)
except
(
ValueError
,
TypeError
,
RuntimeError
,
jinja2
.
TemplateError
)
as
e
:
logger
.
exception
(
"Error in preprocessing prompt inputs"
)
return
self
.
create_error_response
(
e
)
return
conversation
,
engine_prompts
...
...
@@ -204,15 +195,11 @@ class OpenAIServingRender:
"prompt_logprobs is not compatible with prompt embeds."
)
try
:
engine_prompts
=
await
self
.
_preprocess_completion
(
request
,
prompt_input
=
request
.
prompt
,
prompt_embeds
=
request
.
prompt_embeds
,
)
except
(
ValueError
,
TypeError
,
RuntimeError
,
jinja2
.
TemplateError
)
as
e
:
logger
.
exception
(
"Error in preprocessing prompt inputs"
)
return
self
.
create_error_response
(
e
)
return
engine_prompts
...
...
@@ -284,54 +271,7 @@ class OpenAIServingRender:
status_code
:
HTTPStatus
=
HTTPStatus
.
BAD_REQUEST
,
param
:
str
|
None
=
None
,
)
->
ErrorResponse
:
"""Copied from OpenAIServing.create_error_response."""
exc
:
Exception
|
None
=
None
if
isinstance
(
message
,
Exception
):
exc
=
message
from
vllm.exceptions
import
VLLMValidationError
if
isinstance
(
exc
,
VLLMValidationError
):
err_type
=
"BadRequestError"
status_code
=
HTTPStatus
.
BAD_REQUEST
param
=
exc
.
parameter
elif
isinstance
(
exc
,
(
ValueError
,
TypeError
,
RuntimeError
,
OverflowError
)):
# Common validation errors from user input
err_type
=
"BadRequestError"
status_code
=
HTTPStatus
.
BAD_REQUEST
param
=
None
elif
isinstance
(
exc
,
NotImplementedError
):
err_type
=
"NotImplementedError"
status_code
=
HTTPStatus
.
NOT_IMPLEMENTED
param
=
None
elif
exc
.
__class__
.
__name__
==
"TemplateError"
:
# jinja2.TemplateError (avoid importing jinja2)
err_type
=
"BadRequestError"
status_code
=
HTTPStatus
.
BAD_REQUEST
param
=
None
else
:
err_type
=
"InternalServerError"
status_code
=
HTTPStatus
.
INTERNAL_SERVER_ERROR
param
=
None
message
=
str
(
exc
)
if
self
.
log_error_stack
:
exc_type
,
_
,
_
=
sys
.
exc_info
()
if
exc_type
is
not
None
:
traceback
.
print_exc
()
else
:
traceback
.
print_stack
()
return
ErrorResponse
(
error
=
ErrorInfo
(
message
=
sanitize_message
(
message
),
type
=
err_type
,
code
=
status_code
.
value
,
param
=
param
,
)
)
return
create_error_response
(
message
,
err_type
,
status_code
,
param
)
def
_is_model_supported
(
self
,
model_name
:
str
)
->
bool
:
"""Simplified from OpenAIServing._is_model_supported (no LoRA support)."""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment