Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d0bed837
Unverified
Commit
d0bed837
authored
Oct 11, 2025
by
Chauncey
Committed by
GitHub
Oct 11, 2025
Browse files
[Refactor]Reduce duplicate code in serving_chat (#26627)
Signed-off-by:
chaunceyjiang
<
chaunceyjiang@gmail.com
>
parent
f7ee6986
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
56 additions
and
49 deletions
+56
-49
vllm/entrypoints/openai/serving_chat.py
vllm/entrypoints/openai/serving_chat.py
+8
-36
vllm/entrypoints/openai/serving_engine.py
vllm/entrypoints/openai/serving_engine.py
+46
-1
vllm/entrypoints/openai/serving_responses.py
vllm/entrypoints/openai/serving_responses.py
+2
-12
No files found.
vllm/entrypoints/openai/serving_chat.py
View file @
d0bed837
...
@@ -6,7 +6,7 @@ import json
...
@@ -6,7 +6,7 @@ import json
import
time
import
time
from
collections.abc
import
AsyncGenerator
,
AsyncIterator
from
collections.abc
import
AsyncGenerator
,
AsyncIterator
from
collections.abc
import
Sequence
as
GenericSequence
from
collections.abc
import
Sequence
as
GenericSequence
from
typing
import
Callable
,
Final
,
Optional
,
Union
from
typing
import
Final
,
Optional
,
Union
import
jinja2
import
jinja2
import
partial_json_parser
import
partial_json_parser
...
@@ -56,14 +56,13 @@ from vllm.entrypoints.openai.protocol import (
...
@@ -56,14 +56,13 @@ from vllm.entrypoints.openai.protocol import (
)
)
from
vllm.entrypoints.openai.serving_engine
import
OpenAIServing
,
clamp_prompt_logprobs
from
vllm.entrypoints.openai.serving_engine
import
OpenAIServing
,
clamp_prompt_logprobs
from
vllm.entrypoints.openai.serving_models
import
OpenAIServingModels
from
vllm.entrypoints.openai.serving_models
import
OpenAIServingModels
from
vllm.entrypoints.openai.tool_parsers
import
ToolParser
,
ToolParserManager
from
vllm.entrypoints.openai.tool_parsers
import
ToolParser
from
vllm.entrypoints.openai.tool_parsers.mistral_tool_parser
import
MistralToolCall
from
vllm.entrypoints.openai.tool_parsers.mistral_tool_parser
import
MistralToolCall
from
vllm.entrypoints.utils
import
get_max_tokens
from
vllm.entrypoints.utils
import
get_max_tokens
from
vllm.inputs.data
import
TokensPrompt
as
EngineTokensPrompt
from
vllm.inputs.data
import
TokensPrompt
as
EngineTokensPrompt
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.logprobs
import
Logprob
from
vllm.logprobs
import
Logprob
from
vllm.outputs
import
CompletionOutput
,
RequestOutput
from
vllm.outputs
import
CompletionOutput
,
RequestOutput
from
vllm.reasoning
import
ReasoningParser
,
ReasoningParserManager
from
vllm.sampling_params
import
BeamSearchParams
,
SamplingParams
from
vllm.sampling_params
import
BeamSearchParams
,
SamplingParams
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
,
MistralTokenizer
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
,
MistralTokenizer
from
vllm.transformers_utils.tokenizers
import
(
from
vllm.transformers_utils.tokenizers
import
(
...
@@ -112,42 +111,15 @@ class OpenAIServingChat(OpenAIServing):
...
@@ -112,42 +111,15 @@ class OpenAIServingChat(OpenAIServing):
self
.
trust_request_chat_template
=
trust_request_chat_template
self
.
trust_request_chat_template
=
trust_request_chat_template
self
.
enable_log_outputs
=
enable_log_outputs
self
.
enable_log_outputs
=
enable_log_outputs
# set up reasoning parser
self
.
reasoning_parser
=
self
.
_get_reasoning_parser
(
reasoning_parser_name
=
reasoning_parser
)
# set up tool use
# set up tool use
self
.
enable_auto_tools
:
bool
=
enable_auto_tools
self
.
enable_auto_tools
:
bool
=
enable_auto_tools
if
self
.
enable_auto_tools
:
self
.
tool_parser
=
self
.
_get_tool_parser
(
logger
.
info
(
tool_parser_name
=
tool_parser
,
enable_auto_tools
=
enable_auto_tools
'"auto" tool choice has been enabled please note that while'
" the parallel_tool_calls client option is preset for "
"compatibility reasons, it will be ignored."
)
self
.
reasoning_parser
:
Optional
[
Callable
[[
AnyTokenizer
],
ReasoningParser
]]
=
(
None
)
)
if
reasoning_parser
:
try
:
self
.
reasoning_parser
=
ReasoningParserManager
.
get_reasoning_parser
(
reasoning_parser
)
assert
self
.
reasoning_parser
is
not
None
except
Exception
as
e
:
raise
TypeError
(
f
"
{
reasoning_parser
=
}
has not been registered"
)
from
e
self
.
tool_parser
:
Optional
[
Callable
[[
AnyTokenizer
],
ToolParser
]]
=
None
if
self
.
enable_auto_tools
:
try
:
if
tool_parser
==
"pythonic"
and
self
.
model_config
.
model
.
startswith
(
"meta-llama/Llama-3.2"
):
logger
.
warning
(
"Llama3.2 models may struggle to emit valid pythonic tool calls"
)
self
.
tool_parser
=
ToolParserManager
.
get_tool_parser
(
tool_parser
)
except
Exception
as
e
:
raise
TypeError
(
"Error: --enable-auto-tool-choice requires "
f
"tool_parser:'
{
tool_parser
}
' which has not "
"been registered"
)
from
e
self
.
exclude_tools_when_tool_choice_none
=
exclude_tools_when_tool_choice_none
self
.
exclude_tools_when_tool_choice_none
=
exclude_tools_when_tool_choice_none
self
.
enable_prompt_tokens_details
=
enable_prompt_tokens_details
self
.
enable_prompt_tokens_details
=
enable_prompt_tokens_details
...
...
vllm/entrypoints/openai/serving_engine.py
View file @
d0bed837
...
@@ -63,7 +63,7 @@ from vllm.entrypoints.openai.protocol import (
...
@@ -63,7 +63,7 @@ from vllm.entrypoints.openai.protocol import (
TranslationRequest
,
TranslationRequest
,
)
)
from
vllm.entrypoints.openai.serving_models
import
OpenAIServingModels
from
vllm.entrypoints.openai.serving_models
import
OpenAIServingModels
from
vllm.entrypoints.openai.tool_parsers
import
ToolParser
from
vllm.entrypoints.openai.tool_parsers
import
ToolParser
,
ToolParserManager
from
vllm.entrypoints.renderer
import
BaseRenderer
,
CompletionRenderer
,
RenderConfig
from
vllm.entrypoints.renderer
import
BaseRenderer
,
CompletionRenderer
,
RenderConfig
from
vllm.entrypoints.utils
import
_validate_truncation_size
from
vllm.entrypoints.utils
import
_validate_truncation_size
from
vllm.inputs.data
import
PromptType
from
vllm.inputs.data
import
PromptType
...
@@ -82,6 +82,7 @@ from vllm.multimodal import ( # noqa: F401 - Required to resolve Pydantic error
...
@@ -82,6 +82,7 @@ from vllm.multimodal import ( # noqa: F401 - Required to resolve Pydantic error
)
)
from
vllm.outputs
import
CompletionOutput
,
PoolingRequestOutput
,
RequestOutput
from
vllm.outputs
import
CompletionOutput
,
PoolingRequestOutput
,
RequestOutput
from
vllm.pooling_params
import
PoolingParams
from
vllm.pooling_params
import
PoolingParams
from
vllm.reasoning
import
ReasoningParser
,
ReasoningParserManager
from
vllm.sampling_params
import
BeamSearchParams
,
SamplingParams
from
vllm.sampling_params
import
BeamSearchParams
,
SamplingParams
from
vllm.tracing
import
(
from
vllm.tracing
import
(
contains_trace_headers
,
contains_trace_headers
,
...
@@ -274,6 +275,50 @@ class OpenAIServing:
...
@@ -274,6 +275,50 @@ class OpenAIServing:
self
.
model_config
=
self
.
models
.
model_config
self
.
model_config
=
self
.
models
.
model_config
self
.
max_model_len
=
self
.
model_config
.
max_model_len
self
.
max_model_len
=
self
.
model_config
.
max_model_len
def
_get_tool_parser
(
self
,
tool_parser_name
:
Optional
[
str
]
=
None
,
enable_auto_tools
:
bool
=
False
)
->
Optional
[
Callable
[[
AnyTokenizer
],
ToolParser
]]:
"""Get the tool parser based on the name."""
parser
=
None
if
not
enable_auto_tools
or
tool_parser_name
is
None
:
return
parser
logger
.
info
(
'"auto" tool choice has been enabled please note that while'
" the parallel_tool_calls client option is preset for "
"compatibility reasons, it will be ignored."
)
try
:
if
tool_parser_name
==
"pythonic"
and
self
.
model_config
.
model
.
startswith
(
"meta-llama/Llama-3.2"
):
logger
.
warning
(
"Llama3.2 models may struggle to emit valid pythonic tool calls"
)
parser
=
ToolParserManager
.
get_tool_parser
(
tool_parser_name
)
except
Exception
as
e
:
raise
TypeError
(
"Error: --enable-auto-tool-choice requires "
f
"tool_parser:'
{
tool_parser_name
}
' which has not "
"been registered"
)
from
e
return
parser
def
_get_reasoning_parser
(
self
,
reasoning_parser_name
:
str
,
)
->
Optional
[
Callable
[[
AnyTokenizer
],
ReasoningParser
]]:
"""Get the reasoning parser based on the name."""
parser
=
None
if
not
reasoning_parser_name
:
return
None
try
:
parser
=
ReasoningParserManager
.
get_reasoning_parser
(
reasoning_parser_name
)
assert
parser
is
not
None
except
Exception
as
e
:
raise
TypeError
(
f
"
{
reasoning_parser_name
=
}
has not been registered"
)
from
e
return
parser
async
def
reset_mm_cache
(
self
)
->
None
:
async
def
reset_mm_cache
(
self
)
->
None
:
self
.
processor
.
clear_mm_cache
()
self
.
processor
.
clear_mm_cache
()
await
self
.
engine_client
.
reset_mm_cache
()
await
self
.
engine_client
.
reset_mm_cache
()
...
...
vllm/entrypoints/openai/serving_responses.py
View file @
d0bed837
...
@@ -96,7 +96,6 @@ from vllm.logger import init_logger
...
@@ -96,7 +96,6 @@ from vllm.logger import init_logger
from
vllm.logprobs
import
Logprob
as
SampleLogprob
from
vllm.logprobs
import
Logprob
as
SampleLogprob
from
vllm.logprobs
import
SampleLogprobs
from
vllm.logprobs
import
SampleLogprobs
from
vllm.outputs
import
CompletionOutput
from
vllm.outputs
import
CompletionOutput
from
vllm.reasoning
import
ReasoningParser
,
ReasoningParserManager
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
from
vllm.utils
import
random_uuid
from
vllm.utils
import
random_uuid
...
@@ -136,18 +135,9 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -136,18 +135,9 @@ class OpenAIServingResponses(OpenAIServing):
self
.
chat_template_content_format
:
Final
=
chat_template_content_format
self
.
chat_template_content_format
:
Final
=
chat_template_content_format
self
.
enable_log_outputs
=
enable_log_outputs
self
.
enable_log_outputs
=
enable_log_outputs
self
.
reasoning_parser
:
Optional
[
Callable
[[
AnyTokenizer
],
R
easoning
P
arser
]]
=
(
self
.
reasoning_parser
=
self
.
_get_r
easoning
_p
arser
(
None
reasoning_parser_name
=
reasoning_parser
)
)
if
reasoning_parser
:
try
:
self
.
reasoning_parser
=
ReasoningParserManager
.
get_reasoning_parser
(
reasoning_parser
)
assert
self
.
reasoning_parser
is
not
None
except
Exception
as
e
:
raise
TypeError
(
f
"
{
reasoning_parser
=
}
has not been registered"
)
from
e
self
.
enable_prompt_tokens_details
=
enable_prompt_tokens_details
self
.
enable_prompt_tokens_details
=
enable_prompt_tokens_details
self
.
enable_force_include_usage
=
enable_force_include_usage
self
.
enable_force_include_usage
=
enable_force_include_usage
self
.
default_sampling_params
=
self
.
model_config
.
get_diff_sampling_param
()
self
.
default_sampling_params
=
self
.
model_config
.
get_diff_sampling_param
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment