Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
c021814f
Unverified
Commit
c021814f
authored
Apr 07, 2026
by
Biswa Panda
Committed by
GitHub
Apr 07, 2026
Browse files
feat(frontend): allowing passing vllm chat processor specific flags in frontend (#7896)
parent
942070c2
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
26 additions
and
1 deletion
+26
-1
components/src/dynamo/frontend/main.py
components/src/dynamo/frontend/main.py
+11
-0
components/src/dynamo/frontend/prepost.py
components/src/dynamo/frontend/prepost.py
+8
-1
components/src/dynamo/frontend/vllm_processor.py
components/src/dynamo/frontend/vllm_processor.py
+7
-0
No files found.
components/src/dynamo/frontend/main.py
View file @
c021814f
...
...
@@ -114,6 +114,17 @@ def parse_args() -> tuple[FrontendConfig, Optional[Namespace], Optional[Namespac
vllm_flags
=
None
sglang_flags
=
None
# --trust-remote-code is only meaningful with --dyn-chat-processor vllm.
# Warn and strip it when a different (or no) chat processor is active so
# it does not propagate as an unknown-argument error below.
if
"--trust-remote-code"
in
unknown
and
config
.
chat_processor
!=
"vllm"
:
logger
.
warning
(
"--trust-remote-code has no effect without '--dyn-chat-processor vllm'. "
"It is only supported by the vLLM chat processor. "
"Pass '--dyn-chat-processor vllm' to enable trust_remote_code."
)
unknown
=
[
arg
for
arg
in
unknown
if
arg
!=
"--trust-remote-code"
]
# parse extra vllm flags using vllm native parser.
if
config
.
chat_processor
==
"vllm"
:
try
:
...
...
components/src/dynamo/frontend/prepost.py
View file @
c021814f
...
...
@@ -80,6 +80,7 @@ def _prepare_request(
tokenizer
:
TokenizerLike
,
tool_parser_class
:
type
[
ToolParser
]
|
None
,
exclude_tools_when_tool_choice_none
:
bool
=
True
,
enable_auto_tool_choice
:
bool
=
False
,
)
->
tuple
[
ChatCompletionRequest
,
ToolParser
|
None
,
dict
[
str
,
Any
],
Any
,
ChatParams
]:
"""Validate request and build arguments for template rendering.
...
...
@@ -103,7 +104,11 @@ def _prepare_request(
request_for_sampling
=
ChatCompletionRequest
.
model_validate
(
request
)
tool_parser
:
ToolParser
|
None
=
None
if
tool_parser_class
and
request_for_sampling
.
tools
:
# With enable_auto_tool_choice the model may emit tool calls even when the
# client did not supply an explicit `tools` list, so we activate the parser
# whenever the tool_parser_class is available.
has_tools
=
bool
(
request_for_sampling
.
tools
)
if
tool_parser_class
and
(
has_tools
or
enable_auto_tool_choice
):
if
request_for_sampling
.
tool_choice
!=
"none"
:
tool_parser
=
tool_parser_class
(
tokenizer
)
request_for_sampling
=
tool_parser
.
adjust_request
(
request_for_sampling
)
...
...
@@ -163,6 +168,7 @@ async def preprocess_chat_request(
renderer
,
tool_parser_class
:
type
[
ToolParser
]
|
None
,
exclude_tools_when_tool_choice_none
:
bool
=
True
,
enable_auto_tool_choice
:
bool
=
False
,
)
->
PreprocessResult
:
(
request_for_sampling
,
...
...
@@ -175,6 +181,7 @@ async def preprocess_chat_request(
tokenizer
=
tokenizer
,
tool_parser_class
=
tool_parser_class
,
exclude_tools_when_tool_choice_none
=
exclude_tools_when_tool_choice_none
,
enable_auto_tool_choice
=
enable_auto_tool_choice
,
)
_
,
engine_prompt
=
await
renderer
.
render_messages_async
(
messages
,
chat_params
)
...
...
components/src/dynamo/frontend/vllm_processor.py
View file @
c021814f
...
...
@@ -77,6 +77,7 @@ class VllmProcessor:
output_processor
:
OutputProcessor
,
tool_parser_class
:
type
[
ToolParser
]
|
None
,
reasoning_parser_class
:
type
[
ReasoningParser
]
|
None
,
enable_auto_tool_choice
:
bool
=
False
,
):
self
.
tokenizer
=
tokenizer
self
.
input_processor
=
input_processor
...
...
@@ -86,6 +87,7 @@ class VllmProcessor:
self
.
tool_parser_class
=
tool_parser_class
self
.
reasoning_parser_class
=
reasoning_parser_class
self
.
exclude_tools_when_tool_choice_none
=
True
self
.
enable_auto_tool_choice
=
enable_auto_tool_choice
def
_get_eos_token_ids
(
self
)
->
list
[
int
]:
"""Return EOS token ids using tokenizer metadata.
...
...
@@ -144,6 +146,7 @@ class VllmProcessor:
renderer
=
self
.
input_processor
.
renderer
,
tool_parser_class
=
self
.
tool_parser_class
,
exclude_tools_when_tool_choice_none
=
self
.
exclude_tools_when_tool_choice_none
,
enable_auto_tool_choice
=
self
.
enable_auto_tool_choice
,
)
request_for_sampling
=
pre
.
request_for_sampling
...
...
@@ -433,11 +436,14 @@ class EngineFactory:
tokenizer_mode
=
getattr
(
self
.
flags
,
"tokenizer_mode"
,
None
)
or
"auto"
config_format
=
getattr
(
self
.
flags
,
"config_format"
,
None
)
or
"auto"
load_format
=
getattr
(
self
.
flags
,
"load_format"
,
None
)
or
"dummy"
trust_remote_code
=
getattr
(
self
.
flags
,
"trust_remote_code"
,
False
)
enable_auto_tool_choice
=
getattr
(
self
.
flags
,
"enable_auto_tool_choice"
,
False
)
model_config
=
ModelConfig
(
model
=
source_path
,
tokenizer_mode
=
tokenizer_mode
,
config_format
=
config_format
,
trust_remote_code
=
trust_remote_code
,
)
vllm_config
=
VllmConfig
(
model_config
=
model_config
,
...
...
@@ -496,6 +502,7 @@ class EngineFactory:
output_processor
,
tool_parser_class
,
reasoning_parser_class
,
enable_auto_tool_choice
=
enable_auto_tool_choice
,
)
gen
.
exclude_tools_when_tool_choice_none
=
(
self
.
config
.
exclude_tools_when_tool_choice_none
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment