Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
56212b49
Unverified
Commit
56212b49
authored
Mar 03, 2026
by
Graham King
Committed by
GitHub
Mar 03, 2026
Browse files
fix(frontend): Update vllm processor for vllm 0.16 (#6799)
Signed-off-by:
Graham King
<
grahamk@nvidia.com
>
parent
9fe03dd8
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
18 additions
and
36 deletions
+18
-36
components/src/dynamo/frontend/prepost.py
components/src/dynamo/frontend/prepost.py
+17
-16
components/src/dynamo/frontend/vllm_processor.py
components/src/dynamo/frontend/vllm_processor.py
+1
-20
No files found.
components/src/dynamo/frontend/prepost.py
View file @
56212b49
...
...
@@ -11,6 +11,7 @@ from typing import Any
from
vllm.entrypoints.openai.chat_completion.protocol
import
ChatCompletionRequest
from
vllm.entrypoints.openai.engine.protocol
import
DeltaMessage
,
DeltaToolCall
from
vllm.reasoning
import
ReasoningParser
from
vllm.renderers
import
ChatParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tool_parsers
import
ToolParser
...
...
@@ -73,9 +74,7 @@ def _prepare_request(
*
,
tokenizer
:
TokenizerLike
,
tool_parser_class
:
type
[
ToolParser
]
|
None
,
)
->
tuple
[
ChatCompletionRequest
,
ToolParser
|
None
,
dict
[
str
,
Any
],
Any
,
dict
[
str
,
Any
]
]:
)
->
tuple
[
ChatCompletionRequest
,
ToolParser
|
None
,
dict
[
str
,
Any
],
Any
,
ChatParams
]:
"""Validate request and build arguments for template rendering.
Returns:
...
...
@@ -83,7 +82,7 @@ def _prepare_request(
tool_parser: Instantiated tool parser, or None.
chat_template_kwargs: Template kwargs (for PreprocessResult).
messages_for_render: Messages to pass as first arg to render_messages.
render_kwargs: Keyword arguments for render_messages / render_messages_async.
chat_params: ChatParams for render_messages / render_messages_async.
"""
if
isinstance
(
request
,
ChatCompletionRequest
):
request_for_sampling
=
request
...
...
@@ -123,15 +122,17 @@ def _prepare_request(
else
request_for_sampling
.
messages
)
render_kwargs
=
dict
(
chat_params
=
ChatParams
(
chat_template
=
request_for_sampling
.
chat_template
,
chat_template_content_format
=
"auto"
,
add_generation_prompt
=
request_for_sampling
.
add_generation_prompt
,
continue_final_message
=
request_for_sampling
.
continue_final_message
,
tools
=
tool_dicts
,
documents
=
request_for_sampling
.
documents
,
tokenize
=
tokenize_in_template
,
**
chat_template_kwargs
,
chat_template_kwargs
=
dict
(
add_generation_prompt
=
request_for_sampling
.
add_generation_prompt
,
continue_final_message
=
request_for_sampling
.
continue_final_message
,
tools
=
tool_dicts
,
documents
=
request_for_sampling
.
documents
,
tokenize
=
tokenize_in_template
,
**
chat_template_kwargs
,
),
)
return
(
...
...
@@ -139,7 +140,7 @@ def _prepare_request(
tool_parser
,
chat_template_kwargs
,
messages_for_render
,
render_kwarg
s
,
chat_param
s
,
)
...
...
@@ -155,12 +156,12 @@ async def preprocess_chat_request(
tool_parser
,
chat_template_kwargs
,
messages
,
render_kwarg
s
,
chat_param
s
,
)
=
_prepare_request
(
request
,
tokenizer
=
tokenizer
,
tool_parser_class
=
tool_parser_class
)
_
,
engine_prompt
=
await
renderer
.
render_messages_async
(
messages
,
**
render_kwarg
s
)
_
,
engine_prompt
=
await
renderer
.
render_messages_async
(
messages
,
chat_param
s
)
if
"prompt_token_ids"
in
engine_prompt
:
tokens
=
list
(
engine_prompt
[
"prompt_token_ids"
])
...
...
@@ -194,12 +195,12 @@ def preprocess_chat_request_sync(
tool_parser
,
chat_template_kwargs
,
messages
,
render_kwarg
s
,
chat_param
s
,
)
=
_prepare_request
(
request
,
tokenizer
=
tokenizer
,
tool_parser_class
=
tool_parser_class
)
_
,
engine_prompt
=
renderer
.
render_messages
(
messages
,
**
render_kwarg
s
)
_
,
engine_prompt
=
renderer
.
render_messages
(
messages
,
chat_param
s
)
if
"prompt_token_ids"
in
engine_prompt
:
tokens
=
list
(
engine_prompt
[
"prompt_token_ids"
])
...
...
components/src/dynamo/frontend/vllm_processor.py
View file @
56212b49
...
...
@@ -83,8 +83,6 @@ def map_finish_reason(raw_reason: str | None) -> FinishReason | None:
_w_input_processor
:
InputProcessor
|
None
=
None
_w_tokenizer
:
Any
=
None
_w_tool_parser_class
:
type
[
ToolParser
]
|
None
=
None
_w_reasoning_parser_class
:
type
[
ReasoningParser
]
|
None
=
None
_w_stream_interval
:
int
=
20
class
_PreprocessError
(
Exception
):
...
...
@@ -113,12 +111,9 @@ def _init_worker(
config_format
:
str
,
load_format
:
str
,
tool_parser_name
:
str
|
None
,
reasoning_parser_name
:
str
|
None
,
stream_interval
:
int
,
)
->
None
:
"""Initialize a worker process with its own VllmConfig and InputProcessor."""
global
_w_input_processor
,
_w_tokenizer
,
_w_tool_parser_class
global
_w_reasoning_parser_class
,
_w_stream_interval
model_config
=
ModelConfig
(
model
=
model_path
,
...
...
@@ -139,14 +134,6 @@ def _init_worker(
else
:
_w_tool_parser_class
=
None
if
reasoning_parser_name
:
_w_reasoning_parser_class
=
ReasoningParserManager
.
get_reasoning_parser
(
reasoning_parser_name
)
else
:
_w_reasoning_parser_class
=
None
_w_stream_interval
=
max
(
1
,
stream_interval
)
def
_worker_warmup
()
->
bool
:
"""Dummy task to ensure worker process is fully initialized."""
...
...
@@ -158,11 +145,7 @@ def _preprocess_worker(
request_id
:
str
,
model_name
:
str
,
)
->
PreprocessWorkerResult
:
"""Preprocess a request in a worker process and return a picklable result.
This replaces _request_handler's Phase A. No queues — errors propagate
naturally via the Future.
"""
"""Preprocess a request in a worker process and return a picklable result."""
pre
=
preprocess_chat_request_sync
(
request
,
tokenizer
=
_w_tokenizer
,
...
...
@@ -838,8 +821,6 @@ class EngineFactory:
config_format
,
load_format
,
tool_parser_name
,
reasoning_parser_name
,
self
.
stream_interval
,
),
)
# Warm up all workers to ensure initialization completes
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment