Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
d43be1f3
Unverified
Commit
d43be1f3
authored
Feb 27, 2026
by
Kris Hung
Committed by
GitHub
Feb 27, 2026
Browse files
fix: Fix chat processor for vllm video/audio examples (#6689)
parent
55c0a769
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
20 additions
and
31 deletions
+20
-31
examples/multimodal/components/processor.py
examples/multimodal/components/processor.py
+2
-1
examples/multimodal/utils/chat_processor.py
examples/multimodal/utils/chat_processor.py
+18
-30
No files found.
examples/multimodal/components/processor.py
View file @
d43be1f3
...
@@ -15,7 +15,8 @@ from typing import AsyncIterator, Tuple, Union
...
@@ -15,7 +15,8 @@ from typing import AsyncIterator, Tuple, Union
import
uvloop
import
uvloop
from
transformers
import
AutoTokenizer
from
transformers
import
AutoTokenizer
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.entrypoints.openai.protocol
import
ChatCompletionRequest
,
CompletionRequest
from
vllm.entrypoints.openai.chat_completion.protocol
import
ChatCompletionRequest
from
vllm.entrypoints.openai.completion.protocol
import
CompletionRequest
from
vllm.outputs
import
RequestOutput
from
vllm.outputs
import
RequestOutput
from
vllm.tokenizers
import
TokenizerLike
as
AnyTokenizer
from
vllm.tokenizers
import
TokenizerLike
as
AnyTokenizer
from
vllm.utils.argparse_utils
import
FlexibleArgumentParser
from
vllm.utils.argparse_utils
import
FlexibleArgumentParser
...
...
examples/multimodal/utils/chat_processor.py
View file @
d43be1f3
...
@@ -20,15 +20,15 @@ from typing import AsyncIterator, List, Optional, Protocol, Union, runtime_check
...
@@ -20,15 +20,15 @@ from typing import AsyncIterator, List, Optional, Protocol, Union, runtime_check
from
vllm.config
import
ModelConfig
from
vllm.config
import
ModelConfig
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.entrypoints.chat_utils
import
ConversationMessage
from
vllm.entrypoints.chat_utils
import
ConversationMessage
from
vllm.entrypoints.openai.protocol
import
(
from
vllm.entrypoints.openai.chat_completion.protocol
import
ChatCompletionRequest
ChatCompletionRequest
,
from
vllm.entrypoints.openai.chat_completion.serving
import
OpenAIServingChat
CompletionRequest
,
from
vllm.entrypoints.openai.completion.protocol
import
CompletionRequest
RequestResponseMetadata
,
from
vllm.entrypoints.openai.completion.serving
import
OpenAIServingCompletion
)
from
vllm.entrypoints.openai.engine.protocol
import
RequestResponseMetadata
from
vllm.entrypoints.openai.serving_chat
import
OpenAIServingChat
from
vllm.entrypoints.openai.models.protocol
import
BaseModelPath
from
vllm.entrypoints.openai.serving_completion
import
OpenAIServingCompletion
from
vllm.entrypoints.openai.models.serving
import
OpenAIServingModels
from
vllm.entrypoints.openai.serving_models
import
BaseModelPath
,
OpenAIServingModels
from
vllm.inputs.data
import
TokensPrompt
from
vllm.inputs.data
import
TokensPrompt
from
vllm.renderers.registry
import
renderer_from_config
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.tokenizers
import
TokenizerLike
as
AnyTokenizer
from
vllm.tokenizers
import
TokenizerLike
as
AnyTokenizer
...
@@ -41,6 +41,7 @@ class StubEngineClient:
...
@@ -41,6 +41,7 @@ class StubEngineClient:
def
__init__
(
self
,
model_config
:
ModelConfig
):
def
__init__
(
self
,
model_config
:
ModelConfig
):
self
.
model_config
=
model_config
self
.
model_config
=
model_config
self
.
renderer
=
renderer_from_config
(
model_config
)
self
.
input_processor
=
None
self
.
input_processor
=
None
self
.
io_processor
=
None
self
.
io_processor
=
None
...
@@ -154,9 +155,6 @@ class ChatProcessor:
...
@@ -154,9 +155,6 @@ class ChatProcessor:
async
def
preprocess
(
self
,
raw_request
:
ChatCompletionRequest
)
->
PreprocessResult
:
async
def
preprocess
(
self
,
raw_request
:
ChatCompletionRequest
)
->
PreprocessResult
:
request
=
self
.
parse_raw_request
(
raw_request
)
request
=
self
.
parse_raw_request
(
raw_request
)
# TODO: Revisit this later when adding multi-modal support for the frontend.
# If no chat template is provided and tokenizer doesn't have one,
# use a simple format that just concatenates messages
if
not
request
.
chat_template
and
not
self
.
tokenizer
.
chat_template
:
if
not
request
.
chat_template
and
not
self
.
tokenizer
.
chat_template
:
chat_template
=
"{% for message in messages %}{% if message['role'] == 'user' %}User: {{ message['content'] }}
\n
{% elif message['role'] == 'assistant' %}Assistant: {{ message['content'] }}
\n
{% endif %}{% endfor %}Assistant:"
chat_template
=
"{% for message in messages %}{% if message['role'] == 'user' %}User: {{ message['content'] }}
\n
{% elif message['role'] == 'assistant' %}Assistant: {{ message['content'] }}
\n
{% endif %}{% endfor %}Assistant:"
else
:
else
:
...
@@ -167,20 +165,14 @@ class ChatProcessor:
...
@@ -167,20 +165,14 @@ class ChatProcessor:
engine_prompts
,
engine_prompts
,
)
=
await
self
.
openai_serving
.
_preprocess_chat
(
)
=
await
self
.
openai_serving
.
_preprocess_chat
(
request
,
request
,
self
.
tokenizer
,
request
.
messages
,
request
.
messages
,
chat_template
=
chat_template
,
default_template
=
chat_template
,
chat_template_content_format
=
self
.
openai_serving
.
chat_template_content_format
,
default_template_content_format
=
self
.
openai_serving
.
chat_template_content_format
,
add_generation_prompt
=
request
.
add_generation_prompt
,
default_template_kwargs
=
None
,
continue_final_message
=
request
.
continue_final_message
,
tool_dicts
=
None
,
tool_dicts
=
None
,
documents
=
request
.
documents
,
tool_parser
=
None
,
chat_template_kwargs
=
request
.
chat_template_kwargs
,
tool_parser
=
self
.
openai_serving
.
tool_parser
,
add_special_tokens
=
request
.
add_special_tokens
,
)
)
# In newer vLLM, _preprocess_chat returns (conversation, engine_prompts) - 2 values
if
not
conversation
or
not
engine_prompts
:
if
not
conversation
or
not
engine_prompts
:
raise
ValueError
(
raise
ValueError
(
"Preprocessing returned empty conversation or engine_prompts"
"Preprocessing returned empty conversation or engine_prompts"
...
@@ -305,19 +297,14 @@ class CompletionsProcessor:
...
@@ -305,19 +297,14 @@ class CompletionsProcessor:
async
def
preprocess
(
self
,
raw_request
:
CompletionRequest
)
->
PreprocessResult
:
async
def
preprocess
(
self
,
raw_request
:
CompletionRequest
)
->
PreprocessResult
:
request
=
self
.
parse_raw_request
(
raw_request
)
request
=
self
.
parse_raw_request
(
raw_request
)
# In newer vLLM, _preprocess_completion was removed
engine_prompts
=
await
self
.
openai_serving
.
_preprocess_completion
(
# Use the renderer approach instead
request
,
renderer
=
self
.
openai_serving
.
_get_renderer
(
self
.
tokenizer
)
prompt_input
=
request
.
prompt
,
config
=
self
.
openai_serving
.
_build_render_config
(
request
)
engine_prompts
=
await
renderer
.
render_prompt_and_embeds
(
prompt_or_prompts
=
request
.
prompt
,
prompt_embeds
=
getattr
(
request
,
"prompt_embeds"
,
None
),
prompt_embeds
=
getattr
(
request
,
"prompt_embeds"
,
None
),
config
=
config
,
)
)
# engine_prompts is now a list of TokensPrompt
if
not
engine_prompts
:
if
not
engine_prompts
:
raise
ValueError
(
"
Renderer
returned empty engine_prompts"
)
raise
ValueError
(
"
Preprocessing
returned empty engine_prompts"
)
return
PreprocessResult
(
None
,
engine_prompts
[
0
])
return
PreprocessResult
(
None
,
engine_prompts
[
0
])
async
def
stream_response
(
async
def
stream_response
(
...
@@ -332,6 +319,7 @@ class CompletionsProcessor:
...
@@ -332,6 +319,7 @@ class CompletionsProcessor:
raise
ValueError
(
"Only streaming responses are supported"
)
raise
ValueError
(
"Only streaming responses are supported"
)
async
for
raw_response
in
self
.
openai_serving
.
completion_stream_generator
(
async
for
raw_response
in
self
.
openai_serving
.
completion_stream_generator
(
request
,
request
,
[],
# engine_prompts (not needed for streaming output)
result_generator
,
result_generator
,
request_id
,
request_id
,
int
(
time
.
time
()),
# created_time
int
(
time
.
time
()),
# created_time
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment