Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
7ca1da02
Unverified
Commit
7ca1da02
authored
Feb 25, 2025
by
Roger Wang
Committed by
GitHub
Feb 25, 2025
Browse files
[Misc] Fix input processing for Ultravox (#13871)
parent
5157338e
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
6 additions
and
15 deletions
+6
-15
tests/models/multimodal/processing/test_common.py
tests/models/multimodal/processing/test_common.py
+3
-3
tests/models/registry.py
tests/models/registry.py
+1
-1
vllm/model_executor/models/ultravox.py
vllm/model_executor/models/ultravox.py
+2
-11
No files found.
tests/models/multimodal/processing/test_common.py
View file @
7ca1da02
...
...
@@ -83,8 +83,8 @@ def _test_processing_correctness(
}
tokenizer_encode_kwargs
=
{}
if
model_config
.
hf_config
.
model_type
in
(
"mllama"
,
"whisper"
):
# For some
encoder-decoder
models, tokenizer will always add bos_token
if
model_config
.
hf_config
.
model_type
in
(
"mllama"
,
"whisper"
,
"ultravox"
):
# For some
multimodal
models, tokenizer will always add bos_token
# at the beginning of prompt by default, causing hf_processor outputs
# incorrect token ids. So we need use `add_special_tokens=False` here
# to leave bos_token to be added by the processor.
...
...
@@ -172,7 +172,7 @@ def _test_processing_correctness(
"Qwen/Qwen2-VL-2B-Instruct"
,
"Qwen/Qwen2.5-VL-3B-Instruct"
,
"Qwen/Qwen2-Audio-7B-Instruct"
,
"fixie-ai/ultravox-v0_
5-llama-3_2-1b
"
,
"fixie-ai/ultravox-v0_
4
"
,
"openai/whisper-large-v3"
,
])
@
pytest
.
mark
.
parametrize
(
"hit_rate"
,
[
0.3
,
0.5
,
1.0
])
...
...
tests/models/registry.py
View file @
7ca1da02
...
...
@@ -284,7 +284,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
"Qwen2VLForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2-VL-2B-Instruct"
),
# noqa: E501
"Qwen2_5_VLForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-VL-3B-Instruct"
,
# noqa: E501
min_transformers_version
=
"4.49"
),
# noqa: E501
"UltravoxModel"
:
_HfExamplesInfo
(
"fixie-ai/ultravox-v0_
5-llama-3_2-1b
"
,
"UltravoxModel"
:
_HfExamplesInfo
(
"fixie-ai/ultravox-v0_
4
"
,
trust_remote_code
=
True
),
# [Encoder-decoder]
"MllamaForConditionalGeneration"
:
_HfExamplesInfo
(
"meta-llama/Llama-3.2-11B-Vision-Instruct"
),
# noqa: E501
...
...
vllm/model_executor/models/ultravox.py
View file @
7ca1da02
...
...
@@ -146,7 +146,8 @@ class UltravoxMultiModalProcessor(
)
->
BatchFeature
:
# Text-only input not supported in composite processor
if
not
mm_data
or
not
mm_data
.
get
(
"audios"
,
[]):
prompt_ids
=
self
.
info
.
get_tokenizer
().
encode
(
prompt
)
prompt_ids
=
self
.
info
.
get_tokenizer
().
encode
(
prompt
,
add_special_tokens
=
False
)
prompt_ids
=
self
.
_apply_hf_processor_tokens_only
(
prompt_ids
)
return
BatchFeature
(
dict
(
input_ids
=
[
prompt_ids
]),
tensor_type
=
"pt"
)
...
...
@@ -185,16 +186,6 @@ class UltravoxMultiModalProcessor(
)
return
BatchFeature
(
combined_outputs
)
def
_apply_hf_processor_tokens_only
(
self
,
prompt_tokens
:
list
[
int
],
)
->
list
[
int
]:
# HF processor omits bos_token_id by setting add_special_tokens=False
tokenizer
=
self
.
info
.
get_tokenizer
()
assert
prompt_tokens
[
0
]
==
tokenizer
.
bos_token_id
return
prompt_tokens
[
1
:]
def
_get_mm_fields_config
(
self
,
hf_inputs
:
BatchFeature
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment