Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
527ca321
Unverified
Commit
527ca321
authored
Feb 11, 2026
by
Raushan Turganbay
Committed by
GitHub
Feb 11, 2026
Browse files
[Bugfix] Fix more multimodal tests for transformers V5 (#34334)
Signed-off-by:
raushan
<
raushan@huggingface.co
>
parent
5458eb83
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
18 additions
and
11 deletions
+18
-11
tests/models/multimodal/processing/test_common.py
tests/models/multimodal/processing/test_common.py
+1
-0
vllm/model_executor/models/glmasr.py
vllm/model_executor/models/glmasr.py
+3
-3
vllm/model_executor/models/glmasr_utils.py
vllm/model_executor/models/glmasr_utils.py
+2
-2
vllm/model_executor/models/lfm2_vl.py
vllm/model_executor/models/lfm2_vl.py
+3
-1
vllm/model_executor/models/qwen2_vl.py
vllm/model_executor/models/qwen2_vl.py
+9
-5
No files found.
tests/models/multimodal/processing/test_common.py
View file @
527ca321
...
@@ -108,6 +108,7 @@ _ADD_SPECIAL_TOKENS_OVERRIDES = {
...
@@ -108,6 +108,7 @@ _ADD_SPECIAL_TOKENS_OVERRIDES = {
"paligemma"
:
False
,
"paligemma"
:
False
,
"ultravox"
:
False
,
"ultravox"
:
False
,
"whisper"
:
False
,
"whisper"
:
False
,
"lfm2_vl"
:
False
,
}
}
_IGNORE_MM_KEYS
=
{
_IGNORE_MM_KEYS
=
{
...
...
vllm/model_executor/models/glmasr.py
View file @
527ca321
...
@@ -810,9 +810,9 @@ class GlmAsrMultiModalProcessor(BaseMultiModalProcessor["GlmAsrProcessingInfo"])
...
@@ -810,9 +810,9 @@ class GlmAsrMultiModalProcessor(BaseMultiModalProcessor["GlmAsrProcessingInfo"])
# Postprocess: rename mask and add chunk counts
# Postprocess: rename mask and add chunk counts
# Handle different key names from different transformers versions
# Handle different key names from different transformers versions
if
"input_feature_mask"
in
outputs
:
if
"input_feature
s
_mask"
in
outputs
:
outputs
[
"feature_attention_mask"
]
=
outputs
.
pop
(
"input_feature_mask"
)
outputs
[
"feature_attention_mask"
]
=
outputs
.
pop
(
"input_feature
s
_mask"
)
elif
"feature
_attention
_mask"
not
in
outputs
and
"input_features"
in
outputs
:
elif
"
input_
feature
s
_mask"
not
in
outputs
and
"input_features"
in
outputs
:
# If no mask is provided, create one from input_features
# If no mask is provided, create one from input_features
input_features
=
outputs
[
"input_features"
]
input_features
=
outputs
[
"input_features"
]
if
isinstance
(
input_features
,
torch
.
Tensor
):
if
isinstance
(
input_features
,
torch
.
Tensor
):
...
...
vllm/model_executor/models/glmasr_utils.py
View file @
527ca321
...
@@ -18,8 +18,8 @@ def _calculate_conv_output_length(
...
@@ -18,8 +18,8 @@ def _calculate_conv_output_length(
input_length
:
torch
.
Tensor
,
padding
:
int
,
kernel_size
:
int
,
stride
:
int
input_length
:
torch
.
Tensor
,
padding
:
int
,
kernel_size
:
int
,
stride
:
int
)
->
torch
.
Tensor
:
)
->
torch
.
Tensor
:
"""Calculate Conv1d output length using standard formula."""
"""Calculate Conv1d output length using standard formula."""
#
Standard formula: floor((input + 2*padding - kernel_size) / stride) + 1
#
in sync with `hf_processor._get_audio_token_length`
return
(
input_length
+
2
*
padding
-
kernel_size
)
//
stride
+
1
return
(
input_length
+
2
*
padding
-
(
kernel_size
-
1
)
-
1
)
//
stride
+
1
def
_as_list_chunk_counts
(
def
_as_list_chunk_counts
(
...
...
vllm/model_executor/models/lfm2_vl.py
View file @
527ca321
...
@@ -347,7 +347,9 @@ class Lfm2VLMultiModalProcessor(BaseMultiModalProcessor[Lfm2VLProcessingInfo]):
...
@@ -347,7 +347,9 @@ class Lfm2VLMultiModalProcessor(BaseMultiModalProcessor[Lfm2VLProcessingInfo]):
)
->
BatchFeature
:
)
->
BatchFeature
:
# Text-only input not supported in composite processor
# Text-only input not supported in composite processor
if
not
(
images
:
=
mm_data
.
get
(
"images"
,
[])):
if
not
(
images
:
=
mm_data
.
get
(
"images"
,
[])):
prompt_ids
=
self
.
info
.
get_tokenizer
().
encode
(
prompt
)
prompt_ids
=
self
.
info
.
get_tokenizer
().
encode
(
prompt
,
add_special_tokens
=
False
)
prompt_ids
=
self
.
_apply_hf_processor_tokens_only
(
prompt_ids
)
prompt_ids
=
self
.
_apply_hf_processor_tokens_only
(
prompt_ids
)
return
BatchFeature
(
dict
(
input_ids
=
[
prompt_ids
]),
tensor_type
=
"pt"
)
return
BatchFeature
(
dict
(
input_ids
=
[
prompt_ids
]),
tensor_type
=
"pt"
)
...
...
vllm/model_executor/models/qwen2_vl.py
View file @
527ca321
...
@@ -1467,15 +1467,15 @@ class Tarsier2ImageProcessor(Qwen2VLImageProcessor):
...
@@ -1467,15 +1467,15 @@ class Tarsier2ImageProcessor(Qwen2VLImageProcessor):
class
Tarsier2Processor
(
Qwen2VLProcessor
):
class
Tarsier2Processor
(
Qwen2VLProcessor
):
def
__init__
(
def
__init__
(
self
,
self
,
vision_config
:
dict
,
image_processor
:
Tarsier2ImageProcessor
,
tokenizer
:
TokenizerLike
,
tokenizer
:
TokenizerLike
,
video_processor
:
Qwen2VLVideoProcessor
,
**
kwargs
,
**
kwargs
,
):
):
self
.
image_processor
=
Tarsier2ImageProcessor
(
**
vision_config
)
super
().
__init__
(
super
().
__init__
(
image_processor
=
self
.
image_processor
,
image_processor
=
image_processor
,
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
video_processor
=
Qwen2VLV
ideo
P
rocessor
(
**
vision_config
)
,
video_processor
=
v
ideo
_p
rocessor
,
chat_template
=
None
,
chat_template
=
None
,
**
kwargs
,
**
kwargs
,
)
)
...
@@ -1489,8 +1489,12 @@ class Tarsier2ProcessingInfo(Qwen2VLProcessingInfo):
...
@@ -1489,8 +1489,12 @@ class Tarsier2ProcessingInfo(Qwen2VLProcessingInfo):
return
correct_config
return
correct_config
def
get_hf_processor
(
self
,
**
kwargs
:
object
)
->
Tarsier2Processor
:
def
get_hf_processor
(
self
,
**
kwargs
:
object
)
->
Tarsier2Processor
:
vision_config
=
self
.
ctx
.
get_hf_image_processor_config
()
image_processor
=
Tarsier2ImageProcessor
(
**
vision_config
)
video_processor
=
Qwen2VLVideoProcessor
(
**
vision_config
)
return
Tarsier2Processor
(
return
Tarsier2Processor
(
vision_config
=
self
.
ctx
.
get_hf_image_processor_config
(),
image_processor
=
image_processor
,
video_processor
=
video_processor
,
tokenizer
=
self
.
get_tokenizer
(),
tokenizer
=
self
.
get_tokenizer
(),
**
kwargs
,
**
kwargs
,
)
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment