Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0ab06100
Unverified
Commit
0ab06100
authored
Feb 12, 2026
by
Isotr0py
Committed by
GitHub
Feb 11, 2026
Browse files
[Multimodal] Expose `mm_processor_kwargs` for `DummyInputsBuilder` (#34330)
Signed-off-by:
Isotr0py
<
mozf@mail2.sysu.edu.cn
>
parent
ffb3d553
Changes
72
Show whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
30 additions
and
3 deletions
+30
-3
vllm/model_executor/models/qwen_vl.py
vllm/model_executor/models/qwen_vl.py
+1
-0
vllm/model_executor/models/rvl.py
vllm/model_executor/models/rvl.py
+1
-0
vllm/model_executor/models/siglip.py
vllm/model_executor/models/siglip.py
+1
-0
vllm/model_executor/models/skyworkr1v.py
vllm/model_executor/models/skyworkr1v.py
+1
-0
vllm/model_executor/models/step3_vl.py
vllm/model_executor/models/step3_vl.py
+1
-0
vllm/model_executor/models/terratorch.py
vllm/model_executor/models/terratorch.py
+1
-0
vllm/model_executor/models/transformers/multimodal.py
vllm/model_executor/models/transformers/multimodal.py
+1
-0
vllm/model_executor/models/ultravox.py
vllm/model_executor/models/ultravox.py
+4
-1
vllm/model_executor/models/voxtral.py
vllm/model_executor/models/voxtral.py
+2
-0
vllm/model_executor/models/whisper.py
vllm/model_executor/models/whisper.py
+4
-1
vllm/multimodal/processing/dummy_inputs.py
vllm/multimodal/processing/dummy_inputs.py
+11
-1
vllm/multimodal/registry.py
vllm/multimodal/registry.py
+2
-0
No files found.
vllm/model_executor/models/qwen_vl.py
View file @
0ab06100
...
...
@@ -618,6 +618,7 @@ class QwenVLDummyInputsBuilder(BaseDummyInputsBuilder[QwenVLProcessingInfo]):
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
hf_config
=
self
.
info
.
get_hf_config
()
vision_config
=
hf_config
.
visual
...
...
vllm/model_executor/models/rvl.py
View file @
0ab06100
...
...
@@ -41,6 +41,7 @@ class RVLDummyInputsBuilder(LlavaDummyInputsBuilder[RVLProcessingInfo]):
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
...
...
vllm/model_executor/models/siglip.py
View file @
0ab06100
...
...
@@ -155,6 +155,7 @@ class SiglipDummyInputsBuilder(BaseDummyInputsBuilder[SiglipProcessingInfo]):
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
...
...
vllm/model_executor/models/skyworkr1v.py
View file @
0ab06100
...
...
@@ -533,6 +533,7 @@ class SkyworkR1VDummyInputsBuilder(BaseDummyInputsBuilder[SkyworkR1VProcessingIn
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
target_width
,
target_height
=
self
.
info
.
get_image_size_with_most_features
()
num_images
=
mm_counts
.
get
(
"image"
,
0
)
...
...
vllm/model_executor/models/step3_vl.py
View file @
0ab06100
...
...
@@ -565,6 +565,7 @@ class Step3VLDummyInputsBuilder(BaseDummyInputsBuilder[Step3VLProcessingInfo]):
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
target_width
,
target_height
=
self
.
info
.
get_image_size_with_most_features
()
num_images
=
mm_counts
.
get
(
"image"
,
0
)
...
...
vllm/model_executor/models/terratorch.py
View file @
0ab06100
...
...
@@ -154,6 +154,7 @@ class TerratorchInputBuilder(BaseDummyInputsBuilder[TerratorchProcessingInfo]):
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
# Dummy data is generated based on the 'input' section
# defined in the HF configuration file
...
...
vllm/model_executor/models/transformers/multimodal.py
View file @
0ab06100
...
...
@@ -98,6 +98,7 @@ class MultiModalDummyInputsBuilder(BaseDummyInputsBuilder[MultiModalProcessingIn
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
"BaseDummyOptions"
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
...
...
vllm/model_executor/models/ultravox.py
View file @
0ab06100
...
...
@@ -161,8 +161,11 @@ class UltravoxDummyInputsBuilder(BaseDummyInputsBuilder[UltravoxProcessingInfo])
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
feature_extractor
=
self
.
info
.
get_feature_extractor
()
feature_extractor
=
self
.
info
.
get_feature_extractor
(
**
(
mm_processor_kwargs
or
{})
)
sampling_rate
=
feature_extractor
.
sampling_rate
audio_len
=
(
...
...
vllm/model_executor/models/voxtral.py
View file @
0ab06100
...
...
@@ -220,6 +220,7 @@ class VoxtralDummyInputsBuilder(BaseDummyInputsBuilder[VoxtralProcessingInfo]):
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
num_audios
=
mm_counts
.
get
(
"audio"
,
0
)
...
...
@@ -238,6 +239,7 @@ class VoxtralDummyInputsBuilder(BaseDummyInputsBuilder[VoxtralProcessingInfo]):
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
ProcessorInputs
:
tokenizer
=
self
.
info
.
get_tokenizer
()
...
...
vllm/model_executor/models/whisper.py
View file @
0ab06100
...
...
@@ -685,8 +685,11 @@ class WhisperDummyInputsBuilder(BaseDummyInputsBuilder[WhisperProcessingInfo]):
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
feature_extractor
=
self
.
info
.
get_feature_extractor
()
feature_extractor
=
self
.
info
.
get_feature_extractor
(
**
(
mm_processor_kwargs
or
{})
)
sampling_rate
=
feature_extractor
.
sampling_rate
audio_len
=
feature_extractor
.
chunk_length
*
sampling_rate
...
...
vllm/multimodal/processing/dummy_inputs.py
View file @
0ab06100
...
...
@@ -63,6 +63,7 @@ class BaseDummyInputsBuilder(ABC, Generic[_I]):
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
"""
Build the multimodal input which, after processing, results in
...
...
@@ -83,6 +84,7 @@ class BaseDummyInputsBuilder(ABC, Generic[_I]):
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
ProcessorInputs
:
"""
Build the input which, after processing, results in
...
...
@@ -92,9 +94,16 @@ class BaseDummyInputsBuilder(ABC, Generic[_I]):
seq_len: Sequence length
mm_counts: Count of items per modality
mm_options: Configurable options per modality (optional)
mm_processor_kwargs: Additional keyword arguments
for hf_processor (optional)
"""
dummy_text
=
self
.
get_dummy_text
(
mm_counts
)
dummy_mm_data
=
self
.
get_dummy_mm_data
(
seq_len
,
mm_counts
,
mm_options
)
dummy_mm_data
=
self
.
get_dummy_mm_data
(
seq_len
,
mm_counts
,
mm_options
,
mm_processor_kwargs
=
mm_processor_kwargs
,
)
dummy_mm_items
=
self
.
info
.
parse_mm_data
(
dummy_mm_data
,
validate
=
False
)
tokenization_kwargs
=
{
"truncation"
:
False
}
...
...
@@ -102,6 +111,7 @@ class BaseDummyInputsBuilder(ABC, Generic[_I]):
return
ProcessorInputs
(
prompt
=
dummy_text
,
mm_items
=
dummy_mm_items
,
hf_processor_mm_kwargs
=
mm_processor_kwargs
or
{},
tokenization_kwargs
=
tokenization_kwargs
,
)
...
...
vllm/multimodal/registry.py
View file @
0ab06100
...
...
@@ -257,10 +257,12 @@ class MultiModalRegistry:
if
processor
is
None
:
processor
=
self
.
create_processor
(
model_config
,
cache
=
cache
)
mm_config
=
model_config
.
get_multimodal_config
()
processor_inputs
=
processor
.
dummy_inputs
.
get_dummy_processor_inputs
(
seq_len
=
seq_len
,
mm_counts
=
mm_counts
,
mm_options
=
self
.
_extract_mm_options
(
model_config
),
mm_processor_kwargs
=
mm_config
.
mm_processor_kwargs
,
)
mm_inputs
=
processor
.
apply
(
prompt
=
processor_inputs
.
prompt
,
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment