Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0ab06100
Unverified
Commit
0ab06100
authored
Feb 12, 2026
by
Isotr0py
Committed by
GitHub
Feb 11, 2026
Browse files
[Multimodal] Expose `mm_processor_kwargs` for `DummyInputsBuilder` (#34330)
Signed-off-by:
Isotr0py
<
mozf@mail2.sysu.edu.cn
>
parent
ffb3d553
Changes
72
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
32 additions
and
4 deletions
+32
-4
vllm/model_executor/models/aria.py
vllm/model_executor/models/aria.py
+1
-0
vllm/model_executor/models/audioflamingo3.py
vllm/model_executor/models/audioflamingo3.py
+4
-1
vllm/model_executor/models/aya_vision.py
vllm/model_executor/models/aya_vision.py
+1
-0
vllm/model_executor/models/bagel.py
vllm/model_executor/models/bagel.py
+1
-0
vllm/model_executor/models/bee.py
vllm/model_executor/models/bee.py
+1
-0
vllm/model_executor/models/blip2.py
vllm/model_executor/models/blip2.py
+1
-0
vllm/model_executor/models/chameleon.py
vllm/model_executor/models/chameleon.py
+1
-0
vllm/model_executor/models/clip.py
vllm/model_executor/models/clip.py
+1
-0
vllm/model_executor/models/cohere2_vision.py
vllm/model_executor/models/cohere2_vision.py
+1
-0
vllm/model_executor/models/deepseek_ocr.py
vllm/model_executor/models/deepseek_ocr.py
+1
-0
vllm/model_executor/models/deepseek_ocr2.py
vllm/model_executor/models/deepseek_ocr2.py
+1
-0
vllm/model_executor/models/deepseek_vl2.py
vllm/model_executor/models/deepseek_vl2.py
+1
-0
vllm/model_executor/models/dots_ocr.py
vllm/model_executor/models/dots_ocr.py
+3
-0
vllm/model_executor/models/ernie45_vl.py
vllm/model_executor/models/ernie45_vl.py
+1
-0
vllm/model_executor/models/funasr.py
vllm/model_executor/models/funasr.py
+4
-1
vllm/model_executor/models/funaudiochat.py
vllm/model_executor/models/funaudiochat.py
+5
-2
vllm/model_executor/models/fuyu.py
vllm/model_executor/models/fuyu.py
+1
-0
vllm/model_executor/models/gemma3_mm.py
vllm/model_executor/models/gemma3_mm.py
+1
-0
vllm/model_executor/models/gemma3n_mm.py
vllm/model_executor/models/gemma3n_mm.py
+1
-0
vllm/model_executor/models/glm4_1v.py
vllm/model_executor/models/glm4_1v.py
+1
-0
No files found.
vllm/model_executor/models/aria.py
View file @
0ab06100
...
@@ -445,6 +445,7 @@ class AriaDummyInputsBuilder(BaseDummyInputsBuilder[AriaProcessingInfo]):
...
@@ -445,6 +445,7 @@ class AriaDummyInputsBuilder(BaseDummyInputsBuilder[AriaProcessingInfo]):
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
vision_config
=
self
.
info
.
get_vision_config
()
vision_config
=
self
.
info
.
get_vision_config
()
...
...
vllm/model_executor/models/audioflamingo3.py
View file @
0ab06100
...
@@ -253,8 +253,11 @@ class AudioFlamingo3DummyInputsBuilder(
...
@@ -253,8 +253,11 @@ class AudioFlamingo3DummyInputsBuilder(
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
feature_extractor
=
self
.
info
.
get_feature_extractor
()
feature_extractor
=
self
.
info
.
get_feature_extractor
(
**
(
mm_processor_kwargs
or
{})
)
sampling_rate
=
feature_extractor
.
sampling_rate
sampling_rate
=
feature_extractor
.
sampling_rate
audio_len
=
MAX_AUDIO_LEN
*
sampling_rate
audio_len
=
MAX_AUDIO_LEN
*
sampling_rate
num_audios
=
mm_counts
.
get
(
"audio"
,
0
)
num_audios
=
mm_counts
.
get
(
"audio"
,
0
)
...
...
vllm/model_executor/models/aya_vision.py
View file @
0ab06100
...
@@ -192,6 +192,7 @@ class AyaVisionDummyInputsBuilder(BaseDummyInputsBuilder[AyaVisionProcessingInfo
...
@@ -192,6 +192,7 @@ class AyaVisionDummyInputsBuilder(BaseDummyInputsBuilder[AyaVisionProcessingInfo
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
image_size
=
self
.
info
.
get_image_size_with_most_features
()
image_size
=
self
.
info
.
get_image_size_with_most_features
()
...
...
vllm/model_executor/models/bagel.py
View file @
0ab06100
...
@@ -250,6 +250,7 @@ class BagelDummyInputsBuilder(BaseDummyInputsBuilder[BagelProcessingInfo]):
...
@@ -250,6 +250,7 @@ class BagelDummyInputsBuilder(BaseDummyInputsBuilder[BagelProcessingInfo]):
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
hf_config
=
self
.
info
.
get_hf_config
()
hf_config
=
self
.
info
.
get_hf_config
()
...
...
vllm/model_executor/models/bee.py
View file @
0ab06100
...
@@ -91,6 +91,7 @@ class BeeDummyInputsBuilder(LlavaDummyInputsBuilder[BeeProcessingInfo]):
...
@@ -91,6 +91,7 @@ class BeeDummyInputsBuilder(LlavaDummyInputsBuilder[BeeProcessingInfo]):
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
...
...
vllm/model_executor/models/blip2.py
View file @
0ab06100
...
@@ -446,6 +446,7 @@ class Blip2DummyInputsBuilder(BaseDummyInputsBuilder[Blip2ProcessingInfo]):
...
@@ -446,6 +446,7 @@ class Blip2DummyInputsBuilder(BaseDummyInputsBuilder[Blip2ProcessingInfo]):
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
hf_config
=
self
.
info
.
get_hf_config
()
hf_config
=
self
.
info
.
get_hf_config
()
vision_config
=
hf_config
.
vision_config
vision_config
=
hf_config
.
vision_config
...
...
vllm/model_executor/models/chameleon.py
View file @
0ab06100
...
@@ -117,6 +117,7 @@ class ChameleonDummyInputsBuilder(BaseDummyInputsBuilder[ChameleonProcessingInfo
...
@@ -117,6 +117,7 @@ class ChameleonDummyInputsBuilder(BaseDummyInputsBuilder[ChameleonProcessingInfo
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
config
=
self
.
info
.
get_hf_config
()
config
=
self
.
info
.
get_hf_config
()
...
...
vllm/model_executor/models/clip.py
View file @
0ab06100
...
@@ -171,6 +171,7 @@ class CLIPDummyInputsBuilder(BaseDummyInputsBuilder[CLIPProcessingInfo]):
...
@@ -171,6 +171,7 @@ class CLIPDummyInputsBuilder(BaseDummyInputsBuilder[CLIPProcessingInfo]):
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
...
...
vllm/model_executor/models/cohere2_vision.py
View file @
0ab06100
...
@@ -221,6 +221,7 @@ class Cohere2VisionDummyInputsBuilder(
...
@@ -221,6 +221,7 @@ class Cohere2VisionDummyInputsBuilder(
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
image_size
=
self
.
info
.
get_image_size_with_most_features
()
image_size
=
self
.
info
.
get_image_size_with_most_features
()
...
...
vllm/model_executor/models/deepseek_ocr.py
View file @
0ab06100
...
@@ -256,6 +256,7 @@ class DeepseekOCRDummyInputsBuilder(BaseDummyInputsBuilder[DeepseekOCRProcessing
...
@@ -256,6 +256,7 @@ class DeepseekOCRDummyInputsBuilder(BaseDummyInputsBuilder[DeepseekOCRProcessing
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
...
...
vllm/model_executor/models/deepseek_ocr2.py
View file @
0ab06100
...
@@ -138,6 +138,7 @@ class DeepseekOCR2DummyInputsBuilder(
...
@@ -138,6 +138,7 @@ class DeepseekOCR2DummyInputsBuilder(
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
...
...
vllm/model_executor/models/deepseek_vl2.py
View file @
0ab06100
...
@@ -215,6 +215,7 @@ class DeepseekVL2DummyInputsBuilder(BaseDummyInputsBuilder[DeepseekVL2Processing
...
@@ -215,6 +215,7 @@ class DeepseekVL2DummyInputsBuilder(BaseDummyInputsBuilder[DeepseekVL2Processing
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
...
...
vllm/model_executor/models/dots_ocr.py
View file @
0ab06100
...
@@ -107,10 +107,13 @@ class DotsOCRDummyInputsBuilder(Qwen2VLDummyInputsBuilder):
...
@@ -107,10 +107,13 @@ class DotsOCRDummyInputsBuilder(Qwen2VLDummyInputsBuilder):
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
mm_processor_kwargs
=
mm_processor_kwargs
or
{}
target_width
,
target_height
=
self
.
info
.
get_image_size_with_most_features
(
# noqa: E501
target_width
,
target_height
=
self
.
info
.
get_image_size_with_most_features
(
# noqa: E501
mm_processor_kwargs
.
get
(
"max_pixels"
,
None
)
)
)
image_overrides
=
mm_options
.
get
(
"image"
)
if
mm_options
else
None
image_overrides
=
mm_options
.
get
(
"image"
)
if
mm_options
else
None
...
...
vllm/model_executor/models/ernie45_vl.py
View file @
0ab06100
...
@@ -1153,6 +1153,7 @@ class Ernie4_5_VLDummyInputsBuilder(BaseDummyInputsBuilder[Ernie4_5_VLProcessing
...
@@ -1153,6 +1153,7 @@ class Ernie4_5_VLDummyInputsBuilder(BaseDummyInputsBuilder[Ernie4_5_VLProcessing
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_videos
=
mm_counts
.
get
(
"video"
,
0
)
num_videos
=
mm_counts
.
get
(
"video"
,
0
)
...
...
vllm/model_executor/models/funasr.py
View file @
0ab06100
...
@@ -745,8 +745,11 @@ class FunASRDummyInputsBuilder(BaseDummyInputsBuilder[FunASRProcessingInfo]):
...
@@ -745,8 +745,11 @@ class FunASRDummyInputsBuilder(BaseDummyInputsBuilder[FunASRProcessingInfo]):
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
feature_extractor
=
self
.
info
.
get_feature_extractor
()
feature_extractor
=
self
.
info
.
get_feature_extractor
(
**
(
mm_processor_kwargs
or
{})
)
sampling_rate
=
feature_extractor
.
sampling_rate
sampling_rate
=
feature_extractor
.
sampling_rate
audio_len
=
feature_extractor
.
chunk_length
*
sampling_rate
audio_len
=
feature_extractor
.
chunk_length
*
sampling_rate
...
...
vllm/model_executor/models/funaudiochat.py
View file @
0ab06100
...
@@ -611,8 +611,11 @@ class FunAudioChatDummyInputsBuilder(
...
@@ -611,8 +611,11 @@ class FunAudioChatDummyInputsBuilder(
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
feature_extractor
=
self
.
info
.
get_feature_extractor
()
feature_extractor
=
self
.
info
.
get_feature_extractor
(
**
(
mm_processor_kwargs
or
{})
)
sampling_rate
=
int
(
feature_extractor
.
sampling_rate
)
sampling_rate
=
int
(
feature_extractor
.
sampling_rate
)
# Dummy inputs are used for profiling; construct the worst-case audio
# Dummy inputs are used for profiling; construct the worst-case audio
...
@@ -656,7 +659,7 @@ class FunAudioChatMultiModalProcessor(
...
@@ -656,7 +659,7 @@ class FunAudioChatMultiModalProcessor(
if
not
audios
:
if
not
audios
:
return
BatchFeature
({
"input_ids"
:
input_ids
})
return
BatchFeature
({
"input_ids"
:
input_ids
})
feature_extractor
=
self
.
info
.
get_feature_extractor
()
feature_extractor
=
self
.
info
.
get_feature_extractor
(
**
mm_kwargs
)
sr
=
int
(
feature_extractor
.
sampling_rate
)
sr
=
int
(
feature_extractor
.
sampling_rate
)
min_samples
=
int
(
getattr
(
feature_extractor
,
"n_fft"
,
400
)
or
400
)
min_samples
=
int
(
getattr
(
feature_extractor
,
"n_fft"
,
400
)
or
400
)
...
...
vllm/model_executor/models/fuyu.py
View file @
0ab06100
...
@@ -143,6 +143,7 @@ class FuyuDummyInputsBuilder(BaseDummyInputsBuilder[FuyuProcessingInfo]):
...
@@ -143,6 +143,7 @@ class FuyuDummyInputsBuilder(BaseDummyInputsBuilder[FuyuProcessingInfo]):
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
target_width
,
target_height
=
self
.
info
.
get_image_size_with_most_features
()
target_width
,
target_height
=
self
.
info
.
get_image_size_with_most_features
()
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
...
...
vllm/model_executor/models/gemma3_mm.py
View file @
0ab06100
...
@@ -256,6 +256,7 @@ class Gemma3DummyInputsBuilder(BaseDummyInputsBuilder[Gemma3ProcessingInfo]):
...
@@ -256,6 +256,7 @@ class Gemma3DummyInputsBuilder(BaseDummyInputsBuilder[Gemma3ProcessingInfo]):
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
...
...
vllm/model_executor/models/gemma3n_mm.py
View file @
0ab06100
...
@@ -182,6 +182,7 @@ class Gemma3nDummyInputsBuilder(BaseDummyInputsBuilder[Gemma3nProcessingInfo]):
...
@@ -182,6 +182,7 @@ class Gemma3nDummyInputsBuilder(BaseDummyInputsBuilder[Gemma3nProcessingInfo]):
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_audios
=
mm_counts
.
get
(
"audio"
,
0
)
num_audios
=
mm_counts
.
get
(
"audio"
,
0
)
...
...
vllm/model_executor/models/glm4_1v.py
View file @
0ab06100
...
@@ -1143,6 +1143,7 @@ class Glm4vDummyInputsBuilder(BaseDummyInputsBuilder[Glm4vProcessingInfo]):
...
@@ -1143,6 +1143,7 @@ class Glm4vDummyInputsBuilder(BaseDummyInputsBuilder[Glm4vProcessingInfo]):
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_videos
=
mm_counts
.
get
(
"video"
,
0
)
num_videos
=
mm_counts
.
get
(
"video"
,
0
)
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment