Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0ab06100
Unverified
Commit
0ab06100
authored
Feb 12, 2026
by
Isotr0py
Committed by
GitHub
Feb 11, 2026
Browse files
[Multimodal] Expose `mm_processor_kwargs` for `DummyInputsBuilder` (#34330)
Signed-off-by:
Isotr0py
<
mozf@mail2.sysu.edu.cn
>
parent
ffb3d553
Changes
72
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
26 additions
and
12 deletions
+26
-12
vllm/model_executor/models/glm4v.py
vllm/model_executor/models/glm4v.py
+1
-0
vllm/model_executor/models/glmasr.py
vllm/model_executor/models/glmasr.py
+4
-1
vllm/model_executor/models/granite_speech.py
vllm/model_executor/models/granite_speech.py
+1
-0
vllm/model_executor/models/hunyuan_vision.py
vllm/model_executor/models/hunyuan_vision.py
+1
-0
vllm/model_executor/models/hyperclovax_vision.py
vllm/model_executor/models/hyperclovax_vision.py
+1
-0
vllm/model_executor/models/idefics3.py
vllm/model_executor/models/idefics3.py
+3
-11
vllm/model_executor/models/interns1.py
vllm/model_executor/models/interns1.py
+1
-0
vllm/model_executor/models/internvl.py
vllm/model_executor/models/internvl.py
+2
-0
vllm/model_executor/models/isaac.py
vllm/model_executor/models/isaac.py
+1
-0
vllm/model_executor/models/kanana_v.py
vllm/model_executor/models/kanana_v.py
+1
-0
vllm/model_executor/models/keye.py
vllm/model_executor/models/keye.py
+1
-0
vllm/model_executor/models/kimi_k25.py
vllm/model_executor/models/kimi_k25.py
+1
-0
vllm/model_executor/models/kimi_vl.py
vllm/model_executor/models/kimi_vl.py
+1
-0
vllm/model_executor/models/lfm2_vl.py
vllm/model_executor/models/lfm2_vl.py
+1
-0
vllm/model_executor/models/llava.py
vllm/model_executor/models/llava.py
+1
-0
vllm/model_executor/models/llava_next_video.py
vllm/model_executor/models/llava_next_video.py
+1
-0
vllm/model_executor/models/llava_onevision.py
vllm/model_executor/models/llava_onevision.py
+1
-0
vllm/model_executor/models/midashenglm.py
vllm/model_executor/models/midashenglm.py
+1
-0
vllm/model_executor/models/minicpmo.py
vllm/model_executor/models/minicpmo.py
+1
-0
vllm/model_executor/models/minicpmv.py
vllm/model_executor/models/minicpmv.py
+1
-0
No files found.
vllm/model_executor/models/glm4v.py
View file @
0ab06100
...
@@ -493,6 +493,7 @@ class GLM4VDummyInputsBuilder(BaseDummyInputsBuilder[GLM4VProcessingInfo]):
...
@@ -493,6 +493,7 @@ class GLM4VDummyInputsBuilder(BaseDummyInputsBuilder[GLM4VProcessingInfo]):
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
hf_config
=
self
.
info
.
get_hf_config
()
hf_config
=
self
.
info
.
get_hf_config
()
vision_config
=
hf_config
.
vision_config
vision_config
=
hf_config
.
vision_config
...
...
vllm/model_executor/models/glmasr.py
View file @
0ab06100
...
@@ -727,8 +727,11 @@ class GlmAsrDummyInputsBuilder(BaseDummyInputsBuilder[GlmAsrProcessingInfo]):
...
@@ -727,8 +727,11 @@ class GlmAsrDummyInputsBuilder(BaseDummyInputsBuilder[GlmAsrProcessingInfo]):
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
feature_extractor
=
self
.
info
.
get_feature_extractor
()
feature_extractor
=
self
.
info
.
get_feature_extractor
(
**
(
mm_processor_kwargs
or
{})
)
sampling_rate
=
feature_extractor
.
sampling_rate
sampling_rate
=
feature_extractor
.
sampling_rate
num_audios
=
mm_counts
.
get
(
"audio"
,
0
)
num_audios
=
mm_counts
.
get
(
"audio"
,
0
)
audio_overrides
=
mm_options
.
get
(
"audio"
)
if
mm_options
else
None
audio_overrides
=
mm_options
.
get
(
"audio"
)
if
mm_options
else
None
...
...
vllm/model_executor/models/granite_speech.py
View file @
0ab06100
...
@@ -217,6 +217,7 @@ class GraniteSpeechDummyInputsBuilder(
...
@@ -217,6 +217,7 @@ class GraniteSpeechDummyInputsBuilder(
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_audios
=
mm_counts
.
get
(
"audio"
,
0
)
num_audios
=
mm_counts
.
get
(
"audio"
,
0
)
audio_overrides
=
mm_options
.
get
(
"audio"
)
if
mm_options
else
None
audio_overrides
=
mm_options
.
get
(
"audio"
)
if
mm_options
else
None
...
...
vllm/model_executor/models/hunyuan_vision.py
View file @
0ab06100
...
@@ -702,6 +702,7 @@ class HunYuanVLDummyInputsBuilder(BaseDummyInputsBuilder[HunYuanVLProcessingInfo
...
@@ -702,6 +702,7 @@ class HunYuanVLDummyInputsBuilder(BaseDummyInputsBuilder[HunYuanVLProcessingInfo
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
1
)
num_images
=
mm_counts
.
get
(
"image"
,
1
)
...
...
vllm/model_executor/models/hyperclovax_vision.py
View file @
0ab06100
...
@@ -166,6 +166,7 @@ class HCXVisionDummyInputsBuilder(BaseDummyInputsBuilder[HCXVisionProcessingInfo
...
@@ -166,6 +166,7 @@ class HCXVisionDummyInputsBuilder(BaseDummyInputsBuilder[HCXVisionProcessingInfo
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_videos
=
mm_counts
.
get
(
"video"
,
0
)
num_videos
=
mm_counts
.
get
(
"video"
,
0
)
...
...
vllm/model_executor/models/idefics3.py
View file @
0ab06100
...
@@ -42,7 +42,7 @@ from vllm.multimodal.inputs import (
...
@@ -42,7 +42,7 @@ from vllm.multimodal.inputs import (
MultiModalFieldConfig
,
MultiModalFieldConfig
,
MultiModalKwargsItems
,
MultiModalKwargsItems
,
)
)
from
vllm.multimodal.parse
import
ImageProcessorItems
,
ImageSize
,
MultiModalDataItems
from
vllm.multimodal.parse
import
ImageProcessorItems
,
MultiModalDataItems
from
vllm.multimodal.processing
import
(
from
vllm.multimodal.processing
import
(
BaseDummyInputsBuilder
,
BaseDummyInputsBuilder
,
BaseMultiModalProcessor
,
BaseMultiModalProcessor
,
...
@@ -285,15 +285,6 @@ class Idefics3ProcessingInfo(BaseProcessingInfo):
...
@@ -285,15 +285,6 @@ class Idefics3ProcessingInfo(BaseProcessingInfo):
return
num_patches
*
processor
.
image_seq_len
return
num_patches
*
processor
.
image_seq_len
def
get_image_size_with_most_features
(
self
)
->
ImageSize
:
processor
=
self
.
get_hf_processor
()
image_processor
:
Idefics3ImageProcessor
=
processor
.
image_processor
return
ImageSize
(
width
=
image_processor
.
size
[
"longest_edge"
],
height
=
image_processor
.
size
[
"longest_edge"
],
)
class
Idefics3DummyInputsBuilder
(
BaseDummyInputsBuilder
[
Idefics3ProcessingInfo
]):
class
Idefics3DummyInputsBuilder
(
BaseDummyInputsBuilder
[
Idefics3ProcessingInfo
]):
def
get_dummy_text
(
self
,
mm_counts
:
Mapping
[
str
,
int
])
->
str
:
def
get_dummy_text
(
self
,
mm_counts
:
Mapping
[
str
,
int
])
->
str
:
...
@@ -309,9 +300,10 @@ class Idefics3DummyInputsBuilder(BaseDummyInputsBuilder[Idefics3ProcessingInfo])
...
@@ -309,9 +300,10 @@ class Idefics3DummyInputsBuilder(BaseDummyInputsBuilder[Idefics3ProcessingInfo])
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
hf_processor
=
self
.
info
.
get_hf_processor
()
hf_processor
=
self
.
info
.
get_hf_processor
(
**
(
mm_processor_kwargs
or
{})
)
image_processor
:
Idefics3ImageProcessor
=
hf_processor
.
image_processor
image_processor
:
Idefics3ImageProcessor
=
hf_processor
.
image_processor
longest_edge
=
image_processor
.
max_image_size
[
"longest_edge"
]
longest_edge
=
image_processor
.
max_image_size
[
"longest_edge"
]
...
...
vllm/model_executor/models/interns1.py
View file @
0ab06100
...
@@ -298,6 +298,7 @@ class InternS1DummyInputsBuilder(BaseDummyInputsBuilder[InternS1ProcessingInfo])
...
@@ -298,6 +298,7 @@ class InternS1DummyInputsBuilder(BaseDummyInputsBuilder[InternS1ProcessingInfo])
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
target_width
,
target_height
=
self
.
info
.
get_image_size_with_most_features
()
target_width
,
target_height
=
self
.
info
.
get_image_size_with_most_features
()
target_num_frames
=
self
.
info
.
get_num_frames_with_most_features
(
target_num_frames
=
self
.
info
.
get_num_frames_with_most_features
(
...
...
vllm/model_executor/models/internvl.py
View file @
0ab06100
...
@@ -766,6 +766,7 @@ class BaseInternVLDummyInputsBuilder(BaseDummyInputsBuilder[_I]):
...
@@ -766,6 +766,7 @@ class BaseInternVLDummyInputsBuilder(BaseDummyInputsBuilder[_I]):
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
target_width
,
target_height
=
self
.
info
.
get_image_size_with_most_features
()
target_width
,
target_height
=
self
.
info
.
get_image_size_with_most_features
()
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
...
@@ -938,6 +939,7 @@ class InternVLDummyInputsBuilder(
...
@@ -938,6 +939,7 @@ class InternVLDummyInputsBuilder(
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
dummy_image
=
super
().
get_dummy_mm_data
(
dummy_image
=
super
().
get_dummy_mm_data
(
seq_len
=
seq_len
,
mm_counts
=
mm_counts
,
mm_options
=
mm_options
seq_len
=
seq_len
,
mm_counts
=
mm_counts
,
mm_options
=
mm_options
...
...
vllm/model_executor/models/isaac.py
View file @
0ab06100
...
@@ -850,6 +850,7 @@ class IsaacDummyInputsBuilder(BaseDummyInputsBuilder[IsaacProcessingInfo]):
...
@@ -850,6 +850,7 @@ class IsaacDummyInputsBuilder(BaseDummyInputsBuilder[IsaacProcessingInfo]):
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
]
|
None
=
None
,
mm_options
:
Mapping
[
str
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
...
...
vllm/model_executor/models/kanana_v.py
View file @
0ab06100
...
@@ -445,6 +445,7 @@ class KananaVDummyInputsBuilder(BaseDummyInputsBuilder[KananaVProcessingInfo]):
...
@@ -445,6 +445,7 @@ class KananaVDummyInputsBuilder(BaseDummyInputsBuilder[KananaVProcessingInfo]):
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
return
{
return
{
...
...
vllm/model_executor/models/keye.py
View file @
0ab06100
...
@@ -1159,6 +1159,7 @@ class KeyeBaseDummyInputsBuilder(BaseDummyInputsBuilder[_I]):
...
@@ -1159,6 +1159,7 @@ class KeyeBaseDummyInputsBuilder(BaseDummyInputsBuilder[_I]):
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_videos
=
mm_counts
.
get
(
"video"
,
0
)
num_videos
=
mm_counts
.
get
(
"video"
,
0
)
...
...
vllm/model_executor/models/kimi_k25.py
View file @
0ab06100
...
@@ -238,6 +238,7 @@ class KimiK25DummyInputsBuilder(BaseDummyInputsBuilder[KimiK25ProcessingInfo]):
...
@@ -238,6 +238,7 @@ class KimiK25DummyInputsBuilder(BaseDummyInputsBuilder[KimiK25ProcessingInfo]):
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
# TODO: Support mm_options for vision_chunk to allow user configuration
# TODO: Support mm_options for vision_chunk to allow user configuration
dummy_items
=
self
.
get_dummy_mm_items
()
dummy_items
=
self
.
get_dummy_mm_items
()
...
...
vllm/model_executor/models/kimi_vl.py
View file @
0ab06100
...
@@ -216,6 +216,7 @@ class KimiVLDummyInputsBuilder(BaseDummyInputsBuilder[KimiVLProcessingInfo]):
...
@@ -216,6 +216,7 @@ class KimiVLDummyInputsBuilder(BaseDummyInputsBuilder[KimiVLProcessingInfo]):
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
...
...
vllm/model_executor/models/lfm2_vl.py
View file @
0ab06100
...
@@ -319,6 +319,7 @@ class Lfm2VLDummyInputsBuilder(BaseDummyInputsBuilder[Lfm2VLProcessingInfo]):
...
@@ -319,6 +319,7 @@ class Lfm2VLDummyInputsBuilder(BaseDummyInputsBuilder[Lfm2VLProcessingInfo]):
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
...
...
vllm/model_executor/models/llava.py
View file @
0ab06100
...
@@ -232,6 +232,7 @@ class LlavaDummyInputsBuilder(BaseDummyInputsBuilder[_I]):
...
@@ -232,6 +232,7 @@ class LlavaDummyInputsBuilder(BaseDummyInputsBuilder[_I]):
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
...
...
vllm/model_executor/models/llava_next_video.py
View file @
0ab06100
...
@@ -166,6 +166,7 @@ class LlavaNextVideoDummyInputsBuilder(
...
@@ -166,6 +166,7 @@ class LlavaNextVideoDummyInputsBuilder(
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_videos
=
mm_counts
.
get
(
"video"
,
0
)
num_videos
=
mm_counts
.
get
(
"video"
,
0
)
...
...
vllm/model_executor/models/llava_onevision.py
View file @
0ab06100
...
@@ -277,6 +277,7 @@ class LlavaOnevisionDummyInputsBuilder(
...
@@ -277,6 +277,7 @@ class LlavaOnevisionDummyInputsBuilder(
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_videos
=
mm_counts
.
get
(
"video"
,
0
)
num_videos
=
mm_counts
.
get
(
"video"
,
0
)
...
...
vllm/model_executor/models/midashenglm.py
View file @
0ab06100
...
@@ -566,6 +566,7 @@ class MiDashengLMDummyInputsBuilder(BaseDummyInputsBuilder[MiDashengLMProcessing
...
@@ -566,6 +566,7 @@ class MiDashengLMDummyInputsBuilder(BaseDummyInputsBuilder[MiDashengLMProcessing
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_audios
=
mm_counts
.
get
(
"audio"
,
0
)
num_audios
=
mm_counts
.
get
(
"audio"
,
0
)
...
...
vllm/model_executor/models/minicpmo.py
View file @
0ab06100
...
@@ -302,6 +302,7 @@ class MiniCPMODummyInputsBuilder(MiniCPMVDummyInputsBuilder[MiniCPMOProcessingIn
...
@@ -302,6 +302,7 @@ class MiniCPMODummyInputsBuilder(MiniCPMVDummyInputsBuilder[MiniCPMOProcessingIn
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_audios
=
mm_counts
.
get
(
"audio"
,
0
)
num_audios
=
mm_counts
.
get
(
"audio"
,
0
)
audio_len
=
(
audio_len
=
(
...
...
vllm/model_executor/models/minicpmv.py
View file @
0ab06100
...
@@ -708,6 +708,7 @@ class MiniCPMVDummyInputsBuilder(BaseDummyInputsBuilder[_I]):
...
@@ -708,6 +708,7 @@ class MiniCPMVDummyInputsBuilder(BaseDummyInputsBuilder[_I]):
seq_len
:
int
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_videos
=
mm_counts
.
get
(
"video"
,
0
)
num_videos
=
mm_counts
.
get
(
"video"
,
0
)
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment