Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
987506bc
Unverified
Commit
987506bc
authored
Feb 23, 2026
by
Cyrus Leung
Committed by
GitHub
Feb 22, 2026
Browse files
[Refactor] Simplify dummy data generation (#35025)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
c645e9a2
Changes
78
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
60 additions
and
82 deletions
+60
-82
vllm/model_executor/models/funasr.py
vllm/model_executor/models/funasr.py
+7
-8
vllm/model_executor/models/funaudiochat.py
vllm/model_executor/models/funaudiochat.py
+3
-6
vllm/model_executor/models/fuyu.py
vllm/model_executor/models/fuyu.py
+2
-3
vllm/model_executor/models/gemma3_mm.py
vllm/model_executor/models/gemma3_mm.py
+2
-3
vllm/model_executor/models/gemma3n_mm.py
vllm/model_executor/models/gemma3n_mm.py
+6
-5
vllm/model_executor/models/glm4_1v.py
vllm/model_executor/models/glm4_1v.py
+3
-4
vllm/model_executor/models/glm4v.py
vllm/model_executor/models/glm4v.py
+2
-3
vllm/model_executor/models/glmasr.py
vllm/model_executor/models/glmasr.py
+6
-7
vllm/model_executor/models/granite_speech.py
vllm/model_executor/models/granite_speech.py
+2
-3
vllm/model_executor/models/hunyuan_vision.py
vllm/model_executor/models/hunyuan_vision.py
+1
-2
vllm/model_executor/models/hyperclovax_vision.py
vllm/model_executor/models/hyperclovax_vision.py
+3
-4
vllm/model_executor/models/idefics3.py
vllm/model_executor/models/idefics3.py
+3
-4
vllm/model_executor/models/interns1.py
vllm/model_executor/models/interns1.py
+3
-4
vllm/model_executor/models/internvl.py
vllm/model_executor/models/internvl.py
+5
-9
vllm/model_executor/models/isaac.py
vllm/model_executor/models/isaac.py
+3
-3
vllm/model_executor/models/kanana_v.py
vllm/model_executor/models/kanana_v.py
+1
-2
vllm/model_executor/models/keye.py
vllm/model_executor/models/keye.py
+3
-4
vllm/model_executor/models/kimi_k25.py
vllm/model_executor/models/kimi_k25.py
+1
-2
vllm/model_executor/models/kimi_vl.py
vllm/model_executor/models/kimi_vl.py
+2
-3
vllm/model_executor/models/lfm2_vl.py
vllm/model_executor/models/lfm2_vl.py
+2
-3
No files found.
vllm/model_executor/models/funasr.py
View file @
987506bc
...
...
@@ -746,23 +746,22 @@ class FunASRDummyInputsBuilder(BaseDummyInputsBuilder[FunASRProcessingInfo]):
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
],
)
->
MultiModalDataDict
:
feature_extractor
=
self
.
info
.
get_feature_extractor
(
**
(
mm_processor_kwargs
or
{})
)
feature_extractor
=
self
.
info
.
get_feature_extractor
()
sampling_rate
=
feature_extractor
.
sampling_rate
audio_len
=
feature_extractor
.
chunk_length
*
sampling_rate
num_audios
=
mm_counts
.
get
(
"audio"
,
0
)
audio_overrides
=
mm_options
.
get
(
"audio"
)
if
mm_options
else
None
audio_overrides
=
mm_options
.
get
(
"audio"
)
return
{
"audio"
:
self
.
_get_dummy_audios
(
length
=
audio_len
,
num_audios
=
num_audios
,
overrides
=
audio_overrides
)
length
=
audio_len
,
num_audios
=
num_audios
,
overrides
=
audio_overrides
,
),
}
...
...
vllm/model_executor/models/funaudiochat.py
View file @
987506bc
...
...
@@ -610,12 +610,9 @@ class FunAudioChatDummyInputsBuilder(
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
],
)
->
MultiModalDataDict
:
feature_extractor
=
self
.
info
.
get_feature_extractor
(
**
(
mm_processor_kwargs
or
{})
)
feature_extractor
=
self
.
info
.
get_feature_extractor
()
sampling_rate
=
int
(
feature_extractor
.
sampling_rate
)
# Dummy inputs are used for profiling; construct the worst-case audio
...
...
@@ -632,7 +629,7 @@ class FunAudioChatDummyInputsBuilder(
)
num_audios
=
int
(
mm_counts
.
get
(
"audio"
,
0
))
audio_overrides
=
mm_options
.
get
(
"audio"
)
if
mm_options
else
None
audio_overrides
=
mm_options
.
get
(
"audio"
)
return
{
"audio"
:
self
.
_get_dummy_audios
(
length
=
audio_len
,
...
...
vllm/model_executor/models/fuyu.py
View file @
987506bc
...
...
@@ -142,13 +142,12 @@ class FuyuDummyInputsBuilder(BaseDummyInputsBuilder[FuyuProcessingInfo]):
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
],
)
->
MultiModalDataDict
:
target_width
,
target_height
=
self
.
info
.
get_image_size_with_most_features
()
num_images
=
mm_counts
.
get
(
"image"
,
0
)
image_overrides
=
mm_options
.
get
(
"image"
)
if
mm_options
else
None
image_overrides
=
mm_options
.
get
(
"image"
)
return
{
"image"
:
self
.
_get_dummy_images
(
...
...
vllm/model_executor/models/gemma3_mm.py
View file @
987506bc
...
...
@@ -241,14 +241,13 @@ class Gemma3DummyInputsBuilder(BaseDummyInputsBuilder[Gemma3ProcessingInfo]):
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
],
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
target_width
,
target_height
=
self
.
info
.
get_image_size_with_most_features
()
image_overrides
=
mm_options
.
get
(
"image"
)
if
mm_options
else
None
image_overrides
=
mm_options
.
get
(
"image"
)
return
{
"image"
:
self
.
_get_dummy_images
(
...
...
vllm/model_executor/models/gemma3n_mm.py
View file @
987506bc
...
...
@@ -175,8 +175,7 @@ class Gemma3nDummyInputsBuilder(BaseDummyInputsBuilder[Gemma3nProcessingInfo]):
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
],
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_audios
=
mm_counts
.
get
(
"audio"
,
0
)
...
...
@@ -189,8 +188,8 @@ class Gemma3nDummyInputsBuilder(BaseDummyInputsBuilder[Gemma3nProcessingInfo]):
img_width
=
image_processor
.
size
.
get
(
"width"
,
224
)
img_height
=
image_processor
.
size
.
get
(
"height"
,
224
)
image_overrides
=
mm_options
.
get
(
"image"
)
if
mm_options
else
None
audio_overrides
=
mm_options
.
get
(
"audio"
)
if
mm_options
else
None
image_overrides
=
mm_options
.
get
(
"image"
)
audio_overrides
=
mm_options
.
get
(
"audio"
)
return
{
"image"
:
self
.
_get_dummy_images
(
...
...
@@ -200,7 +199,9 @@ class Gemma3nDummyInputsBuilder(BaseDummyInputsBuilder[Gemma3nProcessingInfo]):
overrides
=
image_overrides
,
),
"audio"
:
self
.
_get_dummy_audios
(
length
=
audio_len
,
num_audios
=
num_audios
,
overrides
=
audio_overrides
length
=
audio_len
,
num_audios
=
num_audios
,
overrides
=
audio_overrides
,
),
}
...
...
vllm/model_executor/models/glm4_1v.py
View file @
987506bc
...
...
@@ -1163,8 +1163,7 @@ class Glm4vDummyInputsBuilder(BaseDummyInputsBuilder[Glm4vProcessingInfo]):
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
],
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_videos
=
mm_counts
.
get
(
"video"
,
0
)
...
...
@@ -1174,8 +1173,8 @@ class Glm4vDummyInputsBuilder(BaseDummyInputsBuilder[Glm4vProcessingInfo]):
seq_len
,
mm_counts
)
image_overrides
=
mm_options
.
get
(
"image"
)
if
mm_options
else
None
video_overrides
=
mm_options
.
get
(
"video"
)
if
mm_options
else
None
image_overrides
=
mm_options
.
get
(
"image"
)
video_overrides
=
mm_options
.
get
(
"video"
)
return
{
"image"
:
self
.
_get_dummy_images
(
...
...
vllm/model_executor/models/glm4v.py
View file @
987506bc
...
...
@@ -492,8 +492,7 @@ class GLM4VDummyInputsBuilder(BaseDummyInputsBuilder[GLM4VProcessingInfo]):
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
],
)
->
MultiModalDataDict
:
hf_config
=
self
.
info
.
get_hf_config
()
vision_config
=
hf_config
.
vision_config
...
...
@@ -501,7 +500,7 @@ class GLM4VDummyInputsBuilder(BaseDummyInputsBuilder[GLM4VProcessingInfo]):
target_width
=
target_height
=
vision_config
[
"image_size"
]
num_images
=
mm_counts
.
get
(
"image"
,
0
)
image_overrides
=
mm_options
.
get
(
"image"
)
if
mm_options
else
None
image_overrides
=
mm_options
.
get
(
"image"
)
return
{
"image"
:
self
.
_get_dummy_images
(
...
...
vllm/model_executor/models/glmasr.py
View file @
987506bc
...
...
@@ -726,15 +726,12 @@ class GlmAsrDummyInputsBuilder(BaseDummyInputsBuilder[GlmAsrProcessingInfo]):
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
],
)
->
MultiModalDataDict
:
feature_extractor
=
self
.
info
.
get_feature_extractor
(
**
(
mm_processor_kwargs
or
{})
)
feature_extractor
=
self
.
info
.
get_feature_extractor
()
sampling_rate
=
feature_extractor
.
sampling_rate
num_audios
=
mm_counts
.
get
(
"audio"
,
0
)
audio_overrides
=
mm_options
.
get
(
"audio"
)
if
mm_options
else
None
audio_overrides
=
mm_options
.
get
(
"audio"
)
max_audio_len
=
getattr
(
self
.
info
.
get_hf_processor
(),
"max_audio_len"
,
DEFAULT_MAX_AUDIO_LEN_S
...
...
@@ -743,7 +740,9 @@ class GlmAsrDummyInputsBuilder(BaseDummyInputsBuilder[GlmAsrProcessingInfo]):
return
{
"audio"
:
self
.
_get_dummy_audios
(
length
=
audio_len
,
num_audios
=
num_audios
,
overrides
=
audio_overrides
length
=
audio_len
,
num_audios
=
num_audios
,
overrides
=
audio_overrides
,
)
}
...
...
vllm/model_executor/models/granite_speech.py
View file @
987506bc
...
...
@@ -216,11 +216,10 @@ class GraniteSpeechDummyInputsBuilder(
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
],
)
->
MultiModalDataDict
:
num_audios
=
mm_counts
.
get
(
"audio"
,
0
)
audio_overrides
=
mm_options
.
get
(
"audio"
)
if
mm_options
else
None
audio_overrides
=
mm_options
.
get
(
"audio"
)
return
{
"audio"
:
self
.
_get_dummy_audios
(
...
...
vllm/model_executor/models/hunyuan_vision.py
View file @
987506bc
...
...
@@ -713,8 +713,7 @@ class HunYuanVLDummyInputsBuilder(BaseDummyInputsBuilder[HunYuanVLProcessingInfo
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
],
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
1
)
...
...
vllm/model_executor/models/hyperclovax_vision.py
View file @
987506bc
...
...
@@ -165,8 +165,7 @@ class HCXVisionDummyInputsBuilder(BaseDummyInputsBuilder[HCXVisionProcessingInfo
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
],
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_videos
=
mm_counts
.
get
(
"video"
,
0
)
...
...
@@ -174,8 +173,8 @@ class HCXVisionDummyInputsBuilder(BaseDummyInputsBuilder[HCXVisionProcessingInfo
target_width
,
target_height
=
self
.
info
.
get_image_size_with_most_features
()
target_num_frames
=
32
image_overrides
=
mm_options
.
get
(
"image"
)
if
mm_options
else
None
video_overrides
=
mm_options
.
get
(
"video"
)
if
mm_options
else
None
image_overrides
=
mm_options
.
get
(
"image"
)
video_overrides
=
mm_options
.
get
(
"video"
)
return
{
"image"
:
self
.
_get_dummy_images
(
...
...
vllm/model_executor/models/idefics3.py
View file @
987506bc
...
...
@@ -277,15 +277,14 @@ class Idefics3DummyInputsBuilder(BaseDummyInputsBuilder[Idefics3ProcessingInfo])
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
],
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
hf_processor
=
self
.
info
.
get_hf_processor
(
**
(
mm_processor_kwargs
or
{})
)
hf_processor
=
self
.
info
.
get_hf_processor
()
image_processor
:
Idefics3ImageProcessor
=
hf_processor
.
image_processor
longest_edge
=
image_processor
.
max_image_size
[
"longest_edge"
]
image_overrides
=
mm_options
.
get
(
"image"
)
if
mm_options
else
None
image_overrides
=
mm_options
.
get
(
"image"
)
return
{
"image"
:
self
.
_get_dummy_images
(
...
...
vllm/model_executor/models/interns1.py
View file @
987506bc
...
...
@@ -297,8 +297,7 @@ class InternS1DummyInputsBuilder(BaseDummyInputsBuilder[InternS1ProcessingInfo])
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
],
)
->
MultiModalDataDict
:
target_width
,
target_height
=
self
.
info
.
get_image_size_with_most_features
()
target_num_frames
=
self
.
info
.
get_num_frames_with_most_features
(
...
...
@@ -310,8 +309,8 @@ class InternS1DummyInputsBuilder(BaseDummyInputsBuilder[InternS1ProcessingInfo])
config
=
self
.
info
.
get_hf_config
()
image_size_h
,
image_size_w
=
config
.
vision_config
.
image_size
image_overrides
=
mm_options
.
get
(
"image"
)
if
mm_options
else
None
video_overrides
=
mm_options
.
get
(
"video"
)
if
mm_options
else
None
image_overrides
=
mm_options
.
get
(
"image"
)
video_overrides
=
mm_options
.
get
(
"video"
)
return
{
"image"
:
self
.
_get_dummy_images
(
...
...
vllm/model_executor/models/internvl.py
View file @
987506bc
...
...
@@ -762,13 +762,12 @@ class BaseInternVLDummyInputsBuilder(BaseDummyInputsBuilder[_I]):
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
],
)
->
MultiModalDataDict
:
target_width
,
target_height
=
self
.
info
.
get_image_size_with_most_features
()
num_images
=
mm_counts
.
get
(
"image"
,
0
)
image_overrides
=
mm_options
.
get
(
"image"
)
if
mm_options
else
None
image_overrides
=
mm_options
.
get
(
"image"
)
return
{
"image"
:
self
.
_get_dummy_images
(
...
...
@@ -935,12 +934,9 @@ class InternVLDummyInputsBuilder(
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
],
)
->
MultiModalDataDict
:
dummy_image
=
super
().
get_dummy_mm_data
(
seq_len
=
seq_len
,
mm_counts
=
mm_counts
,
mm_options
=
mm_options
)
dummy_image
=
super
().
get_dummy_mm_data
(
seq_len
,
mm_counts
,
mm_options
)
if
self
.
info
.
supports_video
:
config
=
self
.
info
.
get_hf_config
()
image_size
:
int
=
config
.
vision_config
.
image_size
...
...
@@ -948,7 +944,7 @@ class InternVLDummyInputsBuilder(
seq_len
,
mm_counts
)
num_videos
=
mm_counts
.
get
(
"video"
,
0
)
video_overrides
=
mm_options
.
get
(
"video"
)
if
mm_options
else
None
video_overrides
=
mm_options
.
get
(
"video"
)
dummy_video
=
{
"video"
:
self
.
_get_dummy_videos
(
width
=
image_size
,
...
...
vllm/model_executor/models/isaac.py
View file @
987506bc
...
...
@@ -18,6 +18,7 @@ from typing_extensions import TypedDict, Unpack
from
vllm.config
import
VllmConfig
from
vllm.config.model
import
ModelConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.distributed
import
parallel_state
from
vllm.distributed
import
utils
as
dist_utils
from
vllm.model_executor.layers.attention
import
MMEncoderAttention
...
...
@@ -849,13 +850,12 @@ class IsaacDummyInputsBuilder(BaseDummyInputsBuilder[IsaacProcessingInfo]):
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
],
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
target_width
,
target_height
=
self
.
info
.
get_image_size_with_most_features
()
image_overrides
=
mm_options
.
get
(
"image"
)
if
mm_options
else
None
image_overrides
=
mm_options
.
get
(
"image"
)
return
{
"image"
:
self
.
_get_dummy_images
(
...
...
vllm/model_executor/models/kanana_v.py
View file @
987506bc
...
...
@@ -444,8 +444,7 @@ class KananaVDummyInputsBuilder(BaseDummyInputsBuilder[KananaVProcessingInfo]):
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
],
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
return
{
...
...
vllm/model_executor/models/keye.py
View file @
987506bc
...
...
@@ -1170,8 +1170,7 @@ class KeyeBaseDummyInputsBuilder(BaseDummyInputsBuilder[_I]):
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
],
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_videos
=
mm_counts
.
get
(
"video"
,
0
)
...
...
@@ -1179,8 +1178,8 @@ class KeyeBaseDummyInputsBuilder(BaseDummyInputsBuilder[_I]):
target_width
,
target_height
=
self
.
info
.
get_image_size_with_most_features
()
target_num_frames
=
self
.
info
.
get_num_frames_with_most_features
(
seq_len
)
image_overrides
=
mm_options
.
get
(
"image"
)
if
mm_options
else
None
video_overrides
=
mm_options
.
get
(
"video"
)
if
mm_options
else
None
image_overrides
=
mm_options
.
get
(
"image"
)
video_overrides
=
mm_options
.
get
(
"video"
)
mm_data
=
{
"image"
:
self
.
_get_dummy_images
(
...
...
vllm/model_executor/models/kimi_k25.py
View file @
987506bc
...
...
@@ -240,8 +240,7 @@ class KimiK25DummyInputsBuilder(BaseDummyInputsBuilder[KimiK25ProcessingInfo]):
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
],
)
->
MultiModalDataDict
:
# TODO: Support mm_options for vision_chunk to allow user configuration
dummy_items
=
self
.
get_dummy_mm_items
()
...
...
vllm/model_executor/models/kimi_vl.py
View file @
987506bc
...
...
@@ -215,12 +215,11 @@ class KimiVLDummyInputsBuilder(BaseDummyInputsBuilder[KimiVLProcessingInfo]):
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
],
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
image_overrides
=
mm_options
.
get
(
"image"
)
if
mm_options
else
None
image_overrides
=
mm_options
.
get
(
"image"
)
return
{
"image"
:
self
.
_get_dummy_images
(
...
...
vllm/model_executor/models/lfm2_vl.py
View file @
987506bc
...
...
@@ -343,14 +343,13 @@ class Lfm2VLDummyInputsBuilder(BaseDummyInputsBuilder[Lfm2VLProcessingInfo]):
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
mm_options
:
Mapping
[
str
,
BaseDummyOptions
]
|
None
=
None
,
mm_processor_kwargs
:
Mapping
[
str
,
object
]
|
None
=
None
,
mm_options
:
Mapping
[
str
,
BaseDummyOptions
],
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
target_width
,
target_height
=
self
.
info
.
get_image_size_with_most_features
()
image_overrides
=
mm_options
.
get
(
"image"
)
if
mm_options
else
None
image_overrides
=
mm_options
.
get
(
"image"
)
return
{
"image"
:
self
.
_get_dummy_images
(
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment