Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
67bc0c00
Unverified
Commit
67bc0c00
authored
Oct 03, 2025
by
Roger Wang
Committed by
GitHub
Oct 04, 2025
Browse files
[Bugfix] Fix qwen3 vl dummy data generation with overrides (#26193)
Signed-off-by:
Roger Wang
<
hey@rogerw.io
>
parent
5a05f266
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
43 additions
and
20 deletions
+43
-20
vllm/model_executor/models/qwen3_vl.py
vllm/model_executor/models/qwen3_vl.py
+43
-20
No files found.
vllm/model_executor/models/qwen3_vl.py
View file @
67bc0c00
...
@@ -47,7 +47,7 @@ from vllm.attention.backends.registry import _Backend
...
@@ -47,7 +47,7 @@ from vllm.attention.backends.registry import _Backend
from
vllm.attention.layer
import
check_upstream_fa_availability
from
vllm.attention.layer
import
check_upstream_fa_availability
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
,
VideoDummyOptions
from
vllm.distributed
import
get_pp_group
from
vllm.distributed
import
get_pp_group
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
_ACTIVATION_REGISTRY
from
vllm.model_executor.layers.activation
import
_ACTIVATION_REGISTRY
...
@@ -741,20 +741,57 @@ class Qwen3VLDummyInputsBuilder(BaseDummyInputsBuilder[Qwen3VLProcessingInfo]):
...
@@ -741,20 +741,57 @@ class Qwen3VLDummyInputsBuilder(BaseDummyInputsBuilder[Qwen3VLProcessingInfo]):
)
->
MultiModalDataDict
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_images
=
mm_counts
.
get
(
"image"
,
0
)
num_videos
=
mm_counts
.
get
(
"video"
,
0
)
num_videos
=
mm_counts
.
get
(
"video"
,
0
)
image_overrides
=
mm_options
.
get
(
"image"
)
if
mm_options
else
None
video_overrides
=
mm_options
.
get
(
"video"
)
if
mm_options
else
None
target_width
,
target_height
=
(
target_width
,
target_height
=
(
self
.
info
.
get_image_size_with_most_features
())
self
.
info
.
get_image_size_with_most_features
())
target_num_frames
=
self
.
info
.
get_num_frames_with_most_features
(
target_num_frames
=
self
.
info
.
get_num_frames_with_most_features
(
seq_len
,
mm_counts
)
seq_len
,
mm_counts
)
if
video_overrides
:
assert
isinstance
(
video_overrides
,
VideoDummyOptions
)
num_frames_override
=
video_overrides
.
num_frames
if
num_frames_override
:
if
num_frames_override
>
target_num_frames
:
logger
.
warning
(
"video.num_frames override (%d) exceeds model's "
"maximum number of frames (%d), will be ignored"
,
num_frames_override
,
target_num_frames
)
if
num_frames_override
<
2
:
logger
.
warning
(
"video.num_frames override (%d) cannot be less "
"than 2, will be ignored"
,
num_frames_override
)
target_num_frames
=
min
(
target_num_frames
,
num_frames_override
)
target_num_frames
=
max
(
target_num_frames
,
2
)
target_video_size
,
_
=
self
.
info
.
_get_vision_info
(
target_video_size
,
_
=
self
.
info
.
_get_vision_info
(
image_width
=
target_width
,
image_width
=
target_width
,
image_height
=
target_height
,
image_height
=
target_height
,
num_frames
=
target_num_frames
,
num_frames
=
target_num_frames
,
image_processor
=
self
.
info
.
get_video_processor
(),
image_processor
=
self
.
info
.
get_video_processor
(),
)
)
# NOTE: we need to do this check here since Qwen3-VL resizes video
image_overrides
=
mm_options
.
get
(
"image"
)
if
mm_options
else
None
# frames depending on how many frames there are.
video_overrides
=
mm_options
.
get
(
"video"
)
if
mm_options
else
None
width
,
height
=
target_video_size
.
width
,
target_video_size
.
height
if
video_overrides
:
assert
isinstance
(
video_overrides
,
VideoDummyOptions
)
width_override
=
video_overrides
.
width
if
width_override
:
if
width_override
>
width
:
logger
.
warning
(
"video.width override (%d) exceeds model's "
"maximum width (%d), will be ignored"
,
width_override
,
width
)
width
=
min
(
width
,
width_override
)
height_override
=
video_overrides
.
height
if
height_override
:
if
height_override
>
height
:
logger
.
warning
(
"video.height override (%d) exceeds model's "
"maximum height (%d), will be ignored"
,
height_override
,
height
)
height
=
min
(
height
,
height_override
)
return
{
return
{
"image"
:
"image"
:
...
@@ -764,11 +801,10 @@ class Qwen3VLDummyInputsBuilder(BaseDummyInputsBuilder[Qwen3VLProcessingInfo]):
...
@@ -764,11 +801,10 @@ class Qwen3VLDummyInputsBuilder(BaseDummyInputsBuilder[Qwen3VLProcessingInfo]):
overrides
=
image_overrides
),
overrides
=
image_overrides
),
"video"
:
"video"
:
self
.
_get_dummy_videos
(
self
.
_get_dummy_videos
(
width
=
target_video_size
.
width
,
width
=
width
,
height
=
target_video_size
.
height
,
height
=
height
,
num_frames
=
target_num_frames
,
num_frames
=
target_num_frames
,
num_videos
=
num_videos
,
num_videos
=
num_videos
,
overrides
=
video_overrides
,
),
),
}
}
...
@@ -780,7 +816,6 @@ class Qwen3VLDummyInputsBuilder(BaseDummyInputsBuilder[Qwen3VLProcessingInfo]):
...
@@ -780,7 +816,6 @@ class Qwen3VLDummyInputsBuilder(BaseDummyInputsBuilder[Qwen3VLProcessingInfo]):
num_frames
:
int
,
num_frames
:
int
,
num_videos
:
int
,
num_videos
:
int
,
)
->
list
[
VideoItem
]:
)
->
list
[
VideoItem
]:
num_frames
=
max
(
num_frames
,
2
)
video
=
np
.
full
((
num_frames
,
width
,
height
,
3
),
255
,
dtype
=
np
.
uint8
)
video
=
np
.
full
((
num_frames
,
width
,
height
,
3
),
255
,
dtype
=
np
.
uint8
)
video_items
=
[]
video_items
=
[]
for
i
in
range
(
num_videos
):
for
i
in
range
(
num_videos
):
...
@@ -796,18 +831,6 @@ class Qwen3VLDummyInputsBuilder(BaseDummyInputsBuilder[Qwen3VLProcessingInfo]):
...
@@ -796,18 +831,6 @@ class Qwen3VLDummyInputsBuilder(BaseDummyInputsBuilder[Qwen3VLProcessingInfo]):
video_items
.
append
(
video_item
)
video_items
.
append
(
video_item
)
return
video_items
return
video_items
def
get_dummy_processor_inputs
(
self
,
seq_len
,
mm_counts
):
processor_inputs
=
super
().
get_dummy_processor_inputs
(
seq_len
,
mm_counts
)
# HACK(Isotr0py): We set do_resize to False here to reuse Qwen2-VL's
# profiling logic, which will be problematic for configurable mm
# profiling.
# TODO(Isotr0py): Switch to the implementation in
# https://github.com/vllm-project/vllm/pull/25557
# after supporting configurable mm profiling.
processor_inputs
.
hf_processor_mm_kwargs
=
{
"do_resize"
:
False
}
return
processor_inputs
class
Qwen3VLMultiModalProcessor
(
BaseMultiModalProcessor
[
Qwen3VLProcessingInfo
]
class
Qwen3VLMultiModalProcessor
(
BaseMultiModalProcessor
[
Qwen3VLProcessingInfo
]
):
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment