Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
1e508343
Unverified
Commit
1e508343
authored
Mar 21, 2025
by
Isotr0py
Committed by
GitHub
Mar 20, 2025
Browse files
[Bugfix] Fix incorrect qwen2.5-vl attention mask pre-computation (#15200)
Signed-off-by:
Isotr0py
<
2037008807@qq.com
>
parent
2e0b4cfd
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
37 additions
and
4 deletions
+37
-4
tests/models/decoder_only/vision_language/test_models.py
tests/models/decoder_only/vision_language/test_models.py
+13
-0
tests/models/decoder_only/vision_language/vlm_utils/custom_inputs.py
...s/decoder_only/vision_language/vlm_utils/custom_inputs.py
+18
-0
vllm/model_executor/models/qwen2_5_vl.py
vllm/model_executor/models/qwen2_5_vl.py
+6
-4
No files found.
tests/models/decoder_only/vision_language/test_models.py
View file @
1e508343
...
...
@@ -508,6 +508,19 @@ VLM_TEST_SETTINGS = {
limit_mm_per_prompt
=
{
"image"
:
4
},
)],
),
# regression test for https://github.com/vllm-project/vllm/issues/15122
"qwen2_5_vl-windows-attention"
:
VLMTestInfo
(
models
=
[
"Qwen/Qwen2.5-VL-3B-Instruct"
],
test_type
=
VLMTestType
.
CUSTOM_INPUTS
,
max_model_len
=
4096
,
max_num_seqs
=
2
,
auto_cls
=
AutoModelForVision2Seq
,
vllm_output_post_proc
=
model_utils
.
qwen2_vllm_to_hf_output
,
custom_test_opts
=
[
CustomTestOptions
(
inputs
=
custom_inputs
.
windows_attention_image_qwen2_5_vl
(),
limit_mm_per_prompt
=
{
"image"
:
1
},
)],
),
}
# yapf: enable
...
...
tests/models/decoder_only/vision_language/vlm_utils/custom_inputs.py
View file @
1e508343
# SPDX-License-Identifier: Apache-2.0
"""Custom input builders for edge-cases in different models."""
from
io
import
BytesIO
from
typing
import
Callable
import
requests
from
PIL
import
Image
from
vllm.multimodal.image
import
rescale_image_size
from
vllm.multimodal.video
import
(
rescale_video_size
,
resize_video
,
sample_frames_from_video
)
...
...
@@ -102,3 +106,17 @@ def different_patch_input_cases_internvl():
build_single_image_inputs
(
images
,
formatted_sprompts
,
wrapped_sf
),
build_multi_image_inputs
([
images
],
formatted_mprompts
,
wrapped_sf
),
]
def windows_attention_image_qwen2_5_vl():
    """Build the single-image input for the Qwen2.5-VL window-attention test.

    Downloads the image referenced in the regression issue, embeds the
    Qwen2.5-VL image placeholder in a chat prompt asking for a description,
    and forwards the image and prompt to ``build_single_image_inputs`` with a
    single size factor of 0.5.

    Returns:
        The result of ``build_single_image_inputs`` for the one image, the
        one prompt and the size-factor wrapper.

    Raises:
        requests.RequestException: If the download times out or the server
            responds with an HTTP error status.
    """
    # image from regression issue: https://github.com/vllm-project/vllm/issues/15122
    image_url = "https://aomediacodec.github.io/av1-avif/testFiles/Link-U/hato.jpg"
    # Bound the request so an unresponsive host cannot hang the test run, and
    # fail on HTTP errors here rather than letting PIL choke on an error page.
    response = requests.get(image_url, timeout=30)
    response.raise_for_status()
    image = Image.open(BytesIO(response.content))

    question = "Describe the image."
    img_prompt = "<|vision_start|><|image_pad|><|vision_end|>"
    prompt = (f"<|im_start|>User\n{img_prompt}{question}<|im_end|>\n"
              "<|im_start|>assistant\n")

    wrapped_sf = ImageSizeWrapper(type=SizeType.SIZE_FACTOR, data=[0.5])
    return build_single_image_inputs([image], [prompt], wrapped_sf)
vllm/model_executor/models/qwen2_5_vl.py
View file @
1e508343
...
...
@@ -647,15 +647,17 @@ class Qwen2_5_VisionTransformer(nn.Module):
max_seqlen
=
None
seqlens
=
None
if
self
.
attn_backend
==
_Backend
.
FLASH_ATTN
:
max_seqlen
=
(
cu_seqlens
[
1
:]
-
cu_seqlens
[:
-
1
]).
max
().
item
()
elif
self
.
attn_backend
==
_Backend
.
XFORMERS
:
seqlens
=
(
cu_seqlens
[
1
:]
-
cu_seqlens
[:
-
1
]).
tolist
()
for
layer_num
,
blk
in
enumerate
(
self
.
blocks
):
if
layer_num
in
self
.
fullatt_block_indexes
:
cu_seqlens_now
=
cu_seqlens
else
:
cu_seqlens_now
=
cu_window_seqlens
# pre-compute max_seqlen / seqlens from the cu_seqlens used by this layer (full or window attn)
if
self
.
attn_backend
==
_Backend
.
FLASH_ATTN
:
max_seqlen
=
(
cu_seqlens_now
[
1
:]
-
cu_seqlens_now
[:
-
1
]).
max
().
item
()
elif
self
.
attn_backend
==
_Backend
.
XFORMERS
:
seqlens
=
(
cu_seqlens_now
[
1
:]
-
cu_seqlens_now
[:
-
1
]).
tolist
()
hidden_states
=
blk
(
hidden_states
,
cu_seqlens
=
cu_seqlens_now
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment