Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
6c66f28f
Unverified
Commit
6c66f28f
authored
Jul 26, 2025
by
Wenchen Lo
Committed by
GitHub
Jul 26, 2025
Browse files
Remove xformers requirement for Mistral-format Pixtral and Mistral3 (#21154)
Signed-off-by: Wenchen Lo <charles761013@gmail.com>
parent
de509ae8
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing 1 changed file with 18 additions and 3 deletions
+18
-3
vllm/model_executor/models/pixtral.py
vllm/model_executor/models/pixtral.py
+18
-3
No files found.
vllm/model_executor/models/pixtral.py
View file @
6c66f28f
...
@@ -671,7 +671,19 @@ class Attention(nn.Module):
...
@@ -671,7 +671,19 @@ class Attention(nn.Module):
v
=
v
.
reshape
(
batch
,
patches
,
self
.
n_heads
,
self
.
head_dim
)
v
=
v
.
reshape
(
batch
,
patches
,
self
.
n_heads
,
self
.
head_dim
)
q
,
k
=
apply_rotary_emb_vit
(
q
,
k
,
freqs_cis
=
freqs_cis
)
q
,
k
=
apply_rotary_emb_vit
(
q
,
k
,
freqs_cis
=
freqs_cis
)
out
=
xops
.
memory_efficient_attention
(
q
,
k
,
v
,
attn_bias
=
mask
)
if
USE_XFORMERS_OPS
:
out
=
xops
.
memory_efficient_attention
(
q
,
k
,
v
,
attn_bias
=
mask
)
else
:
q
=
q
.
transpose
(
1
,
2
)
k
=
k
.
transpose
(
1
,
2
)
v
=
v
.
transpose
(
1
,
2
)
out
=
nn
.
functional
.
scaled_dot_product_attention
(
q
,
k
,
v
,
attn_mask
=
mask
)
out
=
out
.
transpose
(
1
,
2
)
out
=
out
.
reshape
(
batch
,
patches
,
self
.
n_heads
*
self
.
head_dim
)
out
=
out
.
reshape
(
batch
,
patches
,
self
.
n_heads
*
self
.
head_dim
)
return
self
.
wo
(
out
)
return
self
.
wo
(
out
)
...
@@ -814,8 +826,11 @@ class VisionTransformer(nn.Module):
...
@@ -814,8 +826,11 @@ class VisionTransformer(nn.Module):
mask
=
xops
.
fmha
.
attn_bias
.
BlockDiagonalMask
.
from_seqlens
(
mask
=
xops
.
fmha
.
attn_bias
.
BlockDiagonalMask
.
from_seqlens
(
[
p
.
shape
[
-
2
]
*
p
.
shape
[
-
1
]
for
p
in
patch_embeds_list
],
)
[
p
.
shape
[
-
2
]
*
p
.
shape
[
-
1
]
for
p
in
patch_embeds_list
],
)
else
:
else
:
raise
ImportError
(
"Xformers is required for Pixtral inference "
from
transformers.models.pixtral.modeling_pixtral
import
(
"with the Mistral format"
)
generate_block_attention_mask
)
mask
=
generate_block_attention_mask
(
[
p
.
shape
[
-
2
]
*
p
.
shape
[
-
1
]
for
p
in
patch_embeds_list
],
patch_embeds
)
out
=
self
.
transformer
(
patch_embeds
,
mask
=
mask
,
freqs_cis
=
freqs_cis
)
out
=
self
.
transformer
(
patch_embeds
,
mask
=
mask
,
freqs_cis
=
freqs_cis
)
# squeeze dim 0 and split into separate tensors for each image
# squeeze dim 0 and split into separate tensors for each image
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment