Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
65ecb4f1
Unverified
Commit 65ecb4f1 authored Sep 28, 2025 by Roger Wang
Committed by GitHub Sep 29, 2025
Browse files
[Bugfix] Fallback ViT attn backend to SDPA for blackwell (#25851)
Signed-off-by: Roger Wang <hey@rogerw.io>
parent
143844fa
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing 2 changed files with 7 additions and 9 deletions
+7
-9
vllm/model_executor/models/qwen3_vl.py
vllm/model_executor/models/qwen3_vl.py
+1
-9
vllm/platforms/cuda.py
vllm/platforms/cuda.py
+6
-0
No files found.
vllm/model_executor/models/qwen3_vl.py
View file @
65ecb4f1
...
...
@@ -66,7 +66,7 @@ from vllm.multimodal.processing import (BaseMultiModalProcessor,
PromptReplacement
,
PromptUpdate
,
PromptUpdateDetails
)
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
from
vllm.platforms
import
_Backend
,
current_platform
from
vllm.platforms
import
_Backend
from
vllm.sequence
import
IntermediateTensors
from
vllm.transformers_utils.config
import
uses_mrope
from
vllm.utils
import
is_list_of
...
...
@@ -336,14 +336,6 @@ class Qwen3_VisionTransformer(nn.Module):
}:
raise
RuntimeError
(
f
"Qwen3-VL does not support
{
self
.
attn_backend
}
backend now."
)
if
current_platform
.
is_device_capability
(
100
)
and
self
.
attn_backend
!=
_Backend
.
TORCH_SDPA
:
# TODO(Roger/Wentao): remove this after FA
# or XFORMERS's issue fixed on Blackwell
logger
.
info_once
(
"Qwen3-VL vision attention does not support "
f
"
{
self
.
attn_backend
}
backend on Blackwell now. "
"Vision attention backend is set to TORCH_SDPA."
)
self
.
attn_backend
=
_Backend
.
TORCH_SDPA
self
.
blocks
=
nn
.
ModuleList
([
Qwen3_VisionBlock
(
...
...
vllm/platforms/cuda.py
View file @
65ecb4f1
...
...
@@ -205,6 +205,12 @@ class CudaPlatformBase(Platform):
@
classmethod
def
get_vit_attn_backend
(
cls
,
head_size
:
int
,
dtype
:
torch
.
dtype
)
->
_Backend
:
# For Blackwell GPUs, force TORCH_SDPA for now.
# See https://github.com/facebookresearch/xformers/issues/1317#issuecomment-3199392579 # noqa: E501
if
cls
.
has_device_capability
(
100
):
return
_Backend
.
TORCH_SDPA
if
dtype
not
in
(
torch
.
float16
,
torch
.
bfloat16
):
return
_Backend
.
XFORMERS
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment