Commit ab5b6459, authored by Roger Wang and committed by simon-mo
Browse files

[Bugfix] Fallback ViT attn backend to SDPA for blackwell (#25851)


Signed-off-by: Roger Wang <hey@rogerw.io>
Signed-off-by: simon-mo <simon.mo@hey.com>
parent 8ce5d319
......@@ -66,7 +66,7 @@ from vllm.multimodal.processing import (BaseMultiModalProcessor,
PromptReplacement, PromptUpdate,
PromptUpdateDetails)
from vllm.multimodal.profiling import BaseDummyInputsBuilder
from vllm.platforms import _Backend, current_platform
from vllm.platforms import _Backend
from vllm.sequence import IntermediateTensors
from vllm.transformers_utils.config import uses_mrope
from vllm.utils import is_list_of
......@@ -335,14 +335,6 @@ class Qwen3_VisionTransformer(nn.Module):
}:
raise RuntimeError(
f"Qwen3-VL does not support {self.attn_backend} backend now.")
if current_platform.is_device_capability(
100) and self.attn_backend != _Backend.TORCH_SDPA:
# TODO(Roger/Wentao): remove this after FA
# or XFORMERS's issue fixed on Blackwell
logger.info_once("Qwen3-VL vision attention does not support "
f"{self.attn_backend} backend on Blackwell now. "
"Vision attention backend is set to TORCH_SDPA.")
self.attn_backend = _Backend.TORCH_SDPA
self.blocks = nn.ModuleList([
Qwen3_VisionBlock(
......
......@@ -205,6 +205,12 @@ class CudaPlatformBase(Platform):
@classmethod
def get_vit_attn_backend(cls, head_size: int,
dtype: torch.dtype) -> _Backend:
# For Blackwell GPUs, force TORCH_SDPA for now.
# See https://github.com/facebookresearch/xformers/issues/1317#issuecomment-3199392579 # noqa: E501
if cls.has_device_capability(100):
return _Backend.TORCH_SDPA
if dtype not in (torch.float16, torch.bfloat16):
return _Backend.XFORMERS
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment