Unverified commit f4609833, authored by Michael Goin, committed by GitHub
Browse files

[Bugfix] Fix Mistral3 support on SM100/SM120 (#20998)


Signed-off-by: mgoin <mgoin64@gmail.com>
parent e9534c72
......@@ -43,6 +43,7 @@ from vllm.multimodal.processing import (BaseMultiModalProcessor,
PromptReplacement, PromptUpdate,
PromptUpdateDetails)
from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
from vllm.platforms import current_platform
from vllm.sequence import IntermediateTensors
from vllm.transformers_utils.tokenizer import (MistralTokenizer,
cached_tokenizer_from_config)
......@@ -54,7 +55,12 @@ from .vision import VisionEncoderInfo, resolve_visual_encoder_outputs
# USE_XFORMERS_OPS records whether the optional xformers attention ops may be
# used. It is False when xformers is not installed or when the current GPU is
# one that the xformers flash-attention kernels do not support.
try:
    from xformers import ops as xops

    # Xformers FA is not compatible with B200. The check is by compute
    # capability 100 (SM100) so that it also covers SM120 devices
    # (assumes has_device_capability means "at least" -- confirm against
    # vllm.platforms).
    USE_XFORMERS_OPS = not (current_platform.is_cuda()
                            and current_platform.has_device_capability(100))
except ImportError:
    USE_XFORMERS_OPS = False
......@@ -1082,7 +1088,6 @@ class PixtralHFAttention(nn.Module):
# Transpose q and k back for attention
q = q.transpose(1, 2).contiguous()
k = k.transpose(1, 2).contiguous()
out = xops.memory_efficient_attention(q,
k,
v,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment