Unverified commit f9ecbb18, authored by Isotr0py, committed by GitHub
Browse files

[Misc] Allow passing logits_soft_cap for xformers backend (#11252)


Signed-off-by: Isotr0py <2037008807@qq.com>
parent 02222a02
...@@ -17,9 +17,7 @@ from vllm.attention.backends.utils import ( ...@@ -17,9 +17,7 @@ from vllm.attention.backends.utils import (
is_all_cross_attn_metadata_set, is_all_encoder_attn_metadata_set) is_all_cross_attn_metadata_set, is_all_encoder_attn_metadata_set)
from vllm.attention.ops.paged_attn import (PagedAttention, from vllm.attention.ops.paged_attn import (PagedAttention,
PagedAttentionMetadata) PagedAttentionMetadata)
from vllm.logger import init_logger from vllm.utils import print_warning_once
logger = init_logger(__name__)
class XFormersBackend(AttentionBackend): class XFormersBackend(AttentionBackend):
...@@ -386,8 +384,8 @@ class XFormersImpl(AttentionImpl[XFormersMetadata]): ...@@ -386,8 +384,8 @@ class XFormersImpl(AttentionImpl[XFormersMetadata]):
raise ValueError( raise ValueError(
"XFormers does not support block-sparse attention.") "XFormers does not support block-sparse attention.")
if logits_soft_cap is not None: if logits_soft_cap is not None:
raise ValueError( print_warning_once("XFormers does not support logits soft cap. "
"XFormers does not support attention logits soft capping.") "Outputs may be slightly off.")
self.num_heads = num_heads self.num_heads = num_heads
self.head_size = head_size self.head_size = head_size
self.scale = float(scale) self.scale = float(scale)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment