Commit f9ecbb18 (unverified), authored by Isotr0py, committed by GitHub
Browse files

[Misc] Allow passing logits_soft_cap for xformers backend (#11252)


Signed-off-by: Isotr0py <2037008807@qq.com>
parent 02222a02
@@ -17,9 +17,7 @@ from vllm.attention.backends.utils import (
is_all_cross_attn_metadata_set, is_all_encoder_attn_metadata_set)
from vllm.attention.ops.paged_attn import (PagedAttention,
PagedAttentionMetadata)
-from vllm.logger import init_logger
-logger = init_logger(__name__)
+from vllm.utils import print_warning_once
class XFormersBackend(AttentionBackend):
@@ -386,8 +384,8 @@ class XFormersImpl(AttentionImpl[XFormersMetadata]):
raise ValueError(
"XFormers does not support block-sparse attention.")
if logits_soft_cap is not None:
-raise ValueError(
-"XFormers does not support attention logits soft capping.")
+print_warning_once("XFormers does not support logits soft cap. "
+"Outputs may be slightly off.")
self.num_heads = num_heads
self.head_size = head_size
self.scale = float(scale)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment