Unverified commit f9f3b596, authored by Matthew Bonanni, committed by GitHub
Browse files

[Attention][Bugfix] Fix FA sink support (#28660)


Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
parent 119c4927
...@@ -130,6 +130,12 @@ class FlashAttentionBackend(AttentionBackend): ...@@ -130,6 +130,12 @@ class FlashAttentionBackend(AttentionBackend):
return flash_attn_supports_fp8() return flash_attn_supports_fp8()
return kv_cache_dtype in ["auto"] return kv_cache_dtype in ["auto"]
@classmethod
def supports_sink(cls) -> bool:
    """Return whether sink support is available for this backend.

    Returns:
        False when ``is_flash_attn_varlen_func_available()`` is falsy;
        otherwise the result of ``flash_attn_supports_sinks()``.
    """
    # Only query sink support once the varlen function is known to be
    # available; without it the answer is unconditionally False.
    if is_flash_attn_varlen_func_available():
        return flash_attn_supports_sinks()
    return False
@classmethod @classmethod
def supports_compute_capability(cls, capability: DeviceCapability) -> bool: def supports_compute_capability(cls, capability: DeviceCapability) -> bool:
return capability >= DeviceCapability(8, 0) return capability >= DeviceCapability(8, 0)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment