Unverified commit 087751a8 authored by Stefan He, committed by GitHub

Remove unnecessary is_fa3_supported check (#6112)

parent 911f3ba6
@@ -144,10 +144,6 @@ def flash_attn_with_kvcache(
         logsumexp of each row of the matrix QK^T * scaling (e.g., log of the softmax
         normalization factor).
     """
-    if not is_fa3_supported():
-        raise NotImplementedError(
-            "flash_attn at sgl-kernel is only supported on sm90 and cu123 above"
-        )
     assert k_cache.stride(-1) == 1, "k_cache must have contiguous last dimension"
     assert v_cache.stride(-1) == 1, "v_cache must have contiguous last dimension"
     if softmax_scale is None:
...
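For context, the removed guard raised a per-call `NotImplementedError` on hardware below SM90 (Hopper) or CUDA builds older than 12.3. Below is a minimal, hedged sketch of an equivalent caller-side capability check using only standard PyTorch APIs; the function name `_fa3_likely_supported` is hypothetical and is not the `is_fa3_supported` helper inside sgl-kernel.

```python
import torch


def _fa3_likely_supported() -> bool:
    # Hypothetical caller-side guard mirroring the removed check:
    # FA3 kernels target SM90 (compute capability 9.x) and newer,
    # built against CUDA 12.3 or later.
    if not torch.cuda.is_available():
        return False
    major, _minor = torch.cuda.get_device_capability()
    cuda_version = torch.version.cuda  # e.g. "12.4"; None for CPU-only builds
    if cuda_version is None:
        return False
    cuda_major, cuda_minor = (int(x) for x in cuda_version.split(".")[:2])
    return major >= 9 and (cuda_major, cuda_minor) >= (12, 3)


# Usage sketch: decide up front whether to take the FA3 path instead of
# relying on a runtime check inside every flash_attn_with_kvcache call.
if _fa3_likely_supported():
    # out = flash_attn_with_kvcache(q, k_cache, v_cache, ...)
    pass
```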