Unverified commit 087751a8 authored by Stefan He, committed by GitHub

Remove unnecessary is_fa3_supported check (#6112)

parent 911f3ba6
@@ -144,10 +144,6 @@ def flash_attn_with_kvcache(
         logsumexp of each row of the matrix QK^T * scaling (e.g., log of the softmax
         normalization factor).
     """
-    if not is_fa3_supported():
-        raise NotImplementedError(
-            "flash_attn at sgl-kernel is only supported on sm90 and cu123 above"
-        )
     assert k_cache.stride(-1) == 1, "k_cache must have contiguous last dimension"
     assert v_cache.stride(-1) == 1, "v_cache must have contiguous last dimension"
     if softmax_scale is None:
...
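For context, the removed guard raised a per-call `NotImplementedError` on hardware below SM90 (Hopper) or CUDA builds older than 12.3. Below is a minimal, hedged sketch of an equivalent caller-side capability check using only standard PyTorch APIs; the function name `_fa3_likely_supported` is hypothetical and is not the `is_fa3_supported` helper inside sgl-kernel.

```python
import torch


def _fa3_likely_supported() -> bool:
    # Hypothetical caller-side guard mirroring the removed check:
    # FA3 kernels target SM90 (compute capability 9.x) and newer,
    # built against CUDA 12.3 or later.
    if not torch.cuda.is_available():
        return False
    major, _minor = torch.cuda.get_device_capability()
    cuda_version = torch.version.cuda  # e.g. "12.4"; None for CPU-only builds
    if cuda_version is None:
        return False
    cuda_major, cuda_minor = (int(x) for x in cuda_version.split(".")[:2])
    return major >= 9 and (cuda_major, cuda_minor) >= (12, 3)


# Usage sketch: decide up front whether to take the FA3 path instead of
# relying on a runtime check inside every flash_attn_with_kvcache call.
if _fa3_likely_supported():
    # out = flash_attn_with_kvcache(q, k_cache, v_cache, ...)
    pass
```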