gaoqiong / flash-attention · Commits

Commit f4628b43 (unverified), authored Jul 09, 2024 by Phil Wang, committed via GitHub Jul 09, 2024

missing commas and backwards return arguments (#1032)

* missing commas
* another fix

parent 8f873cc6
Showing 1 changed file with 6 additions and 6 deletions

flash_attn/flash_attn_interface.py (+6, -6)
@@ -286,7 +286,7 @@ class FlashAttnQKVPackedFunc(torch.autograd.Function):
             rng_state=rng_state,
         )
         dqkv = dqkv[..., : dout.shape[-1]]  # We could have padded the head dimension
-        return dqkv, None, None, None, None, None, None, None
+        return dqkv, None, None, None, None, None, None, None, None


 class FlashAttnVarlenQKVPackedFunc(torch.autograd.Function):
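The extra None on the added line is the "backwards return arguments" half of the title: torch.autograd.Function.backward must return exactly one gradient per argument that forward received after ctx, with None for non-differentiable arguments such as flags and scales, and here the return was one slot short (presumably after forward gained a new argument). A minimal sketch of that contract, with made-up names rather than flash-attention's own code:

import torch

class Scale(torch.autograd.Function):
    # forward takes two inputs after ctx: a tensor x and a float factor...
    @staticmethod
    def forward(ctx, x, factor):
        ctx.factor = factor
        return x * factor

    # ...so backward must return exactly two values: a gradient for x,
    # and None for the non-tensor factor. One value too few or too many
    # is a runtime error.
    @staticmethod
    def backward(ctx, grad_out):
        return grad_out * ctx.factor, None

x = torch.ones(3, requires_grad=True)
Scale.apply(x, 2.0).sum().backward()
print(x.grad)  # tensor([2., 2., 2.])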
@@ -511,7 +511,7 @@ class FlashAttnVarlenKVPackedFunc(torch.autograd.Function):
         )
         dq = dq[..., : dout.shape[-1]]  # We could have padded the head dimension
         dkv = dkv[..., : dout.shape[-1]]
-        return dq, dkv, None, None, None, None, None, None, None, None, None, None, None
+        return dq, dkv, None, None, None, None, None, None, None, None, None, None, None, None


 class FlashAttnFunc(torch.autograd.Function):
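A side note on the unchanged [..., : dout.shape[-1]] context lines, since they recur in every hunk: per the comment "We could have padded the head dimension", the kernels may pad the head dimension internally, so each gradient is sliced back to the incoming gradient's head size before being returned. An illustration of that ellipsis slice, with invented shapes:

import torch

dout = torch.randn(2, 128, 8, 64)  # incoming gradient, head dim 64
dq = torch.randn(2, 128, 8, 72)    # kernel result, head dim padded (e.g. to 72)
dq = dq[..., : dout.shape[-1]]     # trim only the last dimension back to 64
print(dq.shape)                    # torch.Size([2, 128, 8, 64])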
@@ -572,7 +572,7 @@ class FlashAttnFunc(torch.autograd.Function):
             ctx.softmax_scale,
             ctx.causal,
             ctx.window_size,
-            ctx.softcap
+            ctx.softcap,
             ctx.alibi_slopes,
             ctx.deterministic,
             rng_state=rng_state,
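This hunk is the "missing commas" half of the title: without the trailing comma, ctx.softcap and ctx.alibi_slopes sit side by side inside the call's parentheses, which is a SyntaxError, so the backward pass could not even be parsed. A stripped-down reproduction using compile (the call name f is invented):

broken = """
f(
    softcap
    alibi_slopes,
)
"""
try:
    compile(broken, "<demo>", "exec")
except SyntaxError as e:
    # On recent CPython: "invalid syntax. Perhaps you forgot a comma?"
    print("SyntaxError:", e.msg)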
@@ -580,7 +580,7 @@ class FlashAttnFunc(torch.autograd.Function):
         dq = dq[..., : dout.shape[-1]]  # We could have padded the head dimension
         dk = dk[..., : dout.shape[-1]]
         dv = dv[..., : dout.shape[-1]]
-        return dq, dk, dv, None, None, None, None, None, None, None
+        return dq, dk, dv, None, None, None, None, None, None, None, None


 class FlashAttnVarlenFunc(torch.autograd.Function):
@@ -659,7 +659,7 @@ class FlashAttnVarlenFunc(torch.autograd.Function):
             ctx.softmax_scale,
             ctx.causal,
             ctx.window_size,
-            ctx.softcap
+            ctx.softcap,
             ctx.alibi_slopes,
             ctx.deterministic,
             rng_state=rng_state,
@@ -667,7 +667,7 @@ class FlashAttnVarlenFunc(torch.autograd.Function):
         dq = dq[..., : dout.shape[-1]]  # We could have padded the head dimension
         dk = dk[..., : dout.shape[-1]]
         dv = dv[..., : dout.shape[-1]]
-        return dq, dk, dv, None, None, None, None, None, None, None, None, None, None
+        return dq, dk, dv, None, None, None, None, None, None, None, None, None, None, None


 def flash_attn_qkvpacked_func(
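Why the return-count half of the fix matters in practice: with one None missing, autograd raises the first time any gradient flows through the function, which is the failure mode this commit removes. A toy reproduction of the error (again not flash-attention's own code):

import torch

class Bad(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, flag):  # two inputs after ctx...
        return x * 2

    @staticmethod
    def backward(ctx, grad_out):
        return (grad_out * 2,)  # ...but only one gradient returned

x = torch.ones(2, requires_grad=True)
try:
    Bad.apply(x, True).sum().backward()
except RuntimeError as e:
    # e.g. "function ...Backward returned an incorrect number of gradients
    # (expected 2, got 1)"
    print(e)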