gaoqiong / flash-attention · Commit 6bbc5323

fix: cast the alibi slopes to torch.float32 (#846)

Unverified commit, authored Mar 15, 2024 by Markus Krimmel; committed by GitHub on Mar 15, 2024.
parent 4a73e903

Showing 1 changed file with 4 additions and 0 deletions.

flash_attn/modules/mha.py (+4, -0)
@@ -101,6 +101,8 @@ class FlashSelfAttention(nn.Module):
         assert qkv.is_cuda
         causal = self.causal if causal is None else causal
         unpadded = cu_seqlens is not None
+        if self.alibi_slopes is not None:
+            self.alibi_slopes = self.alibi_slopes.to(torch.float32)
         if unpadded:
             assert cu_seqlens.dtype == torch.int32
             assert max_seqlen is not None

@@ -185,6 +187,8 @@ class FlashCrossAttention(nn.Module):
         assert q.is_cuda and kv.is_cuda
         causal = self.causal if causal is None else causal
         unpadded = cu_seqlens is not None
+        if self.alibi_slopes is not None:
+            self.alibi_slopes = self.alibi_slopes.to(torch.float32)
         if unpadded:
             assert cu_seqlens.dtype == torch.int32
             assert max_seqlen is not None
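For context, a minimal usage sketch (not part of the commit) of why the cast matters: the flash-attn CUDA kernels expect ALiBi slopes as float32, but a module-wide .half() or mixed-precision setup can leave them in half precision; the forward-pass cast above guards against that. The constructor arguments and packed qkv layout below are inferred from the diff context (qkv.is_cuda, self.alibi_slopes), so treat the exact signature as an assumption.

    # Hypothetical usage sketch; assumes FlashSelfAttention accepts
    # alibi_slopes in its constructor and a packed qkv tensor of shape
    # (batch, seqlen, 3, nheads, headdim) on CUDA.
    import torch
    from flash_attn.modules.mha import FlashSelfAttention

    batch, seqlen, nheads, headdim = 2, 128, 8, 64

    # Standard ALiBi slopes: geometric sequence 2**(-8*(i+1)/nheads), one per head.
    alibi_slopes = torch.tensor(
        [2.0 ** (-8.0 * (i + 1) / nheads) for i in range(nheads)],
        device="cuda",
        dtype=torch.float16,  # deliberately half precision to exercise the fix
    )

    attn = FlashSelfAttention(causal=True, alibi_slopes=alibi_slopes)

    qkv = torch.randn(
        batch, seqlen, 3, nheads, headdim, device="cuda", dtype=torch.float16
    )

    # With this commit, forward() re-casts self.alibi_slopes to float32
    # before handing it to the kernel, so half-precision slopes no longer
    # trip a dtype check inside flash-attn.
    out = attn(qkv)
    print(out.shape)  # torch.Size([2, 128, 8, 64])

The same guard is applied in FlashCrossAttention, whose forward takes separate q and kv tensors instead of a packed qkv.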