Unverified Commit 5786b0e2 authored by Suraj Patil's avatar Suraj Patil Committed by GitHub
Browse files

handle dtype xformers attention (#1196)

handle dtype xformers
parent 32b0736d
......@@ -492,6 +492,8 @@ class CrossAttention(nn.Module):
# attention, what we cannot get enough of
if self._use_memory_efficient_attention_xformers:
hidden_states = self._memory_efficient_attention_xformers(query, key, value)
# Some versions of xformers return output in fp32, cast it back to the dtype of the input
hidden_states = hidden_states.to(query.dtype)
else:
if self._slice_size is None or query.shape[0] // self._slice_size == 1:
hidden_states = self._attention(query, key, value)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment