Unverified Commit 4ae54b37 authored by Patrick von Platen's avatar Patrick von Platen Committed by GitHub
Browse files

[attention] Fix attention (#2656)

* [attention] Fix attention

* fix

* correct
parent fa7a5761
......@@ -271,9 +271,10 @@ class BasicTransformerBlock(nn.Module):
def forward(
self,
hidden_states,
attention_mask=None,
encoder_hidden_states=None,
encoder_attention_mask=None,
timestep=None,
attention_mask=None,
cross_attention_kwargs=None,
class_labels=None,
):
......@@ -302,12 +303,14 @@ class BasicTransformerBlock(nn.Module):
norm_hidden_states = (
self.norm2(hidden_states, timestep) if self.use_ada_layer_norm else self.norm2(hidden_states)
)
# TODO (Birch-San): Here we should prepare the encoder_attention mask correctly
# prepare attention mask here
# 2. Cross-Attention
attn_output = self.attn2(
norm_hidden_states,
encoder_hidden_states=encoder_hidden_states,
attention_mask=attention_mask,
attention_mask=encoder_attention_mask,
**cross_attention_kwargs,
)
hidden_states = attn_output + hidden_states
......
......@@ -737,7 +737,7 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
# make sure that more than 4 GB is allocated
mem_bytes = torch.cuda.max_memory_allocated()
assert mem_bytes > 4e9
assert mem_bytes > 5e9
assert np.abs(image_chunked.flatten() - image.flatten()).max() < 1e-2
def test_stable_diffusion_fp16_vs_autocast(self):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment