Unverified Commit 766aa50f authored by Patrick von Platen, committed by GitHub

[LoRA Attn Processors] Refactor LoRA Attn Processors (#4765)

* [LoRA Attn] Refactor LoRA attn

* correct for network alphas

* fix more

* fix more tests

* fix more tests

* Move below

* Finish

* better version

* correct serialization format

* fix

* fix more

* fix more

* fix more

* Apply suggestions from code review

* Update src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py

* deprecation

* relax atol for slow test slightly

* Finish tests

* make style

* make style
parent c4d28236
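
After this refactor, LoRA weights loaded into the UNet live on `lora_layer` attributes of the LoRA-compatible linear/conv sub-modules instead of on dedicated LoRA attention processor classes. A minimal usage sketch of what that looks like from the pipeline side; the model and LoRA identifiers below are placeholders, not part of this PR:

```python
# Hedged sketch: load a LoRA checkpoint into a Stable Diffusion pipeline and
# inspect where the weights end up after this refactor. The model and LoRA
# identifiers are placeholders for illustration only.
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
)
pipe.load_lora_weights("path/or/repo-id-of-a-lora-checkpoint")  # placeholder

# With this PR the LoRA weights sit on the attention sub-modules themselves
# (LoRA-compatible linear layers with a `lora_layer` slot), not on the processor.
attn = pipe.unet.down_blocks[0].attentions[0].transformer_blocks[0].attn1
print(attn.to_q.lora_layer is not None)  # True once a LoRA is loaded
```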
@@ -24,7 +24,6 @@ from typing import Callable, Dict, List, Optional, Union
 import requests
 import safetensors
 import torch
-import torch.nn.functional as F
 from huggingface_hub import hf_hub_download, model_info
 from torch import nn
@@ -231,15 +230,7 @@ class UNet2DConditionLoadersMixin:
         """
         from .models.attention_processor import (
-            AttnAddedKVProcessor,
-            AttnAddedKVProcessor2_0,
             CustomDiffusionAttnProcessor,
-            LoRAAttnAddedKVProcessor,
-            LoRAAttnProcessor,
-            LoRAAttnProcessor2_0,
-            LoRAXFormersAttnProcessor,
-            SlicedAttnAddedKVProcessor,
-            XFormersAttnProcessor,
         )
         from .models.lora import LoRACompatibleConv, LoRACompatibleLinear, LoRAConv2dLayer, LoRALinearLayer
@@ -314,24 +305,14 @@ class UNet2DConditionLoadersMixin:
             state_dict = pretrained_model_name_or_path_or_dict

         # fill attn processors
-        attn_processors = {}
-        non_attn_lora_layers = []
+        lora_layers_list = []

         is_lora = all(("lora" in k or k.endswith(".alpha")) for k in state_dict.keys())
         is_custom_diffusion = any("custom_diffusion" in k for k in state_dict.keys())

         if is_lora:
-            is_new_lora_format = all(
-                key.startswith(self.unet_name) or key.startswith(self.text_encoder_name) for key in state_dict.keys()
-            )
-            if is_new_lora_format:
-                # Strip the `"unet"` prefix.
-                is_text_encoder_present = any(key.startswith(self.text_encoder_name) for key in state_dict.keys())
-                if is_text_encoder_present:
-                    warn_message = "The state_dict contains LoRA params corresponding to the text encoder which are not being used here. To use both UNet and text encoder related LoRA params, use [`pipe.load_lora_weights()`](https://huggingface.co/docs/diffusers/main/en/api/loaders#diffusers.loaders.LoraLoaderMixin.load_lora_weights)."
-                    warnings.warn(warn_message)
-                unet_keys = [k for k in state_dict.keys() if k.startswith(self.unet_name)]
-                state_dict = {k.replace(f"{self.unet_name}.", ""): v for k, v in state_dict.items() if k in unet_keys}
+            # correct keys
+            state_dict, network_alphas = self.convert_state_dict_legacy_attn_format(state_dict, network_alphas)

             lora_grouped_dict = defaultdict(dict)
             mapped_network_alphas = {}
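
For context on the grouping step right above: the flat `state_dict` keys are bucketed per target module so that each module's `lora.down.weight` / `lora.up.weight` tensors (and alphas) can be handled together. A rough, self-contained sketch of that idea, not the exact loader code:

```python
# Rough sketch of grouping flat LoRA keys per module (illustrative only, not
# the exact implementation inside load_attn_procs).
from collections import defaultdict

state_dict = {
    "mid_block.attentions.0.transformer_blocks.0.attn1.to_q.lora.down.weight": "down_tensor",
    "mid_block.attentions.0.transformer_blocks.0.attn1.to_q.lora.up.weight": "up_tensor",
}

lora_grouped_dict = defaultdict(dict)
for key, value in state_dict.items():
    # split "<module path>.lora.<down|up>.weight" into module path + param name
    module_path, param_name = ".".join(key.split(".")[:-3]), ".".join(key.split(".")[-3:])
    lora_grouped_dict[module_path][param_name] = value

for module_path, value_dict in lora_grouped_dict.items():
    print(module_path, sorted(value_dict))
# mid_block.attentions.0.transformer_blocks.0.attn1.to_q ['lora.down.weight', 'lora.up.weight']
```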
@@ -367,87 +348,38 @@ class UNet2DConditionLoadersMixin:
                 # Process non-attention layers, which don't have to_{k,v,q,out_proj}_lora layers
                 # or add_{k,v,q,out_proj}_proj_lora layers.
-                if "lora.down.weight" in value_dict:
-                    rank = value_dict["lora.down.weight"].shape[0]
-
-                    if isinstance(attn_processor, LoRACompatibleConv):
-                        in_features = attn_processor.in_channels
-                        out_features = attn_processor.out_channels
-                        kernel_size = attn_processor.kernel_size
-
-                        lora = LoRAConv2dLayer(
-                            in_features=in_features,
-                            out_features=out_features,
-                            rank=rank,
-                            kernel_size=kernel_size,
-                            stride=attn_processor.stride,
-                            padding=attn_processor.padding,
-                            network_alpha=mapped_network_alphas.get(key),
-                        )
-                    elif isinstance(attn_processor, LoRACompatibleLinear):
-                        lora = LoRALinearLayer(
-                            attn_processor.in_features,
-                            attn_processor.out_features,
-                            rank,
-                            mapped_network_alphas.get(key),
-                        )
-                    else:
-                        raise ValueError(f"Module {key} is not a LoRACompatibleConv or LoRACompatibleLinear module.")
-
-                    value_dict = {k.replace("lora.", ""): v for k, v in value_dict.items()}
-                    lora.load_state_dict(value_dict)
-                    non_attn_lora_layers.append((attn_processor, lora))
-                else:
-                    # To handle SDXL.
-                    rank_mapping = {}
-                    hidden_size_mapping = {}
-                    for projection_id in ["to_k", "to_q", "to_v", "to_out"]:
-                        rank = value_dict[f"{projection_id}_lora.down.weight"].shape[0]
-                        hidden_size = value_dict[f"{projection_id}_lora.up.weight"].shape[0]
-                        rank_mapping.update({f"{projection_id}_lora.down.weight": rank})
-                        hidden_size_mapping.update({f"{projection_id}_lora.up.weight": hidden_size})
-
-                    if isinstance(
-                        attn_processor, (AttnAddedKVProcessor, SlicedAttnAddedKVProcessor, AttnAddedKVProcessor2_0)
-                    ):
-                        cross_attention_dim = value_dict["add_k_proj_lora.down.weight"].shape[1]
-                        attn_processor_class = LoRAAttnAddedKVProcessor
-                    else:
-                        cross_attention_dim = value_dict["to_k_lora.down.weight"].shape[1]
-                        if isinstance(attn_processor, (XFormersAttnProcessor, LoRAXFormersAttnProcessor)):
-                            attn_processor_class = LoRAXFormersAttnProcessor
-                        else:
-                            attn_processor_class = (
-                                LoRAAttnProcessor2_0
-                                if hasattr(F, "scaled_dot_product_attention")
-                                else LoRAAttnProcessor
-                            )
-
-                    if attn_processor_class is not LoRAAttnAddedKVProcessor:
-                        attn_processors[key] = attn_processor_class(
-                            rank=rank_mapping.get("to_k_lora.down.weight"),
-                            hidden_size=hidden_size_mapping.get("to_k_lora.up.weight"),
-                            cross_attention_dim=cross_attention_dim,
-                            network_alpha=mapped_network_alphas.get(key),
-                            q_rank=rank_mapping.get("to_q_lora.down.weight"),
-                            q_hidden_size=hidden_size_mapping.get("to_q_lora.up.weight"),
-                            v_rank=rank_mapping.get("to_v_lora.down.weight"),
-                            v_hidden_size=hidden_size_mapping.get("to_v_lora.up.weight"),
-                            out_rank=rank_mapping.get("to_out_lora.down.weight"),
-                            out_hidden_size=hidden_size_mapping.get("to_out_lora.up.weight"),
-                        )
-                    else:
-                        attn_processors[key] = attn_processor_class(
-                            rank=rank_mapping.get("to_k_lora.down.weight", None),
-                            hidden_size=hidden_size_mapping.get("to_k_lora.up.weight", None),
-                            cross_attention_dim=cross_attention_dim,
-                            network_alpha=mapped_network_alphas.get(key),
-                        )
-
-                    attn_processors[key].load_state_dict(value_dict)
+                rank = value_dict["lora.down.weight"].shape[0]
+
+                if isinstance(attn_processor, LoRACompatibleConv):
+                    in_features = attn_processor.in_channels
+                    out_features = attn_processor.out_channels
+                    kernel_size = attn_processor.kernel_size
+
+                    lora = LoRAConv2dLayer(
+                        in_features=in_features,
+                        out_features=out_features,
+                        rank=rank,
+                        kernel_size=kernel_size,
+                        stride=attn_processor.stride,
+                        padding=attn_processor.padding,
+                        network_alpha=mapped_network_alphas.get(key),
+                    )
+                elif isinstance(attn_processor, LoRACompatibleLinear):
+                    lora = LoRALinearLayer(
+                        attn_processor.in_features,
+                        attn_processor.out_features,
+                        rank,
+                        mapped_network_alphas.get(key),
+                    )
+                else:
+                    raise ValueError(f"Module {key} is not a LoRACompatibleConv or LoRACompatibleLinear module.")
+
+                value_dict = {k.replace("lora.", ""): v for k, v in value_dict.items()}
+                lora.load_state_dict(value_dict)
+                lora_layers_list.append((attn_processor, lora))
         elif is_custom_diffusion:
+            attn_processors = {}
             custom_diffusion_grouped_dict = defaultdict(dict)
             for key, value in state_dict.items():
                 if len(value) == 0:
@@ -475,22 +407,47 @@ class UNet2DConditionLoadersMixin:
                     cross_attention_dim=cross_attention_dim,
                 )
                 attn_processors[key].load_state_dict(value_dict)
+
+            self.set_attn_processor(attn_processors)
         else:
             raise ValueError(
                 f"{model_file} does not seem to be in the correct format expected by LoRA or Custom Diffusion training."
             )

         # set correct dtype & device
-        attn_processors = {k: v.to(device=self.device, dtype=self.dtype) for k, v in attn_processors.items()}
-        non_attn_lora_layers = [(t, l.to(device=self.device, dtype=self.dtype)) for t, l in non_attn_lora_layers]
-
-        # set layers
-        self.set_attn_processor(attn_processors)
+        lora_layers_list = [(t, l.to(device=self.device, dtype=self.dtype)) for t, l in lora_layers_list]

-        # set ff layers
-        for target_module, lora_layer in non_attn_lora_layers:
+        # set lora layers
+        for target_module, lora_layer in lora_layers_list:
             target_module.set_lora_layer(lora_layer)

+    def convert_state_dict_legacy_attn_format(self, state_dict, network_alphas):
+        is_new_lora_format = all(
+            key.startswith(self.unet_name) or key.startswith(self.text_encoder_name) for key in state_dict.keys()
+        )
+        if is_new_lora_format:
+            # Strip the `"unet"` prefix.
+            is_text_encoder_present = any(key.startswith(self.text_encoder_name) for key in state_dict.keys())
+            if is_text_encoder_present:
+                warn_message = "The state_dict contains LoRA params corresponding to the text encoder which are not being used here. To use both UNet and text encoder related LoRA params, use [`pipe.load_lora_weights()`](https://huggingface.co/docs/diffusers/main/en/api/loaders#diffusers.loaders.LoraLoaderMixin.load_lora_weights)."
+                logger.warn(warn_message)
+            unet_keys = [k for k in state_dict.keys() if k.startswith(self.unet_name)]
+            state_dict = {k.replace(f"{self.unet_name}.", ""): v for k, v in state_dict.items() if k in unet_keys}
+
+        # change processor format to 'pure' LoRACompatibleLinear format
+        if any("processor" in k.split(".") for k in state_dict.keys()):
+
+            def format_to_lora_compatible(key):
+                if "processor" not in key.split("."):
+                    return key
+                return key.replace(".processor", "").replace("to_out_lora", "to_out.0.lora").replace("_lora", ".lora")
+
+            state_dict = {format_to_lora_compatible(k): v for k, v in state_dict.items()}
+
+            if network_alphas is not None:
+                network_alphas = {format_to_lora_compatible(k): v for k, v in network_alphas.items()}
+        return state_dict, network_alphas
+
     def save_attn_procs(
         self,
         save_directory: Union[str, os.PathLike],
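
The new `convert_state_dict_legacy_attn_format` helper rewrites legacy attention-processor-style keys into the "pure" `LoRACompatibleLinear` layout. A quick illustration of the `format_to_lora_compatible` mapping, using made-up example keys:

```python
# Illustration of the key rewrite done by format_to_lora_compatible above.
# The input keys are made-up examples in the legacy processor format.
def format_to_lora_compatible(key):
    if "processor" not in key.split("."):
        return key
    return key.replace(".processor", "").replace("to_out_lora", "to_out.0.lora").replace("_lora", ".lora")

legacy_key = "mid_block.attentions.0.transformer_blocks.0.attn1.processor.to_out_lora.up.weight"
print(format_to_lora_compatible(legacy_key))
# mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0.lora.up.weight

legacy_key = "mid_block.attentions.0.transformer_blocks.0.attn1.processor.to_q_lora.down.weight"
print(format_to_lora_compatible(legacy_key))
# mid_block.attentions.0.transformer_blocks.0.attn1.to_q.lora.down.weight
```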
@@ -1748,36 +1705,9 @@ class LoraLoaderMixin:
         >>> ...
         ```
         """
-        from .models.attention_processor import (
-            LORA_ATTENTION_PROCESSORS,
-            AttnProcessor,
-            AttnProcessor2_0,
-            LoRAAttnAddedKVProcessor,
-            LoRAAttnProcessor,
-            LoRAAttnProcessor2_0,
-            LoRAXFormersAttnProcessor,
-            XFormersAttnProcessor,
-        )
-
-        unet_attention_classes = {type(processor) for _, processor in self.unet.attn_processors.items()}
-
-        if unet_attention_classes.issubset(LORA_ATTENTION_PROCESSORS):
-            # Handle attention processors that are a mix of regular attention and AddedKV
-            # attention.
-            if len(unet_attention_classes) > 1 or LoRAAttnAddedKVProcessor in unet_attention_classes:
-                self.unet.set_default_attn_processor()
-            else:
-                regular_attention_classes = {
-                    LoRAAttnProcessor: AttnProcessor,
-                    LoRAAttnProcessor2_0: AttnProcessor2_0,
-                    LoRAXFormersAttnProcessor: XFormersAttnProcessor,
-                }
-                [attention_proc_class] = unet_attention_classes
-                self.unet.set_attn_processor(regular_attention_classes[attention_proc_class]())
-
-            for _, module in self.unet.named_modules():
-                if hasattr(module, "set_lora_layer"):
-                    module.set_lora_layer(None)
+        for _, module in self.unet.named_modules():
+            if hasattr(module, "set_lora_layer"):
+                module.set_lora_layer(None)

         # Safe to call the following regardless of LoRA.
         self._remove_text_encoder_monkey_patch()
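
With this change, `unload_lora_weights` no longer has to swap attention processor classes back; for the UNet it only clears each module's `lora_layer` slot. A short sketch, assuming `pipe` is a pipeline with LoRA weights already loaded:

```python
# What the new unload path boils down to for the UNet (mirrors the loop in the
# diff above); `pipe` is assumed to be a pipeline with a LoRA already loaded.
for _, module in pipe.unet.named_modules():
    if hasattr(module, "set_lora_layer"):
        module.set_lora_layer(None)

# The high-level entry point on the pipeline (also handles the text encoder):
pipe.unload_lora_weights()
```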
This diff is collapsed.
@@ -175,8 +175,8 @@ class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalVAEMixin):
         processors = {}

         def fn_recursive_add_processors(name: str, module: torch.nn.Module, processors: Dict[str, AttentionProcessor]):
-            if hasattr(module, "set_processor"):
-                processors[f"{name}.processor"] = module.processor
+            if hasattr(module, "get_processor"):
+                processors[f"{name}.processor"] = module.get_processor(return_deprecated_lora=True)

             for sub_name, child in module.named_children():
                 fn_recursive_add_processors(f"{name}.{sub_name}", child, processors)
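
The `attn_processors` property now goes through `get_processor(return_deprecated_lora=True)`, so models whose LoRA weights live on `lora_layer` slots can still hand back legacy-style LoRA processors where callers expect them (e.g. for serialization). A small usage sketch of the property; the model ID is a placeholder:

```python
# Hedged sketch: attn_processors returns a {"<module path>.processor": processor}
# mapping, one entry per attention module. The model ID is a placeholder.
from diffusers import UNet2DConditionModel

unet = UNet2DConditionModel.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="unet")

procs = unet.attn_processors
print(len(procs))         # one entry per attention module
print(next(iter(procs)))  # a key such as "down_blocks.0.attentions.0.transformer_blocks.0.attn1.processor"
```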
@@ -497,8 +497,8 @@ class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlnetMixin):
         processors = {}

         def fn_recursive_add_processors(name: str, module: torch.nn.Module, processors: Dict[str, AttentionProcessor]):
-            if hasattr(module, "set_processor"):
-                processors[f"{name}.processor"] = module.processor
+            if hasattr(module, "get_processor"):
+                processors[f"{name}.processor"] = module.get_processor(return_deprecated_lora=True)

             for sub_name, child in module.named_children():
                 fn_recursive_add_processors(f"{name}.{sub_name}", child, processors)
@@ -28,6 +28,8 @@ class LoRALinearLayer(nn.Module):
         # See https://github.com/darkstorm2150/sd-scripts/blob/main/docs/train_network_README-en.md#execute-learning
         self.network_alpha = network_alpha
         self.rank = rank
+        self.out_features = out_features
+        self.in_features = in_features

         nn.init.normal_(self.down.weight, std=1 / rank)
         nn.init.zeros_(self.up.weight)

@@ -110,8 +112,8 @@ class LoRACompatibleLinear(nn.Linear):
     def set_lora_layer(self, lora_layer: Optional[LoRAConv2dLayer]):
         self.lora_layer = lora_layer

-    def forward(self, x):
+    def forward(self, hidden_states, lora_scale: int = 1):
         if self.lora_layer is None:
-            return super().forward(x)
+            return super().forward(hidden_states)
         else:
-            return super().forward(x) + self.lora_layer(x)
+            return super().forward(hidden_states) + lora_scale * self.lora_layer(hidden_states)
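
The `LoRACompatibleLinear.forward` change adds a `lora_scale` multiplier on the low-rank path, i.e. `y = W·x + lora_scale * up(down(x))`. A self-contained toy version of that pattern (illustration only, not the diffusers class):

```python
# Toy re-implementation of the LoRA-on-Linear pattern used above, for
# illustration only; it is not the diffusers LoRACompatibleLinear class.
import torch
import torch.nn as nn


class ToyLoRALinear(nn.Linear):
    def __init__(self, in_features, out_features, rank=4):
        super().__init__(in_features, out_features)
        # low-rank update up(down(x)); initialized so the update starts at zero
        self.down = nn.Linear(in_features, rank, bias=False)
        self.up = nn.Linear(rank, out_features, bias=False)
        nn.init.normal_(self.down.weight, std=1 / rank)
        nn.init.zeros_(self.up.weight)

    def forward(self, hidden_states, lora_scale: float = 1.0):
        return super().forward(hidden_states) + lora_scale * self.up(self.down(hidden_states))


layer = ToyLoRALinear(8, 16)
x = torch.randn(2, 8)
out = layer(x, lora_scale=0.5)
print(out.shape)  # torch.Size([2, 16])
```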
@@ -171,8 +171,8 @@ class PriorTransformer(ModelMixin, ConfigMixin):
         processors = {}

         def fn_recursive_add_processors(name: str, module: torch.nn.Module, processors: Dict[str, AttentionProcessor]):
-            if hasattr(module, "set_processor"):
-                processors[f"{name}.processor"] = module.processor
+            if hasattr(module, "get_processor"):
+                processors[f"{name}.processor"] = module.get_processor(return_deprecated_lora=True)

             for sub_name, child in module.named_children():
                 fn_recursive_add_processors(f"{name}.{sub_name}", child, processors)
@@ -584,8 +584,8 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
         processors = {}

         def fn_recursive_add_processors(name: str, module: torch.nn.Module, processors: Dict[str, AttentionProcessor]):
-            if hasattr(module, "set_processor"):
-                processors[f"{name}.processor"] = module.processor
+            if hasattr(module, "get_processor"):
+                processors[f"{name}.processor"] = module.get_processor(return_deprecated_lora=True)

             for sub_name, child in module.named_children():
                 fn_recursive_add_processors(f"{name}.{sub_name}", child, processors)
@@ -280,8 +280,8 @@ class UNet3DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
         processors = {}

         def fn_recursive_add_processors(name: str, module: torch.nn.Module, processors: Dict[str, AttentionProcessor]):
-            if hasattr(module, "set_processor"):
-                processors[f"{name}.processor"] = module.processor
+            if hasattr(module, "get_processor"):
+                processors[f"{name}.processor"] = module.get_processor(return_deprecated_lora=True)

             for sub_name, child in module.named_children():
                 fn_recursive_add_processors(f"{name}.{sub_name}", child, processors)
@@ -518,8 +518,8 @@ class AudioLDM2UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoad
         processors = {}

         def fn_recursive_add_processors(name: str, module: torch.nn.Module, processors: Dict[str, AttentionProcessor]):
-            if hasattr(module, "set_processor"):
-                processors[f"{name}.processor"] = module.processor
+            if hasattr(module, "get_processor"):
+                processors[f"{name}.processor"] = module.get_processor(return_deprecated_lora=True)

             for sub_name, child in module.named_children():
                 fn_recursive_add_processors(f"{name}.{sub_name}", child, processors)
@@ -749,8 +749,8 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
         processors = {}

         def fn_recursive_add_processors(name: str, module: torch.nn.Module, processors: Dict[str, AttentionProcessor]):
-            if hasattr(module, "set_processor"):
-                processors[f"{name}.processor"] = module.processor
+            if hasattr(module, "get_processor"):
+                processors[f"{name}.processor"] = module.get_processor(return_deprecated_lora=True)

             for sub_name, child in module.named_children():
                 fn_recursive_add_processors(f"{name}.{sub_name}", child, processors)
@@ -39,7 +39,6 @@ from diffusers.models.attention_processor import (
     AttnProcessor2_0,
     LoRAAttnProcessor,
     LoRAAttnProcessor2_0,
-    LoRAXFormersAttnProcessor,
     XFormersAttnProcessor,
 )
 from diffusers.utils import floats_tensor, torch_device
@@ -375,10 +374,10 @@ class LoraLoaderMixinTests(unittest.TestCase):
         # check if lora attention processors are used
         for _, module in sd_pipe.unet.named_modules():
             if isinstance(module, Attention):
-                attn_proc_class = (
-                    LoRAAttnProcessor2_0 if hasattr(F, "scaled_dot_product_attention") else LoRAAttnProcessor
-                )
-                self.assertIsInstance(module.processor, attn_proc_class)
+                self.assertIsNotNone(module.to_q.lora_layer)
+                self.assertIsNotNone(module.to_k.lora_layer)
+                self.assertIsNotNone(module.to_v.lora_layer)
+                self.assertIsNotNone(module.to_out[0].lora_layer)

     def test_unload_lora_sd(self):
         pipeline_components, lora_components = self.get_dummy_components()
@@ -443,7 +442,10 @@ class LoraLoaderMixinTests(unittest.TestCase):
         # check if lora attention processors are used
         for _, module in sd_pipe.unet.named_modules():
             if isinstance(module, Attention):
-                self.assertIsInstance(module.processor, LoRAXFormersAttnProcessor)
+                self.assertIsNotNone(module.to_q.lora_layer)
+                self.assertIsNotNone(module.to_k.lora_layer)
+                self.assertIsNotNone(module.to_v.lora_layer)
+                self.assertIsNotNone(module.to_out[0].lora_layer)

         # unload lora weights
         sd_pipe.unload_lora_weights()
@@ -751,7 +753,7 @@ class LoraIntegrationTests(unittest.TestCase):
         images = images[0, -3:, -3:, -1].flatten()
         expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292])

-        self.assertTrue(np.allclose(images, expected, atol=1e-4))
+        self.assertTrue(np.allclose(images, expected, atol=1e-3))

     def test_kohya_sd_v15_with_higher_dimensions(self):
         generator = torch.Generator().manual_seed(0)
@@ -770,7 +772,7 @@ class LoraIntegrationTests(unittest.TestCase):
         images = images[0, -3:, -3:, -1].flatten()
         expected = np.array([0.7165, 0.6616, 0.5833, 0.7504, 0.6718, 0.587, 0.6871, 0.6361, 0.5694])

-        self.assertTrue(np.allclose(images, expected, atol=1e-4))
+        self.assertTrue(np.allclose(images, expected, atol=1e-3))

     def test_vanilla_funetuning(self):
         generator = torch.Generator().manual_seed(0)
@@ -887,7 +889,7 @@ class LoraIntegrationTests(unittest.TestCase):
         images = images[0, -3:, -3:, -1].flatten()
         expected = np.array([0.3838, 0.3482, 0.3588, 0.3162, 0.319, 0.3369, 0.338, 0.3366, 0.3213])

-        self.assertTrue(np.allclose(images, expected, atol=1e-4))
+        self.assertTrue(np.allclose(images, expected, atol=1e-3))

     def test_sdxl_0_9_lora_two(self):
         generator = torch.Generator().manual_seed(0)
@@ -905,7 +907,7 @@ class LoraIntegrationTests(unittest.TestCase):
         images = images[0, -3:, -3:, -1].flatten()
         expected = np.array([0.3137, 0.3269, 0.3355, 0.255, 0.2577, 0.2563, 0.2679, 0.2758, 0.2626])

-        self.assertTrue(np.allclose(images, expected, atol=1e-4))
+        self.assertTrue(np.allclose(images, expected, atol=1e-3))

     def test_sdxl_0_9_lora_three(self):
         generator = torch.Generator().manual_seed(0)
@@ -921,9 +923,9 @@ class LoraIntegrationTests(unittest.TestCase):
         ).images
         images = images[0, -3:, -3:, -1].flatten()

-        expected = np.array([0.4115, 0.4047, 0.4124, 0.3931, 0.3746, 0.3802, 0.3735, 0.3748, 0.3609])
+        expected = np.array([0.4015, 0.3761, 0.3616, 0.3745, 0.3462, 0.3337, 0.3564, 0.3649, 0.3468])

-        self.assertTrue(np.allclose(images, expected, atol=1e-4))
+        self.assertTrue(np.allclose(images, expected, atol=5e-3))

     def test_sdxl_1_0_lora(self):
         generator = torch.Generator().manual_seed(0)
@@ -941,7 +943,7 @@ class LoraIntegrationTests(unittest.TestCase):
         images = images[0, -3:, -3:, -1].flatten()
         expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535])

-        self.assertTrue(np.allclose(images, expected, atol=1e-4))
+        self.assertTrue(np.allclose(images, expected, atol=1e-3))

     def test_sdxl_1_0_last_ben(self):
         generator = torch.Generator().manual_seed(0)
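
For reference on the relaxed tolerances above: `np.allclose(a, b, atol=...)` passes when `|a - b| <= atol + rtol * |b|` elementwise (with `rtol` defaulting to 1e-5), so moving `atol` from 1e-4 to 1e-3 simply tolerates a slightly larger absolute per-pixel drift in the slow tests:

```python
# Quick illustration of the tolerance relaxation in the tests above.
import numpy as np

images = np.array([0.3636, 0.3708, 0.3694])
expected = images + 5e-4  # a per-value drift of 5e-4

print(np.allclose(images, expected, atol=1e-4))  # False: drift exceeds 1e-4
print(np.allclose(images, expected, atol=1e-3))  # True:  drift is within 1e-3
```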