Unverified Commit 984d3405 authored by Sayak Paul, committed by GitHub

Revert "[LoRA] introduce `LoraBaseMixin` to promote reusability." (#8773)

Revert "[LoRA] introduce `LoraBaseMixin` to promote reusability. (#8670)"

This reverts commit a2071a18.
parent a2071a18
@@ -12,13 +12,10 @@ specific language governing permissions and limitations under the License.
# LoRA
LoRA is a fast and lightweight training method that inserts and trains a significantly smaller number of parameters instead of all the model parameters. This produces a smaller file (~100 MBs) and makes it easier to quickly train a model to learn a new concept. LoRA weights are typically loaded into the denoiser, text encoder or both. The denoiser usually corresponds to a UNet ([`UNet2DConditionModel`], for example) or a Transformer ([`SD3Transformer2DModel`], for example). There are several classes for loading LoRA weights:
LoRA is a fast and lightweight training method that inserts and trains a significantly smaller number of parameters instead of all the model parameters. This produces a smaller file (~100 MBs) and makes it easier to quickly train a model to learn a new concept. LoRA weights are typically loaded into the UNet, text encoder or both. There are two classes for loading LoRA weights:
- [`LoraLoaderMixin`] provides functions for loading and unloading, fusing and unfusing, enabling and disabling, and more functions for managing LoRA weights. This class can be used with any model.
- [`StableDiffusionXLLoraLoaderMixin`] is a [Stable Diffusion (SDXL)](../../api/pipelines/stable_diffusion/stable_diffusion_xl) version of the [`LoraLoaderMixin`] class for loading and saving LoRA weights. It can only be used with the SDXL model.
- [`SD3LoraLoaderMixin`] provides similar functions for [Stable Diffusion 3](https://huggingface.co/blog/sd3).
- [`AmusedLoraLoaderMixin`] is for the [`AmusedPipeline`].
- [`LoraBaseMixin`] provides a base class with several utility methods to fuse, unfuse, unload LoRAs, and more.
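As a quick illustration, any pipeline that inherits one of these mixins exposes `load_lora_weights()` directly. The sketch below uses the SDXL pipeline with a LoRA repository that also appears in the examples elsewhere in this diff:

```py
import torch

from diffusers import AutoPipelineForText2Image

# AutoPipelineForText2Image resolves to StableDiffusionXLPipeline here, which
# inherits `StableDiffusionXLLoraLoaderMixin` and therefore `load_lora_weights()`.
pipeline = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")
pipeline.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors")
```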
<Tip>
@@ -33,15 +30,3 @@ To learn more about how to load LoRA weights, see the [LoRA](../../using-diffuse
## StableDiffusionXLLoraLoaderMixin
[[autodoc]] loaders.lora.StableDiffusionXLLoraLoaderMixin
\ No newline at end of file
## SD3LoraLoaderMixin
[[autodoc]] loaders.lora.SD3LoraLoaderMixin
## AmusedLoraLoaderMixin
[[autodoc]] loaders.lora.AmusedLoraLoaderMixin
## LoraBaseMixin
[[autodoc]] loaders.lora_base.LoraBaseMixin
\ No newline at end of file
@@ -41,7 +41,7 @@ from transformers import (
import diffusers.optimization
from diffusers import AmusedPipeline, AmusedScheduler, EMAModel, UVit2DModel, VQModel
from diffusers.loaders import AmusedLoraLoaderMixin
from diffusers.loaders import LoraLoaderMixin
from diffusers.utils import is_wandb_available
@@ -532,7 +532,7 @@ def main(args):
weights.pop()
if transformer_lora_layers_to_save is not None or text_encoder_lora_layers_to_save is not None:
AmusedLoraLoaderMixin.save_lora_weights(
LoraLoaderMixin.save_lora_weights(
output_dir,
transformer_lora_layers=transformer_lora_layers_to_save,
text_encoder_lora_layers=text_encoder_lora_layers_to_save,
@@ -566,11 +566,11 @@ def main(args):
raise ValueError(f"unexpected save model: {model.__class__}")
if transformer is not None or text_encoder_ is not None:
lora_state_dict, network_alphas = AmusedLoraLoaderMixin.lora_state_dict(input_dir)
AmusedLoraLoaderMixin.load_lora_into_text_encoder(
lora_state_dict, network_alphas = LoraLoaderMixin.lora_state_dict(input_dir)
LoraLoaderMixin.load_lora_into_text_encoder(
lora_state_dict, network_alphas=network_alphas, text_encoder=text_encoder_
)
AmusedLoraLoaderMixin.load_lora_into_transformer(
LoraLoaderMixin.load_lora_into_transformer(
lora_state_dict, network_alphas=network_alphas, transformer=transformer
)
......
@@ -55,18 +55,11 @@ _import_structure = {}
if is_torch_available():
_import_structure["single_file_model"] = ["FromOriginalModelMixin"]
_import_structure["transformer_sd3"] = ["SD3TransformerLoadersMixin"]
_import_structure["unet"] = ["UNet2DConditionLoadersMixin"]
_import_structure["utils"] = ["AttnProcsLayers"]
if is_transformers_available():
_import_structure["single_file"] = ["FromSingleFileMixin"]
_import_structure["lora"] = [
"AmusedLoraLoaderMixin",
"LoraLoaderMixin",
"SD3LoraLoaderMixin",
"StableDiffusionXLLoraLoaderMixin",
]
_import_structure["lora"] = ["LoraLoaderMixin", "StableDiffusionXLLoraLoaderMixin", "SD3LoraLoaderMixin"]
_import_structure["textual_inversion"] = ["TextualInversionLoaderMixin"]
_import_structure["ip_adapter"] = ["IPAdapterMixin"]
@@ -76,18 +69,12 @@ _import_structure["peft"] = ["PeftAdapterMixin"]
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
if is_torch_available():
from .single_file_model import FromOriginalModelMixin
from .transformer_sd3 import SD3TransformerLoadersMixin
from .unet import UNet2DConditionLoadersMixin
from .utils import AttnProcsLayers
if is_transformers_available():
from .ip_adapter import IPAdapterMixin
from .lora import (
AmusedLoraLoaderMixin,
LoraLoaderMixin,
SD3LoraLoaderMixin,
StableDiffusionXLLoraLoaderMixin,
)
from .lora import LoraLoaderMixin, SD3LoraLoaderMixin, StableDiffusionXLLoraLoaderMixin
from .single_file import FromSingleFileMixin
from .textual_inversion import TextualInversionLoaderMixin
......
import inspect
from functools import partial
from typing import Dict, List, Optional, Union
import torch.nn as nn
from ..utils import (
USE_PEFT_BACKEND,
delete_adapter_layers,
is_accelerate_available,
logging,
set_adapter_layers,
set_weights_and_activate_adapters,
)
from .lora import TEXT_ENCODER_NAME, TRANSFORMER_NAME
if is_accelerate_available():
from accelerate.hooks import AlignDevicesHook, CpuOffload, remove_hook_from_module
logger = logging.get_logger(__name__)
class SD3TransformerLoadersMixin:
"""
Load LoRA layers into a [`SD3Transformer2DModel`].
"""
text_encoder_name = TEXT_ENCODER_NAME
transformer_name = TRANSFORMER_NAME
@classmethod
# Copied from diffusers.loaders.lora_base.LoraBaseMixin._optionally_disable_offloading
def _optionally_disable_offloading(cls, _pipeline):
"""
Optionally removes offloading if the pipeline has already been sequentially offloaded to CPU.
Args:
_pipeline (`DiffusionPipeline`):
The pipeline to disable offloading for.
Returns:
tuple:
A tuple indicating if `is_model_cpu_offload` or `is_sequential_cpu_offload` is True.
"""
is_model_cpu_offload = False
is_sequential_cpu_offload = False
if _pipeline is not None and _pipeline.hf_device_map is None:
for _, component in _pipeline.components.items():
if isinstance(component, nn.Module) and hasattr(component, "_hf_hook"):
if not is_model_cpu_offload:
is_model_cpu_offload = isinstance(component._hf_hook, CpuOffload)
if not is_sequential_cpu_offload:
is_sequential_cpu_offload = (
isinstance(component._hf_hook, AlignDevicesHook)
or hasattr(component._hf_hook, "hooks")
and isinstance(component._hf_hook.hooks[0], AlignDevicesHook)
)
logger.info(
"Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again."
)
remove_hook_from_module(component, recurse=is_sequential_cpu_offload)
return (is_model_cpu_offload, is_sequential_cpu_offload)
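# Caller-side sketch (an assumption for illustration, not part of this file):
# loaders typically disable offloading first, load the LoRA layers, and then
# restore whichever offloading mode the returned flags report, e.g.
#
#   is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline)
#   ...  # load the LoRA state dict into the model
#   if is_model_cpu_offload:
#       _pipeline.enable_model_cpu_offload()
#   elif is_sequential_cpu_offload:
#       _pipeline.enable_sequential_cpu_offload()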
# Copied from diffusers.loaders.unet.UNet2DConditionLoadersMixin.fuse_lora
def fuse_lora(self, lora_scale=1.0, safe_fusing=False, adapter_names=None):
if not USE_PEFT_BACKEND:
raise ValueError("PEFT backend is required for `fuse_lora()`.")
self.lora_scale = lora_scale
self._safe_fusing = safe_fusing
self.apply(partial(self._fuse_lora_apply, adapter_names=adapter_names))
# Copied from diffusers.loaders.unet.UNet2DConditionLoadersMixin._fuse_lora_apply
def _fuse_lora_apply(self, module, adapter_names=None):
from peft.tuners.tuners_utils import BaseTunerLayer
merge_kwargs = {"safe_merge": self._safe_fusing}
if isinstance(module, BaseTunerLayer):
if self.lora_scale != 1.0:
module.scale_layer(self.lora_scale)
# For BC with previous PEFT versions, we need to check the signature
# of the `merge` method to see if it supports the `adapter_names` argument.
supported_merge_kwargs = list(inspect.signature(module.merge).parameters)
if "adapter_names" in supported_merge_kwargs:
merge_kwargs["adapter_names"] = adapter_names
elif "adapter_names" not in supported_merge_kwargs and adapter_names is not None:
raise ValueError(
"The `adapter_names` argument is not supported with your PEFT version. Please upgrade"
" to the latest version of PEFT. `pip install -U peft`"
)
module.merge(**merge_kwargs)
# Copied from diffusers.loaders.unet.UNet2DConditionLoadersMixin.unfuse_lora
def unfuse_lora(self):
if not USE_PEFT_BACKEND:
raise ValueError("PEFT backend is required for `unfuse_lora()`.")
self.apply(self._unfuse_lora_apply)
# Copied from diffusers.loaders.unet.UNet2DConditionLoadersMixin._unfuse_lora_apply
def _unfuse_lora_apply(self, module):
from peft.tuners.tuners_utils import BaseTunerLayer
if isinstance(module, BaseTunerLayer):
module.unmerge()
# Copied from diffusers.loaders.unet.UNet2DConditionLoadersMixin.unload_lora
def unload_lora(self):
if not USE_PEFT_BACKEND:
raise ValueError("PEFT backend is required for `unload_lora()`.")
from ..utils import recurse_remove_peft_layers
recurse_remove_peft_layers(self)
if hasattr(self, "peft_config"):
del self.peft_config
# This class is almost the same but it doesn't do `_maybe_expand_lora_scales()` yet. We will work on adding
# this support in a future PR.
def set_adapters(
self,
adapter_names: Union[List[str], str],
weights: Optional[Union[float, Dict, List[float], List[Dict], List[None]]] = None,
):
"""
Set the currently active adapters for use in the Transformer.
Args:
adapter_names (`List[str]` or `str`):
The names of the adapters to use.
weights (`Union[List[float], float]`, *optional*):
The adapter weights to use with the Transformer. If `None`, the weights are set to `1.0` for all
adapters.
Example:
```py
from diffusers import AutoPipelineForText2Image
import torch
pipeline = AutoPipelineForText2Image.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")
pipeline.load_lora_weights(
"jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_name="cinematic"
)
pipeline.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel")
pipeline.set_adapters(["cinematic", "pixel"], adapter_weights=[0.5, 0.5])
```
"""
if not USE_PEFT_BACKEND:
raise ValueError("PEFT backend is required for `set_adapters()`.")
adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names
# Expand weights into a list, one entry per adapter
# examples for 2 adapters: 7 -> [7, 7] ; None -> [None, None]
if not isinstance(weights, list):
weights = [weights] * len(adapter_names)
if len(adapter_names) != len(weights):
raise ValueError(
f"Length of adapter names {len(adapter_names)} is not equal to the length of their weights {len(weights)}."
)
# Set None values to default of 1.0
# e.g. [{...}, 7] -> [{...}, 7] ; [None, None] -> [1.0, 1.0]
weights = [w if w is not None else 1.0 for w in weights]
set_weights_and_activate_adapters(self, adapter_names, weights)
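# Sketch of direct, model-level usage (the adapter names are hypothetical and
# must already have been loaded onto this module):
#
#   transformer.set_adapters(["adapter_a", "adapter_b"], weights=[0.8, 0.5])
#   transformer.set_adapters("adapter_a")  # single adapter, default weight 1.0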
# Copied from diffusers.loaders.unet.UNet2DConditionLoadersMixin.disable_lora with UNet->Transformer
def disable_lora(self):
"""
Disable the Transformer's active LoRA layers.
Example:
```py
from diffusers import AutoPipelineForText2Image
import torch
pipeline = AutoPipelineForText2Image.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")
pipeline.load_lora_weights(
"jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_name="cinematic"
)
pipeline.disable_lora()
```
"""
if not USE_PEFT_BACKEND:
raise ValueError("PEFT backend is required for this method.")
set_adapter_layers(self, enabled=False)
# Copied from diffusers.loaders.unet.UNet2DConditionLoadersMixin.enable_lora with UNet->Transformer
def enable_lora(self):
"""
Enable the Transformer's active LoRA layers.
Example:
```py
from diffusers import AutoPipelineForText2Image
import torch
pipeline = AutoPipelineForText2Image.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")
pipeline.load_lora_weights(
"jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_name="cinematic"
)
pipeline.enable_lora()
```
"""
if not USE_PEFT_BACKEND:
raise ValueError("PEFT backend is required for this method.")
set_adapter_layers(self, enabled=True)
# Copied from diffusers.loaders.unet.UNet2DConditionLoadersMixin.delete_adapters with UNet->Transformer
def delete_adapters(self, adapter_names: Union[List[str], str]):
"""
Delete an adapter's LoRA layers from the Transformer.
Args:
adapter_names (`Union[List[str], str]`):
The names (single string or list of strings) of the adapter to delete.
Example:
```py
from diffusers import AutoPipelineForText2Image
import torch
pipeline = AutoPipelineForText2Image.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")
pipeline.load_lora_weights(
"jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_names="cinematic"
)
pipeline.delete_adapters("cinematic")
```
"""
if not USE_PEFT_BACKEND:
raise ValueError("PEFT backend is required for this method.")
if isinstance(adapter_names, str):
adapter_names = [adapter_names]
for adapter_name in adapter_names:
delete_adapter_layers(self, adapter_name)
# Pop also the corresponding adapter from the config
if hasattr(self, "peft_config"):
self.peft_config.pop(adapter_name, None)
@@ -362,7 +362,7 @@ class UNet2DConditionLoadersMixin:
return is_model_cpu_offload, is_sequential_cpu_offload
@classmethod
# Copied from diffusers.loaders.lora_base.LoraBaseMixin._optionally_disable_offloading
# Copied from diffusers.loaders.lora.LoraLoaderMixin._optionally_disable_offloading
def _optionally_disable_offloading(cls, _pipeline):
"""
Optionally removes offloading if the pipeline has already been sequentially offloaded to CPU.
......
@@ -13,13 +13,15 @@
# limitations under the License.
import inspect
from functools import partial
from typing import Any, Dict, List, Optional, Union
import torch
import torch.nn as nn
from ...configuration_utils import ConfigMixin, register_to_config
from ...loaders import FromOriginalModelMixin, PeftAdapterMixin, SD3TransformerLoadersMixin
from ...loaders import FromOriginalModelMixin, PeftAdapterMixin
from ...models.attention import JointTransformerBlock
from ...models.attention_processor import Attention, AttentionProcessor
from ...models.modeling_utils import ModelMixin
@@ -32,9 +34,7 @@ from ..modeling_outputs import Transformer2DModelOutput
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
class SD3Transformer2DModel(
ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, SD3TransformerLoadersMixin
):
class SD3Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin):
"""
The Transformer model introduced in Stable Diffusion 3.
@@ -241,6 +241,47 @@ class SD3Transformer2DModel(
if hasattr(module, "gradient_checkpointing"):
module.gradient_checkpointing = value
def fuse_lora(self, lora_scale=1.0, safe_fusing=False, adapter_names=None):
if not USE_PEFT_BACKEND:
raise ValueError("PEFT backend is required for `fuse_lora()`.")
self.lora_scale = lora_scale
self._safe_fusing = safe_fusing
self.apply(partial(self._fuse_lora_apply, adapter_names=adapter_names))
def _fuse_lora_apply(self, module, adapter_names=None):
from peft.tuners.tuners_utils import BaseTunerLayer
merge_kwargs = {"safe_merge": self._safe_fusing}
if isinstance(module, BaseTunerLayer):
if self.lora_scale != 1.0:
module.scale_layer(self.lora_scale)
# For BC with previous PEFT versions, we need to check the signature
# of the `merge` method to see if it supports the `adapter_names` argument.
supported_merge_kwargs = list(inspect.signature(module.merge).parameters)
if "adapter_names" in supported_merge_kwargs:
merge_kwargs["adapter_names"] = adapter_names
elif "adapter_names" not in supported_merge_kwargs and adapter_names is not None:
raise ValueError(
"The `adapter_names` argument is not supported with your PEFT version. Please upgrade"
" to the latest version of PEFT. `pip install -U peft`"
)
module.merge(**merge_kwargs)
def unfuse_lora(self):
if not USE_PEFT_BACKEND:
raise ValueError("PEFT backend is required for `unfuse_lora()`.")
self.apply(self._unfuse_lora_apply)
def _unfuse_lora_apply(self, module):
from peft.tuners.tuners_utils import BaseTunerLayer
if isinstance(module, BaseTunerLayer):
module.unmerge()
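# Sketch of the fuse/unfuse round trip (assumes a LoRA adapter was already
# added to this model through PEFT):
#
#   model.fuse_lora(lora_scale=0.7)  # merge the scaled LoRA weights into the base layers
#   ...                              # run inference without the extra LoRA forward cost
#   model.unfuse_lora()              # unmerge and restore the original base weights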
def forward(
self,
hidden_states: torch.FloatTensor,
......
@@ -30,12 +30,9 @@ from ...models.controlnet_sd3 import SD3ControlNetModel, SD3MultiControlNetModel
from ...models.transformers import SD3Transformer2DModel
from ...schedulers import FlowMatchEulerDiscreteScheduler
from ...utils import (
USE_PEFT_BACKEND,
is_torch_xla_available,
logging,
replace_example_docstring,
scale_lora_layers,
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
from ..pipeline_utils import DiffusionPipeline
@@ -349,7 +346,6 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
clip_skip: Optional[int] = None,
max_sequence_length: int = 256,
lora_scale: Optional[float] = None,
):
r"""
@@ -395,22 +391,9 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
clip_skip (`int`, *optional*):
Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
the output of the pre-final layer will be used for computing the prompt embeddings.
lora_scale (`float`, *optional*):
A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
"""
device = device or self._execution_device
# set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, SD3LoraLoaderMixin):
self._lora_scale = lora_scale
# dynamically adjust the LoRA scale
if self.text_encoder is not None and USE_PEFT_BACKEND:
scale_lora_layers(self.text_encoder, lora_scale)
if self.text_encoder_2 is not None and USE_PEFT_BACKEND:
scale_lora_layers(self.text_encoder_2, lora_scale)
prompt = [prompt] if isinstance(prompt, str) else prompt
if prompt is not None:
batch_size = len(prompt)
@@ -513,16 +496,6 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
[negative_pooled_prompt_embed, negative_pooled_prompt_2_embed], dim=-1
)
if self.text_encoder is not None:
if isinstance(self, SD3LoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale)
if self.text_encoder_2 is not None:
if isinstance(self, SD3LoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder_2, lora_scale)
return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
def check_inputs(
......
@@ -29,12 +29,9 @@ from ...models.autoencoders import AutoencoderKL
from ...models.transformers import SD3Transformer2DModel
from ...schedulers import FlowMatchEulerDiscreteScheduler
from ...utils import (
USE_PEFT_BACKEND,
is_torch_xla_available,
logging,
replace_example_docstring,
scale_lora_layers,
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
from ..pipeline_utils import DiffusionPipeline
@@ -332,7 +329,6 @@ class StableDiffusion3Pipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingle
negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
clip_skip: Optional[int] = None,
max_sequence_length: int = 256,
lora_scale: Optional[float] = None,
):
r"""
@@ -378,22 +374,9 @@ class StableDiffusion3Pipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingle
clip_skip (`int`, *optional*):
Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
the output of the pre-final layer will be used for computing the prompt embeddings.
lora_scale (`float`, *optional*):
A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
"""
device = device or self._execution_device
# set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, SD3LoraLoaderMixin):
self._lora_scale = lora_scale
# dynamically adjust the LoRA scale
if self.text_encoder is not None and USE_PEFT_BACKEND:
scale_lora_layers(self.text_encoder, lora_scale)
if self.text_encoder_2 is not None and USE_PEFT_BACKEND:
scale_lora_layers(self.text_encoder_2, lora_scale)
prompt = [prompt] if isinstance(prompt, str) else prompt
if prompt is not None:
batch_size = len(prompt)
@@ -496,16 +479,6 @@ class StableDiffusion3Pipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingle
[negative_pooled_prompt_embed, negative_pooled_prompt_2_embed], dim=-1
)
if self.text_encoder is not None:
if isinstance(self, SD3LoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale)
if self.text_encoder_2 is not None:
if isinstance(self, SD3LoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder_2, lora_scale)
return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
def check_inputs(
@@ -814,9 +787,6 @@ class StableDiffusion3Pipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingle
device = self._execution_device
lora_scale = (
self.joint_attention_kwargs.get("scale", None) if self.joint_attention_kwargs is not None else None
)
(
prompt_embeds,
negative_prompt_embeds,
@@ -838,7 +808,6 @@ class StableDiffusion3Pipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingle
clip_skip=self.clip_skip,
num_images_per_prompt=num_images_per_prompt,
max_sequence_length=max_sequence_length,
lora_scale=lora_scale,
)
if self.do_classifier_free_guidance:
......
@@ -25,17 +25,13 @@ from transformers import (
)
from ...image_processor import PipelineImageInput, VaeImageProcessor
from ...loaders import SD3LoraLoaderMixin
from ...models.autoencoders import AutoencoderKL
from ...models.transformers import SD3Transformer2DModel
from ...schedulers import FlowMatchEulerDiscreteScheduler
from ...utils import (
USE_PEFT_BACKEND,
is_torch_xla_available,
logging,
replace_example_docstring,
scale_lora_layers,
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
from ..pipeline_utils import DiffusionPipeline
@@ -350,7 +346,6 @@ class StableDiffusion3Img2ImgPipeline(DiffusionPipeline):
negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
clip_skip: Optional[int] = None,
max_sequence_length: int = 256,
lora_scale: Optional[float] = None,
):
r"""
@@ -396,22 +391,9 @@ class StableDiffusion3Img2ImgPipeline(DiffusionPipeline):
clip_skip (`int`, *optional*):
Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
the output of the pre-final layer will be used for computing the prompt embeddings.
lora_scale (`float`, *optional*):
A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
"""
device = device or self._execution_device
# set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, SD3LoraLoaderMixin):
self._lora_scale = lora_scale
# dynamically adjust the LoRA scale
if self.text_encoder is not None and USE_PEFT_BACKEND:
scale_lora_layers(self.text_encoder, lora_scale)
if self.text_encoder_2 is not None and USE_PEFT_BACKEND:
scale_lora_layers(self.text_encoder_2, lora_scale)
prompt = [prompt] if isinstance(prompt, str) else prompt
if prompt is not None:
batch_size = len(prompt)
@@ -514,16 +496,6 @@ class StableDiffusion3Img2ImgPipeline(DiffusionPipeline):
[negative_pooled_prompt_embed, negative_pooled_prompt_2_embed], dim=-1
)
if self.text_encoder is not None:
if isinstance(self, SD3LoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale)
if self.text_encoder_2 is not None:
if isinstance(self, SD3LoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder_2, lora_scale)
return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
def check_inputs(
......
@@ -12,55 +12,377 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import tempfile
import unittest
import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel
from diffusers import (
AutoencoderKL,
FlowMatchEulerDiscreteScheduler,
SD3Transformer2DModel,
StableDiffusion3Pipeline,
)
from diffusers.utils.testing_utils import is_peft_available, require_peft_backend, require_torch_gpu, torch_device
if is_peft_available():
    from peft import LoraConfig
    from peft.utils import get_peft_model_state_dict
sys.path.append(".")
from utils import PeftLoraLoaderMixinTests # noqa: E402
from utils import check_if_lora_correctly_set # noqa: E402
@require_peft_backend
class SD3LoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
class SD3LoRATests(unittest.TestCase):
pipeline_class = StableDiffusion3Pipeline
scheduler_cls = FlowMatchEulerDiscreteScheduler()
scheduler_kwargs = {}
transformer_kwargs = {
"sample_size": 32,
"patch_size": 1,
"in_channels": 4,
"num_layers": 1,
"attention_head_dim": 8,
"num_attention_heads": 4,
"caption_projection_dim": 32,
"joint_attention_dim": 32,
"pooled_projection_dim": 64,
"out_channels": 4,
def get_dummy_components(self):
torch.manual_seed(0)
transformer = SD3Transformer2DModel(
sample_size=32,
patch_size=1,
in_channels=4,
num_layers=1,
attention_head_dim=8,
num_attention_heads=4,
caption_projection_dim=32,
joint_attention_dim=32,
pooled_projection_dim=64,
out_channels=4,
)
clip_text_encoder_config = CLIPTextConfig(
bos_token_id=0,
eos_token_id=2,
hidden_size=32,
intermediate_size=37,
layer_norm_eps=1e-05,
num_attention_heads=4,
num_hidden_layers=5,
pad_token_id=1,
vocab_size=1000,
hidden_act="gelu",
projection_dim=32,
)
torch.manual_seed(0)
text_encoder = CLIPTextModelWithProjection(clip_text_encoder_config)
torch.manual_seed(0)
text_encoder_2 = CLIPTextModelWithProjection(clip_text_encoder_config)
text_encoder_3 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
tokenizer_3 = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
torch.manual_seed(0)
vae = AutoencoderKL(
sample_size=32,
in_channels=3,
out_channels=3,
block_out_channels=(4,),
layers_per_block=1,
latent_channels=4,
norm_num_groups=1,
use_quant_conv=False,
use_post_quant_conv=False,
shift_factor=0.0609,
scaling_factor=1.5035,
)
scheduler = FlowMatchEulerDiscreteScheduler()
return {
"scheduler": scheduler,
"text_encoder": text_encoder,
"text_encoder_2": text_encoder_2,
"text_encoder_3": text_encoder_3,
"tokenizer": tokenizer,
"tokenizer_2": tokenizer_2,
"tokenizer_3": tokenizer_3,
"transformer": transformer,
"vae": vae,
}
vae_kwargs = {
"sample_size": 32,
"in_channels": 3,
"out_channels": 3,
"block_out_channels": (4,),
"layers_per_block": 1,
"latent_channels": 4,
"norm_num_groups": 1,
"use_quant_conv": False,
"use_post_quant_conv": False,
"shift_factor": 0.0609,
"scaling_factor": 1.5035,
def get_dummy_inputs(self, device, seed=0):
if str(device).startswith("mps"):
generator = torch.manual_seed(seed)
else:
generator = torch.Generator(device="cpu").manual_seed(seed)
inputs = {
"prompt": "A painting of a squirrel eating a burger",
"generator": generator,
"num_inference_steps": 2,
"guidance_scale": 5.0,
"output_type": "np",
}
has_three_text_encoders = True
return inputs
def get_lora_config_for_transformer(self):
lora_config = LoraConfig(
r=4,
lora_alpha=4,
target_modules=["to_q", "to_k", "to_v", "to_out.0"],
init_lora_weights=False,
use_dora=False,
)
return lora_config
def get_lora_config_for_text_encoders(self):
text_lora_config = LoraConfig(
r=4,
lora_alpha=4,
init_lora_weights="gaussian",
target_modules=["q_proj", "k_proj", "v_proj", "out_proj"],
)
return text_lora_config
def test_simple_inference_with_transformer_lora_save_load(self):
components = self.get_dummy_components()
transformer_config = self.get_lora_config_for_transformer()
pipe = self.pipeline_class(**components)
pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
inputs = self.get_dummy_inputs(torch_device)
pipe.transformer.add_adapter(transformer_config)
self.assertTrue(check_if_lora_correctly_set(pipe.transformer), "Lora not correctly set in transformer")
inputs = self.get_dummy_inputs(torch_device)
images_lora = pipe(**inputs).images
with tempfile.TemporaryDirectory() as tmpdirname:
transformer_state_dict = get_peft_model_state_dict(pipe.transformer)
self.pipeline_class.save_lora_weights(
save_directory=tmpdirname,
transformer_lora_layers=transformer_state_dict,
)
self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors")))
pipe.unload_lora_weights()
pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))
inputs = self.get_dummy_inputs(torch_device)
images_lora_from_pretrained = pipe(**inputs).images
self.assertTrue(check_if_lora_correctly_set(pipe.transformer), "Lora not correctly set in transformer")
self.assertTrue(
np.allclose(images_lora, images_lora_from_pretrained, atol=1e-3, rtol=1e-3),
"Loading from saved checkpoints should give same results.",
)
def test_simple_inference_with_clip_encoders_lora_save_load(self):
components = self.get_dummy_components()
transformer_config = self.get_lora_config_for_transformer()
text_encoder_config = self.get_lora_config_for_text_encoders()
pipe = self.pipeline_class(**components)
pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
inputs = self.get_dummy_inputs(torch_device)
pipe.transformer.add_adapter(transformer_config)
pipe.text_encoder.add_adapter(text_encoder_config)
pipe.text_encoder_2.add_adapter(text_encoder_config)
self.assertTrue(check_if_lora_correctly_set(pipe.transformer), "Lora not correctly set in transformer")
self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder.")
self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2.")
inputs = self.get_dummy_inputs(torch_device)
images_lora = pipe(**inputs).images
with tempfile.TemporaryDirectory() as tmpdirname:
transformer_state_dict = get_peft_model_state_dict(pipe.transformer)
text_encoder_one_state_dict = get_peft_model_state_dict(pipe.text_encoder)
text_encoder_two_state_dict = get_peft_model_state_dict(pipe.text_encoder_2)
self.pipeline_class.save_lora_weights(
save_directory=tmpdirname,
transformer_lora_layers=transformer_state_dict,
text_encoder_lora_layers=text_encoder_one_state_dict,
text_encoder_2_lora_layers=text_encoder_two_state_dict,
)
self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors")))
pipe.unload_lora_weights()
pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))
inputs = self.get_dummy_inputs(torch_device)
images_lora_from_pretrained = pipe(**inputs).images
self.assertTrue(check_if_lora_correctly_set(pipe.transformer), "Lora not correctly set in transformer")
self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text_encoder_one")
self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text_encoder_two")
self.assertTrue(
np.allclose(images_lora, images_lora_from_pretrained, atol=1e-3, rtol=1e-3),
"Loading from saved checkpoints should give same results.",
)
def test_simple_inference_with_transformer_lora_and_scale(self):
components = self.get_dummy_components()
transformer_lora_config = self.get_lora_config_for_transformer()
pipe = self.pipeline_class(**components)
pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
inputs = self.get_dummy_inputs(torch_device)
output_no_lora = pipe(**inputs).images
pipe.transformer.add_adapter(transformer_lora_config)
self.assertTrue(check_if_lora_correctly_set(pipe.transformer), "Lora not correctly set in transformer")
inputs = self.get_dummy_inputs(torch_device)
output_lora = pipe(**inputs).images
self.assertTrue(
not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output"
)
inputs = self.get_dummy_inputs(torch_device)
output_lora_scale = pipe(**inputs, joint_attention_kwargs={"scale": 0.5}).images
self.assertTrue(
not np.allclose(output_lora, output_lora_scale, atol=1e-3, rtol=1e-3),
"Lora + scale should change the output",
)
inputs = self.get_dummy_inputs(torch_device)
output_lora_0_scale = pipe(**inputs, joint_attention_kwargs={"scale": 0.0}).images
self.assertTrue(
np.allclose(output_no_lora, output_lora_0_scale, atol=1e-3, rtol=1e-3),
"Lora + 0 scale should lead to same result as no LoRA",
)
def test_simple_inference_with_clip_encoders_lora_and_scale(self):
components = self.get_dummy_components()
transformer_lora_config = self.get_lora_config_for_transformer()
text_encoder_config = self.get_lora_config_for_text_encoders()
pipe = self.pipeline_class(**components)
pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
inputs = self.get_dummy_inputs(torch_device)
output_no_lora = pipe(**inputs).images
pipe.transformer.add_adapter(transformer_lora_config)
pipe.text_encoder.add_adapter(text_encoder_config)
pipe.text_encoder_2.add_adapter(text_encoder_config)
self.assertTrue(check_if_lora_correctly_set(pipe.transformer), "Lora not correctly set in transformer")
self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text_encoder_one")
self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text_encoder_two")
inputs = self.get_dummy_inputs(torch_device)
output_lora = pipe(**inputs).images
self.assertTrue(
not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output"
)
inputs = self.get_dummy_inputs(torch_device)
output_lora_scale = pipe(**inputs, joint_attention_kwargs={"scale": 0.5}).images
self.assertTrue(
not np.allclose(output_lora, output_lora_scale, atol=1e-3, rtol=1e-3),
"Lora + scale should change the output",
)
inputs = self.get_dummy_inputs(torch_device)
output_lora_0_scale = pipe(**inputs, joint_attention_kwargs={"scale": 0.0}).images
self.assertTrue(
np.allclose(output_no_lora, output_lora_0_scale, atol=1e-3, rtol=1e-3),
"Lora + 0 scale should lead to same result as no LoRA",
)
def test_simple_inference_with_transformer_fused(self):
components = self.get_dummy_components()
transformer_lora_config = self.get_lora_config_for_transformer()
pipe = self.pipeline_class(**components)
pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
inputs = self.get_dummy_inputs(torch_device)
output_no_lora = pipe(**inputs).images
pipe.transformer.add_adapter(transformer_lora_config)
self.assertTrue(check_if_lora_correctly_set(pipe.transformer), "Lora not correctly set in transformer")
pipe.fuse_lora()
# Fusing should still keep the LoRA layers
self.assertTrue(check_if_lora_correctly_set(pipe.transformer), "Lora not correctly set in transformer")
inputs = self.get_dummy_inputs(torch_device)
output_fused = pipe(**inputs).images
self.assertFalse(
np.allclose(output_fused, output_no_lora, atol=1e-3, rtol=1e-3), "Fused lora should change the output"
)
def test_simple_inference_with_transformer_fused_with_no_fusion(self):
components = self.get_dummy_components()
transformer_lora_config = self.get_lora_config_for_transformer()
pipe = self.pipeline_class(**components)
pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
inputs = self.get_dummy_inputs(torch_device)
output_no_lora = pipe(**inputs).images
pipe.transformer.add_adapter(transformer_lora_config)
self.assertTrue(check_if_lora_correctly_set(pipe.transformer), "Lora not correctly set in transformer")
inputs = self.get_dummy_inputs(torch_device)
output_lora = pipe(**inputs).images
pipe.fuse_lora()
# Fusing should still keep the LoRA layers
self.assertTrue(check_if_lora_correctly_set(pipe.transformer), "Lora not correctly set in transformer")
inputs = self.get_dummy_inputs(torch_device)
output_fused = pipe(**inputs).images
self.assertFalse(
np.allclose(output_fused, output_no_lora, atol=1e-3, rtol=1e-3), "Fused lora should change the output"
)
self.assertTrue(
np.allclose(output_fused, output_lora, atol=1e-3, rtol=1e-3),
"Fused LoRA output should match the output with LoRA enabled but not fused.",
)
def test_simple_inference_with_transformer_fuse_unfuse(self):
components = self.get_dummy_components()
transformer_lora_config = self.get_lora_config_for_transformer()
pipe = self.pipeline_class(**components)
pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
inputs = self.get_dummy_inputs(torch_device)
output_no_lora = pipe(**inputs).images
pipe.transformer.add_adapter(transformer_lora_config)
self.assertTrue(check_if_lora_correctly_set(pipe.transformer), "Lora not correctly set in transformer")
pipe.fuse_lora()
# Fusing should still keep the LoRA layers
self.assertTrue(check_if_lora_correctly_set(pipe.transformer), "Lora not correctly set in transformer")
inputs = self.get_dummy_inputs(torch_device)
output_fused = pipe(**inputs).images
self.assertFalse(
np.allclose(output_fused, output_no_lora, atol=1e-3, rtol=1e-3), "Fused lora should change the output"
)
pipe.unfuse_lora()
self.assertTrue(check_if_lora_correctly_set(pipe.transformer), "Lora not correctly set in transformer")
inputs = self.get_dummy_inputs(torch_device)
output_unfused_lora = pipe(**inputs).images
self.assertTrue(
np.allclose(output_fused, output_unfused_lora, atol=1e-3, rtol=1e-3),
"Output after unfusing should match the fused output.",
)
@require_torch_gpu
def test_sd3_lora(self):
......