Unverified Commit d87fe95f authored by Sayak Paul's avatar Sayak Paul Committed by GitHub
Browse files

[Chore] add `LoraLoaderMixin` to the inits (#8981)



* introduce  to promote reusability.

* up

* add more tests

* up

* remove comments.

* fix fuse_nan test

* clarify the scope of fuse_lora and unfuse_lora

* remove space

* rewrite fuse_lora a bit.

* feedback

* copy over load_lora_into_text_encoder.

* address dhruv's feedback.

* fix-copies

* fix issubclass.

* num_fused_loras

* fix

* fix

* remove mapping

* up

* fix

* style

* fix-copies

* change to SD3TransformerLoRALoadersMixin

* Apply suggestions from code review
Co-authored-by: default avatarDhruv Nair <dhruv.nair@gmail.com>

* up

* handle wuerstchen

* up

* move lora to lora_pipeline.py

* up

* fix-copies

* fix documentation.

* comment set_adapters().

* fix-copies

* fix set_adapters() at the model level.

* fix?

* fix

* loraloadermixin.

---------
Co-authored-by: default avatarDhruv Nair <dhruv.nair@gmail.com>
parent 50e66f2f
...@@ -22,7 +22,12 @@ from transformers import ( ...@@ -22,7 +22,12 @@ from transformers import (
from diffusers import DiffusionPipeline, StableDiffusionXLPipeline from diffusers import DiffusionPipeline, StableDiffusionXLPipeline
from diffusers.image_processor import PipelineImageInput, VaeImageProcessor from diffusers.image_processor import PipelineImageInput, VaeImageProcessor
from diffusers.loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.loaders import (
FromSingleFileMixin,
IPAdapterMixin,
StableDiffusionLoraLoaderMixin,
TextualInversionLoaderMixin,
)
from diffusers.models import AutoencoderKL, ImageProjection, UNet2DConditionModel from diffusers.models import AutoencoderKL, ImageProjection, UNet2DConditionModel
from diffusers.models.attention_processor import AttnProcessor2_0, XFormersAttnProcessor from diffusers.models.attention_processor import AttnProcessor2_0, XFormersAttnProcessor
from diffusers.pipelines.pipeline_utils import StableDiffusionMixin from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
...@@ -544,7 +549,7 @@ class SDXLLongPromptWeightingPipeline( ...@@ -544,7 +549,7 @@ class SDXLLongPromptWeightingPipeline(
StableDiffusionMixin, StableDiffusionMixin,
FromSingleFileMixin, FromSingleFileMixin,
IPAdapterMixin, IPAdapterMixin,
LoraLoaderMixin, StableDiffusionLoraLoaderMixin,
TextualInversionLoaderMixin, TextualInversionLoaderMixin,
): ):
r""" r"""
...@@ -556,8 +561,8 @@ class SDXLLongPromptWeightingPipeline( ...@@ -556,8 +561,8 @@ class SDXLLongPromptWeightingPipeline(
The pipeline also inherits the following loading methods: The pipeline also inherits the following loading methods:
- [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
- [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
- [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
- [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
- [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
Args: Args:
...@@ -738,7 +743,7 @@ class SDXLLongPromptWeightingPipeline( ...@@ -738,7 +743,7 @@ class SDXLLongPromptWeightingPipeline(
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
if prompt is not None and isinstance(prompt, str): if prompt is not None and isinstance(prompt, str):
......
...@@ -22,7 +22,7 @@ from PIL import Image ...@@ -22,7 +22,7 @@ from PIL import Image
from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
from diffusers.image_processor import PipelineImageInput, VaeImageProcessor from diffusers.image_processor import PipelineImageInput, VaeImageProcessor
from diffusers.loaders import IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.loaders import IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models import AutoencoderKL, ControlNetModel, ImageProjection, UNet2DConditionModel, UNetMotionModel from diffusers.models import AutoencoderKL, ControlNetModel, ImageProjection, UNet2DConditionModel, UNetMotionModel
from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.models.lora import adjust_lora_scale_text_encoder
from diffusers.models.unets.unet_motion_model import MotionAdapter from diffusers.models.unets.unet_motion_model import MotionAdapter
...@@ -114,7 +114,11 @@ def tensor2vid(video: torch.Tensor, processor, output_type="np"): ...@@ -114,7 +114,11 @@ def tensor2vid(video: torch.Tensor, processor, output_type="np"):
class AnimateDiffControlNetPipeline( class AnimateDiffControlNetPipeline(
DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin DiffusionPipeline,
StableDiffusionMixin,
TextualInversionLoaderMixin,
IPAdapterMixin,
StableDiffusionLoraLoaderMixin,
): ):
r""" r"""
Pipeline for text-to-video generation. Pipeline for text-to-video generation.
...@@ -124,8 +128,8 @@ class AnimateDiffControlNetPipeline( ...@@ -124,8 +128,8 @@ class AnimateDiffControlNetPipeline(
The pipeline also inherits the following loading methods: The pipeline also inherits the following loading methods:
- [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
- [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
- [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
- [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
Args: Args:
...@@ -234,7 +238,7 @@ class AnimateDiffControlNetPipeline( ...@@ -234,7 +238,7 @@ class AnimateDiffControlNetPipeline(
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -366,7 +370,7 @@ class AnimateDiffControlNetPipeline( ...@@ -366,7 +370,7 @@ class AnimateDiffControlNetPipeline(
negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
...@@ -27,7 +27,7 @@ import torch ...@@ -27,7 +27,7 @@ import torch
from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
from diffusers.image_processor import PipelineImageInput, VaeImageProcessor from diffusers.image_processor import PipelineImageInput, VaeImageProcessor
from diffusers.loaders import IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.loaders import IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel from diffusers.models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel
from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.models.lora import adjust_lora_scale_text_encoder
from diffusers.models.unet_motion_model import MotionAdapter from diffusers.models.unet_motion_model import MotionAdapter
...@@ -240,7 +240,11 @@ def retrieve_timesteps( ...@@ -240,7 +240,11 @@ def retrieve_timesteps(
class AnimateDiffImgToVideoPipeline( class AnimateDiffImgToVideoPipeline(
DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin DiffusionPipeline,
StableDiffusionMixin,
TextualInversionLoaderMixin,
IPAdapterMixin,
StableDiffusionLoraLoaderMixin,
): ):
r""" r"""
Pipeline for image-to-video generation. Pipeline for image-to-video generation.
...@@ -250,8 +254,8 @@ class AnimateDiffImgToVideoPipeline( ...@@ -250,8 +254,8 @@ class AnimateDiffImgToVideoPipeline(
The pipeline also inherits the following loading methods: The pipeline also inherits the following loading methods:
- [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
- [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
- [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
- [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
Args: Args:
...@@ -351,7 +355,7 @@ class AnimateDiffImgToVideoPipeline( ...@@ -351,7 +355,7 @@ class AnimateDiffImgToVideoPipeline(
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -483,7 +487,7 @@ class AnimateDiffImgToVideoPipeline( ...@@ -483,7 +487,7 @@ class AnimateDiffImgToVideoPipeline(
negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
...@@ -12,7 +12,7 @@ from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokeniz ...@@ -12,7 +12,7 @@ from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokeniz
from diffusers.image_processor import VaeImageProcessor from diffusers.image_processor import VaeImageProcessor
from diffusers.loaders import ( from diffusers.loaders import (
FromSingleFileMixin, FromSingleFileMixin,
LoraLoaderMixin, StableDiffusionLoraLoaderMixin,
TextualInversionLoaderMixin, TextualInversionLoaderMixin,
) )
from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.models import AutoencoderKL, UNet2DConditionModel
...@@ -89,7 +89,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): ...@@ -89,7 +89,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
class DemoFusionSDXLPipeline( class DemoFusionSDXLPipeline(
DiffusionPipeline, StableDiffusionMixin, FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin DiffusionPipeline,
StableDiffusionMixin,
FromSingleFileMixin,
StableDiffusionLoraLoaderMixin,
TextualInversionLoaderMixin,
): ):
r""" r"""
Pipeline for text-to-image generation using Stable Diffusion XL. Pipeline for text-to-image generation using Stable Diffusion XL.
...@@ -231,7 +235,7 @@ class DemoFusionSDXLPipeline( ...@@ -231,7 +235,7 @@ class DemoFusionSDXLPipeline(
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
......
...@@ -21,7 +21,7 @@ from transformers import CLIPTextModel, CLIPTokenizer ...@@ -21,7 +21,7 @@ from transformers import CLIPTextModel, CLIPTokenizer
from diffusers import AutoencoderKL, UNet2DConditionModel from diffusers import AutoencoderKL, UNet2DConditionModel
from diffusers.configuration_utils import FrozenDict from diffusers.configuration_utils import FrozenDict
from diffusers.image_processor import VaeImageProcessor from diffusers.image_processor import VaeImageProcessor
from diffusers.loaders import LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models.attention import BasicTransformerBlock from diffusers.models.attention import BasicTransformerBlock
from diffusers.models.attention_processor import LoRAAttnProcessor from diffusers.models.attention_processor import LoRAAttnProcessor
from diffusers.pipelines.pipeline_utils import DiffusionPipeline from diffusers.pipelines.pipeline_utils import DiffusionPipeline
...@@ -222,7 +222,7 @@ class FabricPipeline(DiffusionPipeline): ...@@ -222,7 +222,7 @@ class FabricPipeline(DiffusionPipeline):
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
if prompt is not None and isinstance(prompt, str): if prompt is not None and isinstance(prompt, str):
......
...@@ -35,7 +35,7 @@ from diffusers.image_processor import VaeImageProcessor ...@@ -35,7 +35,7 @@ from diffusers.image_processor import VaeImageProcessor
from diffusers.loaders import ( from diffusers.loaders import (
FromSingleFileMixin, FromSingleFileMixin,
IPAdapterMixin, IPAdapterMixin,
LoraLoaderMixin, StableDiffusionLoraLoaderMixin,
TextualInversionLoaderMixin, TextualInversionLoaderMixin,
) )
from diffusers.models.attention import Attention from diffusers.models.attention import Attention
...@@ -75,7 +75,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): ...@@ -75,7 +75,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
class Prompt2PromptPipeline( class Prompt2PromptPipeline(
DiffusionPipeline, DiffusionPipeline,
TextualInversionLoaderMixin, TextualInversionLoaderMixin,
LoraLoaderMixin, StableDiffusionLoraLoaderMixin,
IPAdapterMixin, IPAdapterMixin,
FromSingleFileMixin, FromSingleFileMixin,
): ):
...@@ -87,8 +87,8 @@ class Prompt2PromptPipeline( ...@@ -87,8 +87,8 @@ class Prompt2PromptPipeline(
The pipeline also inherits the following loading methods: The pipeline also inherits the following loading methods:
- [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
- [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
- [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
- [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
- [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
...@@ -286,7 +286,7 @@ class Prompt2PromptPipeline( ...@@ -286,7 +286,7 @@ class Prompt2PromptPipeline(
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -420,7 +420,7 @@ class Prompt2PromptPipeline( ...@@ -420,7 +420,7 @@ class Prompt2PromptPipeline(
negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
...@@ -27,7 +27,12 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPV ...@@ -27,7 +27,12 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPV
from diffusers.configuration_utils import FrozenDict from diffusers.configuration_utils import FrozenDict
from diffusers.image_processor import PipelineImageInput, VaeImageProcessor from diffusers.image_processor import PipelineImageInput, VaeImageProcessor
from diffusers.loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.loaders import (
FromSingleFileMixin,
IPAdapterMixin,
StableDiffusionLoraLoaderMixin,
TextualInversionLoaderMixin,
)
from diffusers.models import AutoencoderKL, ImageProjection, UNet2DConditionModel from diffusers.models import AutoencoderKL, ImageProjection, UNet2DConditionModel
from diffusers.models.attention_processor import Attention, FusedAttnProcessor2_0 from diffusers.models.attention_processor import Attention, FusedAttnProcessor2_0
from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.models.lora import adjust_lora_scale_text_encoder
...@@ -358,7 +363,7 @@ def retrieve_timesteps( ...@@ -358,7 +363,7 @@ def retrieve_timesteps(
class StableDiffusionBoxDiffPipeline( class StableDiffusionBoxDiffPipeline(
DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin DiffusionPipeline, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin
): ):
r""" r"""
Pipeline for text-to-image generation using Stable Diffusion with BoxDiff. Pipeline for text-to-image generation using Stable Diffusion with BoxDiff.
...@@ -368,8 +373,8 @@ class StableDiffusionBoxDiffPipeline( ...@@ -368,8 +373,8 @@ class StableDiffusionBoxDiffPipeline(
The pipeline also inherits the following loading methods: The pipeline also inherits the following loading methods:
- [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
- [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
- [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
- [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
- [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
...@@ -594,7 +599,7 @@ class StableDiffusionBoxDiffPipeline( ...@@ -594,7 +599,7 @@ class StableDiffusionBoxDiffPipeline(
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -726,7 +731,7 @@ class StableDiffusionBoxDiffPipeline( ...@@ -726,7 +731,7 @@ class StableDiffusionBoxDiffPipeline(
negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
...@@ -11,7 +11,12 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPV ...@@ -11,7 +11,12 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPV
from diffusers.configuration_utils import FrozenDict from diffusers.configuration_utils import FrozenDict
from diffusers.image_processor import PipelineImageInput, VaeImageProcessor from diffusers.image_processor import PipelineImageInput, VaeImageProcessor
from diffusers.loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.loaders import (
FromSingleFileMixin,
IPAdapterMixin,
StableDiffusionLoraLoaderMixin,
TextualInversionLoaderMixin,
)
from diffusers.models import AutoencoderKL, ImageProjection, UNet2DConditionModel from diffusers.models import AutoencoderKL, ImageProjection, UNet2DConditionModel
from diffusers.models.attention_processor import Attention, AttnProcessor2_0, FusedAttnProcessor2_0 from diffusers.models.attention_processor import Attention, AttnProcessor2_0, FusedAttnProcessor2_0
from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.models.lora import adjust_lora_scale_text_encoder
...@@ -328,7 +333,7 @@ def retrieve_timesteps( ...@@ -328,7 +333,7 @@ def retrieve_timesteps(
class StableDiffusionPAGPipeline( class StableDiffusionPAGPipeline(
DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin DiffusionPipeline, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin
): ):
r""" r"""
Pipeline for text-to-image generation using Stable Diffusion. Pipeline for text-to-image generation using Stable Diffusion.
...@@ -336,8 +341,8 @@ class StableDiffusionPAGPipeline( ...@@ -336,8 +341,8 @@ class StableDiffusionPAGPipeline(
implemented for all pipelines (downloading, saving, running on a particular device, etc.). implemented for all pipelines (downloading, saving, running on a particular device, etc.).
The pipeline also inherits the following loading methods: The pipeline also inherits the following loading methods:
- [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
- [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
- [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
- [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
- [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
Args: Args:
...@@ -560,7 +565,7 @@ class StableDiffusionPAGPipeline( ...@@ -560,7 +565,7 @@ class StableDiffusionPAGPipeline(
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -692,7 +697,7 @@ class StableDiffusionPAGPipeline( ...@@ -692,7 +697,7 @@ class StableDiffusionPAGPipeline(
negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
...@@ -22,7 +22,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer ...@@ -22,7 +22,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
from diffusers import DiffusionPipeline from diffusers import DiffusionPipeline
from diffusers.image_processor import PipelineDepthInput, PipelineImageInput, VaeImageProcessorLDM3D from diffusers.image_processor import PipelineDepthInput, PipelineImageInput, VaeImageProcessorLDM3D
from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.loaders import FromSingleFileMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.models import AutoencoderKL, UNet2DConditionModel
from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.models.lora import adjust_lora_scale_text_encoder
from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
...@@ -69,7 +69,7 @@ EXAMPLE_DOC_STRING = """ ...@@ -69,7 +69,7 @@ EXAMPLE_DOC_STRING = """
class StableDiffusionUpscaleLDM3DPipeline( class StableDiffusionUpscaleLDM3DPipeline(
DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin DiffusionPipeline, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin, FromSingleFileMixin
): ):
r""" r"""
Pipeline for text-to-image and 3D generation using LDM3D. Pipeline for text-to-image and 3D generation using LDM3D.
...@@ -79,8 +79,8 @@ class StableDiffusionUpscaleLDM3DPipeline( ...@@ -79,8 +79,8 @@ class StableDiffusionUpscaleLDM3DPipeline(
The pipeline also inherits the following loading methods: The pipeline also inherits the following loading methods:
- [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
- [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
- [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
- [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
Args: Args:
...@@ -233,7 +233,7 @@ class StableDiffusionUpscaleLDM3DPipeline( ...@@ -233,7 +233,7 @@ class StableDiffusionUpscaleLDM3DPipeline(
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -365,7 +365,7 @@ class StableDiffusionUpscaleLDM3DPipeline( ...@@ -365,7 +365,7 @@ class StableDiffusionUpscaleLDM3DPipeline(
negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
...@@ -33,7 +33,7 @@ from diffusers import DiffusionPipeline ...@@ -33,7 +33,7 @@ from diffusers import DiffusionPipeline
from diffusers.image_processor import PipelineImageInput, VaeImageProcessor from diffusers.image_processor import PipelineImageInput, VaeImageProcessor
from diffusers.loaders import ( from diffusers.loaders import (
FromSingleFileMixin, FromSingleFileMixin,
LoraLoaderMixin, StableDiffusionLoraLoaderMixin,
StableDiffusionXLLoraLoaderMixin, StableDiffusionXLLoraLoaderMixin,
TextualInversionLoaderMixin, TextualInversionLoaderMixin,
) )
...@@ -300,7 +300,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): ...@@ -300,7 +300,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
class StableDiffusionXLControlNetAdapterInpaintPipeline( class StableDiffusionXLControlNetAdapterInpaintPipeline(
DiffusionPipeline, StableDiffusionMixin, FromSingleFileMixin, LoraLoaderMixin DiffusionPipeline, StableDiffusionMixin, FromSingleFileMixin, StableDiffusionLoraLoaderMixin
): ):
r""" r"""
Pipeline for text-to-image generation using Stable Diffusion augmented with T2I-Adapter Pipeline for text-to-image generation using Stable Diffusion augmented with T2I-Adapter
......
...@@ -178,11 +178,11 @@ class StableDiffusionXLDifferentialImg2ImgPipeline( ...@@ -178,11 +178,11 @@ class StableDiffusionXLDifferentialImg2ImgPipeline(
In addition the pipeline inherits the following loading methods: In addition the pipeline inherits the following loading methods:
- *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`] - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`]
- *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`] - *LoRA*: [`loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`]
- *Ckpt*: [`loaders.FromSingleFileMixin.from_single_file`] - *Ckpt*: [`loaders.FromSingleFileMixin.from_single_file`]
as well as the following saving methods: as well as the following saving methods:
- *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`] - *LoRA*: [`loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`]
Args: Args:
vae ([`AutoencoderKL`]): vae ([`AutoencoderKL`]):
......
...@@ -11,7 +11,7 @@ from tqdm.auto import tqdm ...@@ -11,7 +11,7 @@ from tqdm.auto import tqdm
from transformers import CLIPTextModel, CLIPTokenizer from transformers import CLIPTextModel, CLIPTokenizer
from diffusers import AutoencoderKL, DiffusionPipeline, DPMSolverMultistepScheduler, UNet2DConditionModel from diffusers import AutoencoderKL, DiffusionPipeline, DPMSolverMultistepScheduler, UNet2DConditionModel
from diffusers.loaders import AttnProcsLayers, LoraLoaderMixin from diffusers.loaders import AttnProcsLayers, StableDiffusionLoraLoaderMixin
from diffusers.models.attention_processor import ( from diffusers.models.attention_processor import (
AttnAddedKVProcessor, AttnAddedKVProcessor,
AttnAddedKVProcessor2_0, AttnAddedKVProcessor2_0,
...@@ -321,7 +321,7 @@ class SdeDragPipeline(DiffusionPipeline): ...@@ -321,7 +321,7 @@ class SdeDragPipeline(DiffusionPipeline):
optimizer.zero_grad() optimizer.zero_grad()
with tempfile.TemporaryDirectory() as save_lora_dir: with tempfile.TemporaryDirectory() as save_lora_dir:
LoraLoaderMixin.save_lora_weights( StableDiffusionLoraLoaderMixin.save_lora_weights(
save_directory=save_lora_dir, save_directory=save_lora_dir,
unet_lora_layers=unet_lora_layers, unet_lora_layers=unet_lora_layers,
text_encoder_lora_layers=None, text_encoder_lora_layers=None,
......
...@@ -21,7 +21,7 @@ from packaging import version ...@@ -21,7 +21,7 @@ from packaging import version
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
from diffusers.configuration_utils import FrozenDict from diffusers.configuration_utils import FrozenDict
from diffusers.loaders import LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.models import AutoencoderKL, UNet2DConditionModel
from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
...@@ -61,7 +61,7 @@ EXAMPLE_DOC_STRING = """ ...@@ -61,7 +61,7 @@ EXAMPLE_DOC_STRING = """
class StableDiffusionIPEXPipeline( class StableDiffusionIPEXPipeline(
DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin
): ):
r""" r"""
Pipeline for text-to-image generation using Stable Diffusion on IPEX. Pipeline for text-to-image generation using Stable Diffusion on IPEX.
......
...@@ -11,7 +11,12 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer ...@@ -11,7 +11,12 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
from diffusers import AutoencoderKL, DiffusionPipeline, UNet2DConditionModel from diffusers import AutoencoderKL, DiffusionPipeline, UNet2DConditionModel
from diffusers.configuration_utils import FrozenDict, deprecate from diffusers.configuration_utils import FrozenDict, deprecate
from diffusers.image_processor import VaeImageProcessor from diffusers.image_processor import VaeImageProcessor
from diffusers.loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.loaders import (
FromSingleFileMixin,
IPAdapterMixin,
StableDiffusionLoraLoaderMixin,
TextualInversionLoaderMixin,
)
from diffusers.models.attention import BasicTransformerBlock from diffusers.models.attention import BasicTransformerBlock
from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.models.lora import adjust_lora_scale_text_encoder
from diffusers.models.unets.unet_2d_blocks import CrossAttnDownBlock2D, CrossAttnUpBlock2D, DownBlock2D, UpBlock2D from diffusers.models.unets.unet_2d_blocks import CrossAttnDownBlock2D, CrossAttnUpBlock2D, DownBlock2D, UpBlock2D
...@@ -76,7 +81,7 @@ def torch_dfs(model: torch.nn.Module): ...@@ -76,7 +81,7 @@ def torch_dfs(model: torch.nn.Module):
class StableDiffusionReferencePipeline( class StableDiffusionReferencePipeline(
DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin DiffusionPipeline, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin
): ):
r""" r"""
Pipeline for Stable Diffusion Reference. Pipeline for Stable Diffusion Reference.
...@@ -86,8 +91,8 @@ class StableDiffusionReferencePipeline( ...@@ -86,8 +91,8 @@ class StableDiffusionReferencePipeline(
The pipeline also inherits the following loading methods: The pipeline also inherits the following loading methods:
- [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
- [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
- [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
- [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
- [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
...@@ -443,7 +448,7 @@ class StableDiffusionReferencePipeline( ...@@ -443,7 +448,7 @@ class StableDiffusionReferencePipeline(
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -575,7 +580,7 @@ class StableDiffusionReferencePipeline( ...@@ -575,7 +580,7 @@ class StableDiffusionReferencePipeline(
negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
...@@ -23,7 +23,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer ...@@ -23,7 +23,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
from diffusers import AutoencoderKL, DiffusionPipeline, UNet2DConditionModel from diffusers import AutoencoderKL, DiffusionPipeline, UNet2DConditionModel
from diffusers.configuration_utils import FrozenDict, deprecate from diffusers.configuration_utils import FrozenDict, deprecate
from diffusers.loaders import LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.pipelines.pipeline_utils import StableDiffusionMixin from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.safety_checker import ( from diffusers.pipelines.stable_diffusion.safety_checker import (
...@@ -140,7 +140,7 @@ def prepare_mask_and_masked_image(image, mask): ...@@ -140,7 +140,7 @@ def prepare_mask_and_masked_image(image, mask):
class StableDiffusionRepaintPipeline( class StableDiffusionRepaintPipeline(
DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin
): ):
r""" r"""
Pipeline for text-guided image inpainting using Stable Diffusion. *This is an experimental feature*. Pipeline for text-guided image inpainting using Stable Diffusion. *This is an experimental feature*.
...@@ -148,9 +148,9 @@ class StableDiffusionRepaintPipeline( ...@@ -148,9 +148,9 @@ class StableDiffusionRepaintPipeline(
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
In addition the pipeline inherits the following loading methods: In addition the pipeline inherits the following loading methods:
- *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`] - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`]
- *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`] - *LoRA*: [`loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`]
as well as the following saving methods: as well as the following saving methods:
- *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`] - *LoRA*: [`loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`]
Args: Args:
vae ([`AutoencoderKL`]): vae ([`AutoencoderKL`]):
Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations. Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
......
...@@ -425,8 +425,8 @@ pipe.load_lora_weights(lora_model_id) ...@@ -425,8 +425,8 @@ pipe.load_lora_weights(lora_model_id)
image = pipe("A picture of a sks dog in a bucket", num_inference_steps=25).images[0] image = pipe("A picture of a sks dog in a bucket", num_inference_steps=25).images[0]
``` ```
Note that the use of [`LoraLoaderMixin.load_lora_weights`](https://huggingface.co/docs/diffusers/main/en/api/loaders#diffusers.loaders.LoraLoaderMixin.load_lora_weights) is preferred to [`UNet2DConditionLoadersMixin.load_attn_procs`](https://huggingface.co/docs/diffusers/main/en/api/loaders#diffusers.loaders.UNet2DConditionLoadersMixin.load_attn_procs) for loading LoRA parameters. This is because Note that the use of [`StableDiffusionLoraLoaderMixin.load_lora_weights`](https://huggingface.co/docs/diffusers/main/en/api/loaders#diffusers.loaders.StableDiffusionLoraLoaderMixin.load_lora_weights) is preferred to [`UNet2DConditionLoadersMixin.load_attn_procs`](https://huggingface.co/docs/diffusers/main/en/api/loaders#diffusers.loaders.UNet2DConditionLoadersMixin.load_attn_procs) for loading LoRA parameters. This is because
`LoraLoaderMixin.load_lora_weights` can handle the following situations: `StableDiffusionLoraLoaderMixin.load_lora_weights` can handle the following situations:
* LoRA parameters that don't have separate identifiers for the UNet and the text encoder (such as [`"patrickvonplaten/lora_dreambooth_dog_example"`](https://huggingface.co/patrickvonplaten/lora_dreambooth_dog_example)). So, you can just do: * LoRA parameters that don't have separate identifiers for the UNet and the text encoder (such as [`"patrickvonplaten/lora_dreambooth_dog_example"`](https://huggingface.co/patrickvonplaten/lora_dreambooth_dog_example)). So, you can just do:
......
...@@ -52,7 +52,7 @@ from diffusers import ( ...@@ -52,7 +52,7 @@ from diffusers import (
StableDiffusionPipeline, StableDiffusionPipeline,
UNet2DConditionModel, UNet2DConditionModel,
) )
from diffusers.loaders import LoraLoaderMixin from diffusers.loaders import StableDiffusionLoraLoaderMixin
from diffusers.optimization import get_scheduler from diffusers.optimization import get_scheduler
from diffusers.training_utils import _set_state_dict_into_text_encoder, cast_training_params from diffusers.training_utils import _set_state_dict_into_text_encoder, cast_training_params
from diffusers.utils import ( from diffusers.utils import (
...@@ -956,7 +956,7 @@ def main(args): ...@@ -956,7 +956,7 @@ def main(args):
# make sure to pop weight so that corresponding model is not saved again # make sure to pop weight so that corresponding model is not saved again
weights.pop() weights.pop()
LoraLoaderMixin.save_lora_weights( StableDiffusionLoraLoaderMixin.save_lora_weights(
output_dir, output_dir,
unet_lora_layers=unet_lora_layers_to_save, unet_lora_layers=unet_lora_layers_to_save,
text_encoder_lora_layers=text_encoder_lora_layers_to_save, text_encoder_lora_layers=text_encoder_lora_layers_to_save,
...@@ -976,7 +976,7 @@ def main(args): ...@@ -976,7 +976,7 @@ def main(args):
else: else:
raise ValueError(f"unexpected save model: {model.__class__}") raise ValueError(f"unexpected save model: {model.__class__}")
lora_state_dict, network_alphas = LoraLoaderMixin.lora_state_dict(input_dir) lora_state_dict, network_alphas = StableDiffusionLoraLoaderMixin.lora_state_dict(input_dir)
unet_state_dict = {f'{k.replace("unet.", "")}': v for k, v in lora_state_dict.items() if k.startswith("unet.")} unet_state_dict = {f'{k.replace("unet.", "")}': v for k, v in lora_state_dict.items() if k.startswith("unet.")}
unet_state_dict = convert_unet_state_dict_to_peft(unet_state_dict) unet_state_dict = convert_unet_state_dict_to_peft(unet_state_dict)
...@@ -1376,7 +1376,7 @@ def main(args): ...@@ -1376,7 +1376,7 @@ def main(args):
else: else:
text_encoder_state_dict = None text_encoder_state_dict = None
LoraLoaderMixin.save_lora_weights( StableDiffusionLoraLoaderMixin.save_lora_weights(
save_directory=args.output_dir, save_directory=args.output_dir,
unet_lora_layers=unet_lora_state_dict, unet_lora_layers=unet_lora_state_dict,
text_encoder_lora_layers=text_encoder_state_dict, text_encoder_lora_layers=text_encoder_state_dict,
......
...@@ -58,7 +58,7 @@ from diffusers import ( ...@@ -58,7 +58,7 @@ from diffusers import (
StableDiffusionXLPipeline, StableDiffusionXLPipeline,
UNet2DConditionModel, UNet2DConditionModel,
) )
from diffusers.loaders import LoraLoaderMixin from diffusers.loaders import StableDiffusionLoraLoaderMixin
from diffusers.optimization import get_scheduler from diffusers.optimization import get_scheduler
from diffusers.training_utils import _set_state_dict_into_text_encoder, cast_training_params, compute_snr from diffusers.training_utils import _set_state_dict_into_text_encoder, cast_training_params, compute_snr
from diffusers.utils import ( from diffusers.utils import (
...@@ -1260,7 +1260,7 @@ def main(args): ...@@ -1260,7 +1260,7 @@ def main(args):
else: else:
raise ValueError(f"unexpected save model: {model.__class__}") raise ValueError(f"unexpected save model: {model.__class__}")
lora_state_dict, network_alphas = LoraLoaderMixin.lora_state_dict(input_dir) lora_state_dict, network_alphas = StableDiffusionLoraLoaderMixin.lora_state_dict(input_dir)
unet_state_dict = {f'{k.replace("unet.", "")}': v for k, v in lora_state_dict.items() if k.startswith("unet.")} unet_state_dict = {f'{k.replace("unet.", "")}': v for k, v in lora_state_dict.items() if k.startswith("unet.")}
unet_state_dict = convert_unet_state_dict_to_peft(unet_state_dict) unet_state_dict = convert_unet_state_dict_to_peft(unet_state_dict)
......
...@@ -49,7 +49,7 @@ from diffusers import ( ...@@ -49,7 +49,7 @@ from diffusers import (
DPMSolverMultistepScheduler, DPMSolverMultistepScheduler,
UNet2DConditionModel, UNet2DConditionModel,
) )
from diffusers.loaders import LoraLoaderMixin from diffusers.loaders import StableDiffusionLoraLoaderMixin
from diffusers.optimization import get_scheduler from diffusers.optimization import get_scheduler
from diffusers.utils import check_min_version, convert_state_dict_to_diffusers from diffusers.utils import check_min_version, convert_state_dict_to_diffusers
from diffusers.utils.import_utils import is_xformers_available from diffusers.utils.import_utils import is_xformers_available
...@@ -604,7 +604,7 @@ def main(args): ...@@ -604,7 +604,7 @@ def main(args):
# make sure to pop weight so that corresponding model is not saved again # make sure to pop weight so that corresponding model is not saved again
weights.pop() weights.pop()
LoraLoaderMixin.save_lora_weights( StableDiffusionLoraLoaderMixin.save_lora_weights(
output_dir, output_dir,
unet_lora_layers=unet_lora_layers_to_save, unet_lora_layers=unet_lora_layers_to_save,
text_encoder_lora_layers=None, text_encoder_lora_layers=None,
...@@ -621,8 +621,8 @@ def main(args): ...@@ -621,8 +621,8 @@ def main(args):
else: else:
raise ValueError(f"unexpected save model: {model.__class__}") raise ValueError(f"unexpected save model: {model.__class__}")
lora_state_dict, network_alphas = LoraLoaderMixin.lora_state_dict(input_dir) lora_state_dict, network_alphas = StableDiffusionLoraLoaderMixin.lora_state_dict(input_dir)
LoraLoaderMixin.load_lora_into_unet(lora_state_dict, network_alphas=network_alphas, unet=unet_) StableDiffusionLoraLoaderMixin.load_lora_into_unet(lora_state_dict, network_alphas=network_alphas, unet=unet_)
accelerator.register_save_state_pre_hook(save_model_hook) accelerator.register_save_state_pre_hook(save_model_hook)
accelerator.register_load_state_pre_hook(load_model_hook) accelerator.register_load_state_pre_hook(load_model_hook)
...@@ -951,7 +951,7 @@ def main(args): ...@@ -951,7 +951,7 @@ def main(args):
unet = unet.to(torch.float32) unet = unet.to(torch.float32)
unet_lora_state_dict = convert_state_dict_to_diffusers(get_peft_model_state_dict(unet)) unet_lora_state_dict = convert_state_dict_to_diffusers(get_peft_model_state_dict(unet))
LoraLoaderMixin.save_lora_weights( StableDiffusionLoraLoaderMixin.save_lora_weights(
save_directory=args.output_dir, unet_lora_layers=unet_lora_state_dict, text_encoder_lora_layers=None save_directory=args.output_dir, unet_lora_layers=unet_lora_state_dict, text_encoder_lora_layers=None
) )
......
...@@ -28,7 +28,7 @@ import torch.nn.functional as F ...@@ -28,7 +28,7 @@ import torch.nn.functional as F
from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
from diffusers.image_processor import PipelineImageInput, VaeImageProcessor from diffusers.image_processor import PipelineImageInput, VaeImageProcessor
from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin from diffusers.loaders import FromSingleFileMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
from diffusers.models import AutoencoderKL, ControlNetModel, UNet2DConditionModel from diffusers.models import AutoencoderKL, ControlNetModel, UNet2DConditionModel
from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.models.lora import adjust_lora_scale_text_encoder
from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
...@@ -142,7 +142,9 @@ def retrieve_timesteps( ...@@ -142,7 +142,9 @@ def retrieve_timesteps(
return timesteps, num_inference_steps return timesteps, num_inference_steps
class PromptDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin): class PromptDiffusionPipeline(
DiffusionPipeline, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin, FromSingleFileMixin
):
r""" r"""
Pipeline for text-to-image generation using Stable Diffusion with ControlNet guidance. Pipeline for text-to-image generation using Stable Diffusion with ControlNet guidance.
...@@ -153,8 +155,8 @@ class PromptDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lo ...@@ -153,8 +155,8 @@ class PromptDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lo
The pipeline also inherits the following loading methods: The pipeline also inherits the following loading methods:
- [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
- [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
- [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
- [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
Args: Args:
...@@ -348,7 +350,7 @@ class PromptDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lo ...@@ -348,7 +350,7 @@ class PromptDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lo
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -480,7 +482,7 @@ class PromptDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lo ...@@ -480,7 +482,7 @@ class PromptDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lo
negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment