Unverified Commit d87fe95f authored by Sayak Paul's avatar Sayak Paul Committed by GitHub
Browse files

[Chore] add `LoraLoaderMixin` to the inits (#8981)



* introduce  to promote reusability.

* up

* add more tests

* up

* remove comments.

* fix fuse_nan test

* clarify the scope of fuse_lora and unfuse_lora

* remove space

* rewrite fuse_lora a bit.

* feedback

* copy over load_lora_into_text_encoder.

* address dhruv's feedback.

* fix-copies

* fix issubclass.

* num_fused_loras

* fix

* fix

* remove mapping

* up

* fix

* style

* fix-copies

* change to SD3TransformerLoRALoadersMixin

* Apply suggestions from code review
Co-authored-by: default avatarDhruv Nair <dhruv.nair@gmail.com>

* up

* handle wuerstchen

* up

* move lora to lora_pipeline.py

* up

* fix-copies

* fix documentation.

* comment set_adapters().

* fix-copies

* fix set_adapters() at the model level.

* fix?

* fix

* loraloadermixin.

---------
Co-authored-by: default avatarDhruv Nair <dhruv.nair@gmail.com>
parent 50e66f2f
...@@ -9,7 +9,7 @@ import PIL.Image ...@@ -9,7 +9,7 @@ import PIL.Image
import torch import torch
from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer
from ...loaders import LoraLoaderMixin from ...loaders import StableDiffusionLoraLoaderMixin
from ...models import UNet2DConditionModel from ...models import UNet2DConditionModel
from ...schedulers import DDPMScheduler from ...schedulers import DDPMScheduler
from ...utils import ( from ...utils import (
...@@ -111,7 +111,7 @@ EXAMPLE_DOC_STRING = """ ...@@ -111,7 +111,7 @@ EXAMPLE_DOC_STRING = """
""" """
class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin): class IFInpaintingPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
tokenizer: T5Tokenizer tokenizer: T5Tokenizer
text_encoder: T5EncoderModel text_encoder: T5EncoderModel
......
...@@ -10,7 +10,7 @@ import torch ...@@ -10,7 +10,7 @@ import torch
import torch.nn.functional as F import torch.nn.functional as F
from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer
from ...loaders import LoraLoaderMixin from ...loaders import StableDiffusionLoraLoaderMixin
from ...models import UNet2DConditionModel from ...models import UNet2DConditionModel
from ...schedulers import DDPMScheduler from ...schedulers import DDPMScheduler
from ...utils import ( from ...utils import (
...@@ -113,7 +113,7 @@ EXAMPLE_DOC_STRING = """ ...@@ -113,7 +113,7 @@ EXAMPLE_DOC_STRING = """
""" """
class IFInpaintingSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin): class IFInpaintingSuperResolutionPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
tokenizer: T5Tokenizer tokenizer: T5Tokenizer
text_encoder: T5EncoderModel text_encoder: T5EncoderModel
......
...@@ -10,7 +10,7 @@ import torch ...@@ -10,7 +10,7 @@ import torch
import torch.nn.functional as F import torch.nn.functional as F
from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer
from ...loaders import LoraLoaderMixin from ...loaders import StableDiffusionLoraLoaderMixin
from ...models import UNet2DConditionModel from ...models import UNet2DConditionModel
from ...schedulers import DDPMScheduler from ...schedulers import DDPMScheduler
from ...utils import ( from ...utils import (
...@@ -69,7 +69,7 @@ EXAMPLE_DOC_STRING = """ ...@@ -69,7 +69,7 @@ EXAMPLE_DOC_STRING = """
""" """
class IFSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin): class IFSuperResolutionPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
tokenizer: T5Tokenizer tokenizer: T5Tokenizer
text_encoder: T5EncoderModel text_encoder: T5EncoderModel
......
...@@ -21,7 +21,12 @@ from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection, XLMR ...@@ -21,7 +21,12 @@ from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection, XLMR
from ....configuration_utils import FrozenDict from ....configuration_utils import FrozenDict
from ....image_processor import PipelineImageInput, VaeImageProcessor from ....image_processor import PipelineImageInput, VaeImageProcessor
from ....loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin from ....loaders import (
FromSingleFileMixin,
IPAdapterMixin,
StableDiffusionLoraLoaderMixin,
TextualInversionLoaderMixin,
)
from ....models import AutoencoderKL, ImageProjection, UNet2DConditionModel from ....models import AutoencoderKL, ImageProjection, UNet2DConditionModel
from ....models.lora import adjust_lora_scale_text_encoder from ....models.lora import adjust_lora_scale_text_encoder
from ....schedulers import KarrasDiffusionSchedulers from ....schedulers import KarrasDiffusionSchedulers
...@@ -137,7 +142,7 @@ class AltDiffusionPipeline( ...@@ -137,7 +142,7 @@ class AltDiffusionPipeline(
DiffusionPipeline, DiffusionPipeline,
StableDiffusionMixin, StableDiffusionMixin,
TextualInversionLoaderMixin, TextualInversionLoaderMixin,
LoraLoaderMixin, StableDiffusionLoraLoaderMixin,
IPAdapterMixin, IPAdapterMixin,
FromSingleFileMixin, FromSingleFileMixin,
): ):
...@@ -149,8 +154,8 @@ class AltDiffusionPipeline( ...@@ -149,8 +154,8 @@ class AltDiffusionPipeline(
The pipeline also inherits the following loading methods: The pipeline also inherits the following loading methods:
- [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
- [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
- [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
- [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
- [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
...@@ -346,7 +351,7 @@ class AltDiffusionPipeline( ...@@ -346,7 +351,7 @@ class AltDiffusionPipeline(
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -478,7 +483,7 @@ class AltDiffusionPipeline( ...@@ -478,7 +483,7 @@ class AltDiffusionPipeline(
negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
...@@ -23,7 +23,12 @@ from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection, XLMR ...@@ -23,7 +23,12 @@ from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection, XLMR
from ....configuration_utils import FrozenDict from ....configuration_utils import FrozenDict
from ....image_processor import PipelineImageInput, VaeImageProcessor from ....image_processor import PipelineImageInput, VaeImageProcessor
from ....loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin from ....loaders import (
FromSingleFileMixin,
IPAdapterMixin,
StableDiffusionLoraLoaderMixin,
TextualInversionLoaderMixin,
)
from ....models import AutoencoderKL, ImageProjection, UNet2DConditionModel from ....models import AutoencoderKL, ImageProjection, UNet2DConditionModel
from ....models.lora import adjust_lora_scale_text_encoder from ....models.lora import adjust_lora_scale_text_encoder
from ....schedulers import KarrasDiffusionSchedulers from ....schedulers import KarrasDiffusionSchedulers
...@@ -178,7 +183,7 @@ class AltDiffusionImg2ImgPipeline( ...@@ -178,7 +183,7 @@ class AltDiffusionImg2ImgPipeline(
StableDiffusionMixin, StableDiffusionMixin,
TextualInversionLoaderMixin, TextualInversionLoaderMixin,
IPAdapterMixin, IPAdapterMixin,
LoraLoaderMixin, StableDiffusionLoraLoaderMixin,
FromSingleFileMixin, FromSingleFileMixin,
): ):
r""" r"""
...@@ -189,8 +194,8 @@ class AltDiffusionImg2ImgPipeline( ...@@ -189,8 +194,8 @@ class AltDiffusionImg2ImgPipeline(
The pipeline also inherits the following loading methods: The pipeline also inherits the following loading methods:
- [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
- [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
- [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
- [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
- [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
...@@ -386,7 +391,7 @@ class AltDiffusionImg2ImgPipeline( ...@@ -386,7 +391,7 @@ class AltDiffusionImg2ImgPipeline(
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -518,7 +523,7 @@ class AltDiffusionImg2ImgPipeline( ...@@ -518,7 +523,7 @@ class AltDiffusionImg2ImgPipeline(
negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
...@@ -23,7 +23,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer ...@@ -23,7 +23,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
from ....configuration_utils import FrozenDict from ....configuration_utils import FrozenDict
from ....image_processor import PipelineImageInput, VaeImageProcessor from ....image_processor import PipelineImageInput, VaeImageProcessor
from ....loaders import LoraLoaderMixin, TextualInversionLoaderMixin from ....loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
from ....models import AutoencoderKL, UNet2DConditionModel from ....models import AutoencoderKL, UNet2DConditionModel
from ....models.lora import adjust_lora_scale_text_encoder from ....models.lora import adjust_lora_scale_text_encoder
from ....schedulers import DDIMScheduler from ....schedulers import DDIMScheduler
...@@ -136,7 +136,7 @@ def compute_noise(scheduler, prev_latents, latents, timestep, noise_pred, eta): ...@@ -136,7 +136,7 @@ def compute_noise(scheduler, prev_latents, latents, timestep, noise_pred, eta):
return noise return noise
class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin): class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin):
r""" r"""
Pipeline for text-guided image to image generation using Stable Diffusion. Pipeline for text-guided image to image generation using Stable Diffusion.
...@@ -145,8 +145,8 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor ...@@ -145,8 +145,8 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
The pipeline also inherits the following loading methods: The pipeline also inherits the following loading methods:
- [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
- [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
- [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
Args: Args:
vae ([`AutoencoderKL`]): vae ([`AutoencoderKL`]):
...@@ -324,7 +324,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor ...@@ -324,7 +324,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -457,7 +457,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor ...@@ -457,7 +457,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if self.text_encoder is not None: if self.text_encoder is not None:
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
...@@ -23,7 +23,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer ...@@ -23,7 +23,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
from ....configuration_utils import FrozenDict from ....configuration_utils import FrozenDict
from ....image_processor import VaeImageProcessor from ....image_processor import VaeImageProcessor
from ....loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin from ....loaders import FromSingleFileMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
from ....models import AutoencoderKL, UNet2DConditionModel from ....models import AutoencoderKL, UNet2DConditionModel
from ....models.lora import adjust_lora_scale_text_encoder from ....models.lora import adjust_lora_scale_text_encoder
from ....schedulers import KarrasDiffusionSchedulers from ....schedulers import KarrasDiffusionSchedulers
...@@ -79,7 +79,7 @@ def preprocess_mask(mask, batch_size, scale_factor=8): ...@@ -79,7 +79,7 @@ def preprocess_mask(mask, batch_size, scale_factor=8):
class StableDiffusionInpaintPipelineLegacy( class StableDiffusionInpaintPipelineLegacy(
DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin DiffusionPipeline, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin, FromSingleFileMixin
): ):
r""" r"""
Pipeline for text-guided image inpainting using Stable Diffusion. *This is an experimental feature*. Pipeline for text-guided image inpainting using Stable Diffusion. *This is an experimental feature*.
...@@ -89,11 +89,11 @@ class StableDiffusionInpaintPipelineLegacy( ...@@ -89,11 +89,11 @@ class StableDiffusionInpaintPipelineLegacy(
In addition the pipeline inherits the following loading methods: In addition the pipeline inherits the following loading methods:
- *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`] - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`]
- *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`] - *LoRA*: [`loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`]
- *Ckpt*: [`loaders.FromSingleFileMixin.from_single_file`] - *Ckpt*: [`loaders.FromSingleFileMixin.from_single_file`]
as well as the following saving methods: as well as the following saving methods:
- *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`] - *LoRA*: [`loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`]
Args: Args:
vae ([`AutoencoderKL`]): vae ([`AutoencoderKL`]):
...@@ -294,7 +294,7 @@ class StableDiffusionInpaintPipelineLegacy( ...@@ -294,7 +294,7 @@ class StableDiffusionInpaintPipelineLegacy(
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -427,7 +427,7 @@ class StableDiffusionInpaintPipelineLegacy( ...@@ -427,7 +427,7 @@ class StableDiffusionInpaintPipelineLegacy(
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if self.text_encoder is not None: if self.text_encoder is not None:
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
...@@ -19,7 +19,7 @@ import torch ...@@ -19,7 +19,7 @@ import torch
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
from ....image_processor import VaeImageProcessor from ....image_processor import VaeImageProcessor
from ....loaders import LoraLoaderMixin, TextualInversionLoaderMixin from ....loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
from ....models import AutoencoderKL, UNet2DConditionModel from ....models import AutoencoderKL, UNet2DConditionModel
from ....models.lora import adjust_lora_scale_text_encoder from ....models.lora import adjust_lora_scale_text_encoder
from ....schedulers import PNDMScheduler from ....schedulers import PNDMScheduler
...@@ -37,7 +37,7 @@ AUGS_CONST = ["A photo of ", "An image of ", "A picture of "] ...@@ -37,7 +37,7 @@ AUGS_CONST = ["A photo of ", "An image of ", "A picture of "]
class StableDiffusionModelEditingPipeline( class StableDiffusionModelEditingPipeline(
DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin
): ):
r""" r"""
Pipeline for text-to-image model editing. Pipeline for text-to-image model editing.
...@@ -47,8 +47,8 @@ class StableDiffusionModelEditingPipeline( ...@@ -47,8 +47,8 @@ class StableDiffusionModelEditingPipeline(
The pipeline also inherits the following loading methods: The pipeline also inherits the following loading methods:
- [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
- [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
- [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
Args: Args:
vae ([`AutoencoderKL`]): vae ([`AutoencoderKL`]):
...@@ -232,7 +232,7 @@ class StableDiffusionModelEditingPipeline( ...@@ -232,7 +232,7 @@ class StableDiffusionModelEditingPipeline(
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -365,7 +365,7 @@ class StableDiffusionModelEditingPipeline( ...@@ -365,7 +365,7 @@ class StableDiffusionModelEditingPipeline(
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if self.text_encoder is not None: if self.text_encoder is not None:
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
...@@ -19,7 +19,7 @@ import torch ...@@ -19,7 +19,7 @@ import torch
from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
from ....image_processor import VaeImageProcessor from ....image_processor import VaeImageProcessor
from ....loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin from ....loaders import FromSingleFileMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
from ....models import AutoencoderKL, UNet2DConditionModel from ....models import AutoencoderKL, UNet2DConditionModel
from ....models.lora import adjust_lora_scale_text_encoder from ....models.lora import adjust_lora_scale_text_encoder
from ....schedulers import KarrasDiffusionSchedulers from ....schedulers import KarrasDiffusionSchedulers
...@@ -63,7 +63,11 @@ EXAMPLE_DOC_STRING = """ ...@@ -63,7 +63,11 @@ EXAMPLE_DOC_STRING = """
class StableDiffusionParadigmsPipeline( class StableDiffusionParadigmsPipeline(
DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin DiffusionPipeline,
StableDiffusionMixin,
TextualInversionLoaderMixin,
StableDiffusionLoraLoaderMixin,
FromSingleFileMixin,
): ):
r""" r"""
Pipeline for text-to-image generation using a parallelized version of Stable Diffusion. Pipeline for text-to-image generation using a parallelized version of Stable Diffusion.
...@@ -73,8 +77,8 @@ class StableDiffusionParadigmsPipeline( ...@@ -73,8 +77,8 @@ class StableDiffusionParadigmsPipeline(
The pipeline also inherits the following loading methods: The pipeline also inherits the following loading methods:
- [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
- [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
- [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
- [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
Args: Args:
...@@ -223,7 +227,7 @@ class StableDiffusionParadigmsPipeline( ...@@ -223,7 +227,7 @@ class StableDiffusionParadigmsPipeline(
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -356,7 +360,7 @@ class StableDiffusionParadigmsPipeline( ...@@ -356,7 +360,7 @@ class StableDiffusionParadigmsPipeline(
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if self.text_encoder is not None: if self.text_encoder is not None:
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
...@@ -29,7 +29,7 @@ from transformers import ( ...@@ -29,7 +29,7 @@ from transformers import (
) )
from ....image_processor import PipelineImageInput, VaeImageProcessor from ....image_processor import PipelineImageInput, VaeImageProcessor
from ....loaders import LoraLoaderMixin, TextualInversionLoaderMixin from ....loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
from ....models import AutoencoderKL, UNet2DConditionModel from ....models import AutoencoderKL, UNet2DConditionModel
from ....models.attention_processor import Attention from ....models.attention_processor import Attention
from ....models.lora import adjust_lora_scale_text_encoder from ....models.lora import adjust_lora_scale_text_encoder
...@@ -446,7 +446,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin ...@@ -446,7 +446,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -579,7 +579,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin ...@@ -579,7 +579,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if self.text_encoder is not None: if self.text_encoder is not None:
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
...@@ -3,7 +3,7 @@ from typing import Callable, Dict, List, Optional, Union ...@@ -3,7 +3,7 @@ from typing import Callable, Dict, List, Optional, Union
import torch import torch
from transformers import T5EncoderModel, T5Tokenizer from transformers import T5EncoderModel, T5Tokenizer
from ...loaders import LoraLoaderMixin from ...loaders import StableDiffusionLoraLoaderMixin
from ...models import Kandinsky3UNet, VQModel from ...models import Kandinsky3UNet, VQModel
from ...schedulers import DDPMScheduler from ...schedulers import DDPMScheduler
from ...utils import ( from ...utils import (
...@@ -47,7 +47,7 @@ def downscale_height_and_width(height, width, scale_factor=8): ...@@ -47,7 +47,7 @@ def downscale_height_and_width(height, width, scale_factor=8):
return new_height * scale_factor, new_width * scale_factor return new_height * scale_factor, new_width * scale_factor
class Kandinsky3Pipeline(DiffusionPipeline, LoraLoaderMixin): class Kandinsky3Pipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
model_cpu_offload_seq = "text_encoder->unet->movq" model_cpu_offload_seq = "text_encoder->unet->movq"
_callback_tensor_inputs = [ _callback_tensor_inputs = [
"latents", "latents",
......
...@@ -7,7 +7,7 @@ import PIL.Image ...@@ -7,7 +7,7 @@ import PIL.Image
import torch import torch
from transformers import T5EncoderModel, T5Tokenizer from transformers import T5EncoderModel, T5Tokenizer
from ...loaders import LoraLoaderMixin from ...loaders import StableDiffusionLoraLoaderMixin
from ...models import Kandinsky3UNet, VQModel from ...models import Kandinsky3UNet, VQModel
from ...schedulers import DDPMScheduler from ...schedulers import DDPMScheduler
from ...utils import ( from ...utils import (
...@@ -62,7 +62,7 @@ def prepare_image(pil_image): ...@@ -62,7 +62,7 @@ def prepare_image(pil_image):
return image return image
class Kandinsky3Img2ImgPipeline(DiffusionPipeline, LoraLoaderMixin): class Kandinsky3Img2ImgPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
model_cpu_offload_seq = "text_encoder->movq->unet->movq" model_cpu_offload_seq = "text_encoder->movq->unet->movq"
_callback_tensor_inputs = [ _callback_tensor_inputs = [
"latents", "latents",
......
...@@ -23,7 +23,7 @@ import torch ...@@ -23,7 +23,7 @@ import torch
from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
from ...image_processor import PipelineImageInput, VaeImageProcessor from ...image_processor import PipelineImageInput, VaeImageProcessor
from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin from ...loaders import FromSingleFileMixin, IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
from ...models.lora import adjust_lora_scale_text_encoder from ...models.lora import adjust_lora_scale_text_encoder
from ...schedulers import LCMScheduler from ...schedulers import LCMScheduler
...@@ -148,7 +148,7 @@ class LatentConsistencyModelImg2ImgPipeline( ...@@ -148,7 +148,7 @@ class LatentConsistencyModelImg2ImgPipeline(
StableDiffusionMixin, StableDiffusionMixin,
TextualInversionLoaderMixin, TextualInversionLoaderMixin,
IPAdapterMixin, IPAdapterMixin,
LoraLoaderMixin, StableDiffusionLoraLoaderMixin,
FromSingleFileMixin, FromSingleFileMixin,
): ):
r""" r"""
...@@ -159,8 +159,8 @@ class LatentConsistencyModelImg2ImgPipeline( ...@@ -159,8 +159,8 @@ class LatentConsistencyModelImg2ImgPipeline(
The pipeline also inherits the following loading methods: The pipeline also inherits the following loading methods:
- [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
- [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
- [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
- [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
- [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
...@@ -273,7 +273,7 @@ class LatentConsistencyModelImg2ImgPipeline( ...@@ -273,7 +273,7 @@ class LatentConsistencyModelImg2ImgPipeline(
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -406,7 +406,7 @@ class LatentConsistencyModelImg2ImgPipeline( ...@@ -406,7 +406,7 @@ class LatentConsistencyModelImg2ImgPipeline(
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if self.text_encoder is not None: if self.text_encoder is not None:
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
...@@ -22,7 +22,7 @@ import torch ...@@ -22,7 +22,7 @@ import torch
from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
from ...image_processor import PipelineImageInput, VaeImageProcessor from ...image_processor import PipelineImageInput, VaeImageProcessor
from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin from ...loaders import FromSingleFileMixin, IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
from ...models.lora import adjust_lora_scale_text_encoder from ...models.lora import adjust_lora_scale_text_encoder
from ...schedulers import LCMScheduler from ...schedulers import LCMScheduler
...@@ -126,7 +126,7 @@ class LatentConsistencyModelPipeline( ...@@ -126,7 +126,7 @@ class LatentConsistencyModelPipeline(
StableDiffusionMixin, StableDiffusionMixin,
TextualInversionLoaderMixin, TextualInversionLoaderMixin,
IPAdapterMixin, IPAdapterMixin,
LoraLoaderMixin, StableDiffusionLoraLoaderMixin,
FromSingleFileMixin, FromSingleFileMixin,
): ):
r""" r"""
...@@ -137,8 +137,8 @@ class LatentConsistencyModelPipeline( ...@@ -137,8 +137,8 @@ class LatentConsistencyModelPipeline(
The pipeline also inherits the following loading methods: The pipeline also inherits the following loading methods:
- [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
- [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
- [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
- [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
- [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
...@@ -257,7 +257,7 @@ class LatentConsistencyModelPipeline( ...@@ -257,7 +257,7 @@ class LatentConsistencyModelPipeline(
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -390,7 +390,7 @@ class LatentConsistencyModelPipeline( ...@@ -390,7 +390,7 @@ class LatentConsistencyModelPipeline(
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if self.text_encoder is not None: if self.text_encoder is not None:
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
...@@ -10,7 +10,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer ...@@ -10,7 +10,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
from ...configuration_utils import FrozenDict from ...configuration_utils import FrozenDict
from ...image_processor import PipelineImageInput, VaeImageProcessor from ...image_processor import PipelineImageInput, VaeImageProcessor
from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin from ...loaders import FromSingleFileMixin, IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
from ...models import AutoencoderKL, UNet2DConditionModel from ...models import AutoencoderKL, UNet2DConditionModel
from ...models.attention_processor import Attention, AttnProcessor from ...models.attention_processor import Attention, AttnProcessor
from ...models.lora import adjust_lora_scale_text_encoder from ...models.lora import adjust_lora_scale_text_encoder
...@@ -248,7 +248,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): ...@@ -248,7 +248,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
class LEditsPPPipelineStableDiffusion( class LEditsPPPipelineStableDiffusion(
DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin DiffusionPipeline, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin
): ):
""" """
Pipeline for textual image editing using LEDits++ with Stable Diffusion. Pipeline for textual image editing using LEDits++ with Stable Diffusion.
...@@ -538,7 +538,7 @@ class LEditsPPPipelineStableDiffusion( ...@@ -538,7 +538,7 @@ class LEditsPPPipelineStableDiffusion(
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -676,7 +676,7 @@ class LEditsPPPipelineStableDiffusion( ...@@ -676,7 +676,7 @@ class LEditsPPPipelineStableDiffusion(
negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
...@@ -24,7 +24,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPV ...@@ -24,7 +24,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPV
from ...callbacks import MultiPipelineCallbacks, PipelineCallback from ...callbacks import MultiPipelineCallbacks, PipelineCallback
from ...image_processor import PipelineImageInput, VaeImageProcessor from ...image_processor import PipelineImageInput, VaeImageProcessor
from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin from ...loaders import FromSingleFileMixin, IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
from ...models import AutoencoderKL, ControlNetModel, ImageProjection, UNet2DConditionModel from ...models import AutoencoderKL, ControlNetModel, ImageProjection, UNet2DConditionModel
from ...models.lora import adjust_lora_scale_text_encoder from ...models.lora import adjust_lora_scale_text_encoder
from ...schedulers import KarrasDiffusionSchedulers from ...schedulers import KarrasDiffusionSchedulers
...@@ -159,7 +159,7 @@ class StableDiffusionControlNetPAGPipeline( ...@@ -159,7 +159,7 @@ class StableDiffusionControlNetPAGPipeline(
DiffusionPipeline, DiffusionPipeline,
StableDiffusionMixin, StableDiffusionMixin,
TextualInversionLoaderMixin, TextualInversionLoaderMixin,
LoraLoaderMixin, StableDiffusionLoraLoaderMixin,
IPAdapterMixin, IPAdapterMixin,
FromSingleFileMixin, FromSingleFileMixin,
PAGMixin, PAGMixin,
...@@ -172,8 +172,8 @@ class StableDiffusionControlNetPAGPipeline( ...@@ -172,8 +172,8 @@ class StableDiffusionControlNetPAGPipeline(
The pipeline also inherits the following loading methods: The pipeline also inherits the following loading methods:
- [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
- [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
- [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
- [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
- [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
...@@ -305,7 +305,7 @@ class StableDiffusionControlNetPAGPipeline( ...@@ -305,7 +305,7 @@ class StableDiffusionControlNetPAGPipeline(
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -438,7 +438,7 @@ class StableDiffusionControlNetPAGPipeline( ...@@ -438,7 +438,7 @@ class StableDiffusionControlNetPAGPipeline(
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if self.text_encoder is not None: if self.text_encoder is not None:
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
...@@ -20,7 +20,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPV ...@@ -20,7 +20,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPV
from ...configuration_utils import FrozenDict from ...configuration_utils import FrozenDict
from ...image_processor import PipelineImageInput, VaeImageProcessor from ...image_processor import PipelineImageInput, VaeImageProcessor
from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin from ...loaders import FromSingleFileMixin, IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
from ...models.lora import adjust_lora_scale_text_encoder from ...models.lora import adjust_lora_scale_text_encoder
from ...schedulers import KarrasDiffusionSchedulers from ...schedulers import KarrasDiffusionSchedulers
...@@ -137,7 +137,7 @@ class StableDiffusionPAGPipeline( ...@@ -137,7 +137,7 @@ class StableDiffusionPAGPipeline(
DiffusionPipeline, DiffusionPipeline,
StableDiffusionMixin, StableDiffusionMixin,
TextualInversionLoaderMixin, TextualInversionLoaderMixin,
LoraLoaderMixin, StableDiffusionLoraLoaderMixin,
IPAdapterMixin, IPAdapterMixin,
FromSingleFileMixin, FromSingleFileMixin,
PAGMixin, PAGMixin,
...@@ -150,8 +150,8 @@ class StableDiffusionPAGPipeline( ...@@ -150,8 +150,8 @@ class StableDiffusionPAGPipeline(
The pipeline also inherits the following loading methods: The pipeline also inherits the following loading methods:
- [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
- [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
- [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
- [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
- [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
...@@ -319,7 +319,7 @@ class StableDiffusionPAGPipeline( ...@@ -319,7 +319,7 @@ class StableDiffusionPAGPipeline(
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -452,7 +452,7 @@ class StableDiffusionPAGPipeline( ...@@ -452,7 +452,7 @@ class StableDiffusionPAGPipeline(
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if self.text_encoder is not None: if self.text_encoder is not None:
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
...@@ -22,7 +22,7 @@ import torch ...@@ -22,7 +22,7 @@ import torch
from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
from ...image_processor import PipelineImageInput from ...image_processor import PipelineImageInput
from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin from ...loaders import FromSingleFileMixin, IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel
from ...models.lora import adjust_lora_scale_text_encoder from ...models.lora import adjust_lora_scale_text_encoder
from ...models.unets.unet_motion_model import MotionAdapter from ...models.unets.unet_motion_model import MotionAdapter
...@@ -128,7 +128,7 @@ class PIAPipeline( ...@@ -128,7 +128,7 @@ class PIAPipeline(
StableDiffusionMixin, StableDiffusionMixin,
TextualInversionLoaderMixin, TextualInversionLoaderMixin,
IPAdapterMixin, IPAdapterMixin,
LoraLoaderMixin, StableDiffusionLoraLoaderMixin,
FromSingleFileMixin, FromSingleFileMixin,
FreeInitMixin, FreeInitMixin,
): ):
...@@ -140,8 +140,8 @@ class PIAPipeline( ...@@ -140,8 +140,8 @@ class PIAPipeline(
The pipeline also inherits the following loading methods: The pipeline also inherits the following loading methods:
- [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
- [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
- [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
- [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
Args: Args:
...@@ -243,7 +243,7 @@ class PIAPipeline( ...@@ -243,7 +243,7 @@ class PIAPipeline(
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -376,7 +376,7 @@ class PIAPipeline( ...@@ -376,7 +376,7 @@ class PIAPipeline(
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if self.text_encoder is not None: if self.text_encoder is not None:
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
...@@ -21,7 +21,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPV ...@@ -21,7 +21,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPV
from ...callbacks import MultiPipelineCallbacks, PipelineCallback from ...callbacks import MultiPipelineCallbacks, PipelineCallback
from ...configuration_utils import FrozenDict from ...configuration_utils import FrozenDict
from ...image_processor import PipelineImageInput, VaeImageProcessor from ...image_processor import PipelineImageInput, VaeImageProcessor
from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin from ...loaders import FromSingleFileMixin, IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
from ...models.lora import adjust_lora_scale_text_encoder from ...models.lora import adjust_lora_scale_text_encoder
from ...schedulers import KarrasDiffusionSchedulers from ...schedulers import KarrasDiffusionSchedulers
...@@ -133,7 +133,7 @@ class StableDiffusionPipeline( ...@@ -133,7 +133,7 @@ class StableDiffusionPipeline(
DiffusionPipeline, DiffusionPipeline,
StableDiffusionMixin, StableDiffusionMixin,
TextualInversionLoaderMixin, TextualInversionLoaderMixin,
LoraLoaderMixin, StableDiffusionLoraLoaderMixin,
IPAdapterMixin, IPAdapterMixin,
FromSingleFileMixin, FromSingleFileMixin,
): ):
...@@ -145,8 +145,8 @@ class StableDiffusionPipeline( ...@@ -145,8 +145,8 @@ class StableDiffusionPipeline(
The pipeline also inherits the following loading methods: The pipeline also inherits the following loading methods:
- [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
- [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
- [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
- [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
- [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
...@@ -342,7 +342,7 @@ class StableDiffusionPipeline( ...@@ -342,7 +342,7 @@ class StableDiffusionPipeline(
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -475,7 +475,7 @@ class StableDiffusionPipeline( ...@@ -475,7 +475,7 @@ class StableDiffusionPipeline(
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if self.text_encoder is not None: if self.text_encoder is not None:
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
...@@ -24,7 +24,7 @@ from transformers import CLIPTextModel, CLIPTokenizer, DPTFeatureExtractor, DPTF ...@@ -24,7 +24,7 @@ from transformers import CLIPTextModel, CLIPTokenizer, DPTFeatureExtractor, DPTF
from ...configuration_utils import FrozenDict from ...configuration_utils import FrozenDict
from ...image_processor import PipelineImageInput, VaeImageProcessor from ...image_processor import PipelineImageInput, VaeImageProcessor
from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin from ...loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
from ...models import AutoencoderKL, UNet2DConditionModel from ...models import AutoencoderKL, UNet2DConditionModel
from ...models.lora import adjust_lora_scale_text_encoder from ...models.lora import adjust_lora_scale_text_encoder
from ...schedulers import KarrasDiffusionSchedulers from ...schedulers import KarrasDiffusionSchedulers
...@@ -74,7 +74,7 @@ def preprocess(image): ...@@ -74,7 +74,7 @@ def preprocess(image):
return image return image
class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin): class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin):
r""" r"""
Pipeline for text-guided depth-based image-to-image generation using Stable Diffusion. Pipeline for text-guided depth-based image-to-image generation using Stable Diffusion.
...@@ -83,8 +83,8 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader ...@@ -83,8 +83,8 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
The pipeline also inherits the following loading methods: The pipeline also inherits the following loading methods:
- [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
- [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
- [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
Args: Args:
vae ([`AutoencoderKL`]): vae ([`AutoencoderKL`]):
...@@ -225,7 +225,7 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader ...@@ -225,7 +225,7 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
""" """
# set lora scale so that monkey patched LoRA # set lora scale so that monkey patched LoRA
# function of text encoder can correctly access it # function of text encoder can correctly access it
if lora_scale is not None and isinstance(self, LoraLoaderMixin): if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
self._lora_scale = lora_scale self._lora_scale = lora_scale
# dynamically adjust the LoRA scale # dynamically adjust the LoRA scale
...@@ -358,7 +358,7 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader ...@@ -358,7 +358,7 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
if self.text_encoder is not None: if self.text_encoder is not None:
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
# Retrieve the original scale by scaling back the LoRA layers # Retrieve the original scale by scaling back the LoRA layers
unscale_lora_layers(self.text_encoder, lora_scale) unscale_lora_layers(self.text_encoder, lora_scale)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment