Unverified Commit e25e525f authored by Sayak Paul's avatar Sayak Paul Committed by GitHub
Browse files

[LoRA test suite] refactor the test suite and cleanse it (#7316)

* cleanse and refactor lora testing suite.

* more cleanup.

* make check_if_lora_correctly_set a utility function

* fix: typo

* retrigger ci

* style
parent de9adb90
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" Conversion script for the Stable Diffusion checkpoints.""" """Conversion script for the Stable Diffusion checkpoints."""
import os import os
import re import re
......
...@@ -12,7 +12,8 @@ ...@@ -12,7 +12,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" PyTorch - Flax general utilities.""" """PyTorch - Flax general utilities."""
import re import re
import jax.numpy as jnp import jax.numpy as jnp
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" PyTorch - Flax general utilities.""" """PyTorch - Flax general utilities."""
from pickle import UnpicklingError from pickle import UnpicklingError
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" Conversion script for the Stable Diffusion checkpoints.""" """Conversion script for the Stable Diffusion checkpoints."""
import re import re
from contextlib import nullcontext from contextlib import nullcontext
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
""" """
Doc utilities: Utilities related to documentation Doc utilities: Utilities related to documentation
""" """
import re import re
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" Logging utilities.""" """Logging utilities."""
import logging import logging
import os import os
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
""" """
PEFT utilities: Utilities related to peft library PEFT utilities: Utilities related to peft library
""" """
import collections import collections
import importlib import importlib
from typing import Optional from typing import Optional
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
""" """
State dict utilities: utility methods for converting state dicts easily State dict utilities: utility methods for converting state dicts easily
""" """
import enum import enum
from .logging import get_logger from .logging import get_logger
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
""" """
PyTorch utilities: Utilities related to PyTorch PyTorch utilities: Utilities related to PyTorch
""" """
from typing import List, Optional, Tuple, Union from typing import List, Optional, Tuple, Union
from . import logging from . import logging
......
# coding=utf-8
# Copyright 2024 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import gc
import sys
import unittest
import numpy as np
import torch
import torch.nn as nn
from huggingface_hub import hf_hub_download
from huggingface_hub.repocard import RepoCard
from safetensors.torch import load_file
from diffusers import (
AutoPipelineForImage2Image,
AutoPipelineForText2Image,
DDIMScheduler,
DiffusionPipeline,
LCMScheduler,
StableDiffusionPipeline,
)
from diffusers.utils.import_utils import is_accelerate_available
from diffusers.utils.testing_utils import (
load_image,
numpy_cosine_similarity_distance,
require_peft_backend,
require_torch_gpu,
slow,
torch_device,
)
sys.path.append(".")
from utils import PeftLoraLoaderMixinTests, check_if_lora_correctly_set # noqa: E402
if is_accelerate_available():
from accelerate.utils import release_memory
class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
pipeline_class = StableDiffusionPipeline
scheduler_cls = DDIMScheduler
scheduler_kwargs = {
"beta_start": 0.00085,
"beta_end": 0.012,
"beta_schedule": "scaled_linear",
"clip_sample": False,
"set_alpha_to_one": False,
"steps_offset": 1,
}
unet_kwargs = {
"block_out_channels": (32, 64),
"layers_per_block": 2,
"sample_size": 32,
"in_channels": 4,
"out_channels": 4,
"down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"),
"up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"),
"cross_attention_dim": 32,
}
vae_kwargs = {
"block_out_channels": [32, 64],
"in_channels": 3,
"out_channels": 3,
"down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"],
"up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"],
"latent_channels": 4,
}
def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
# Keeping this test here makes sense because it doesn't look any integration
# (value assertions on logits).
@slow
@require_torch_gpu
def test_integration_move_lora_cpu(self):
path = "runwayml/stable-diffusion-v1-5"
lora_id = "takuma104/lora-test-text-encoder-lora-target"
pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
pipe.load_lora_weights(lora_id, adapter_name="adapter-1")
pipe.load_lora_weights(lora_id, adapter_name="adapter-2")
pipe = pipe.to(torch_device)
self.assertTrue(
check_if_lora_correctly_set(pipe.text_encoder),
"Lora not correctly set in text encoder",
)
self.assertTrue(
check_if_lora_correctly_set(pipe.unet),
"Lora not correctly set in text encoder",
)
# We will offload the first adapter in CPU and check if the offloading
# has been performed correctly
pipe.set_lora_device(["adapter-1"], "cpu")
for name, module in pipe.unet.named_modules():
if "adapter-1" in name and not isinstance(module, (nn.Dropout, nn.Identity)):
self.assertTrue(module.weight.device == torch.device("cpu"))
elif "adapter-2" in name and not isinstance(module, (nn.Dropout, nn.Identity)):
self.assertTrue(module.weight.device != torch.device("cpu"))
for name, module in pipe.text_encoder.named_modules():
if "adapter-1" in name and not isinstance(module, (nn.Dropout, nn.Identity)):
self.assertTrue(module.weight.device == torch.device("cpu"))
elif "adapter-2" in name and not isinstance(module, (nn.Dropout, nn.Identity)):
self.assertTrue(module.weight.device != torch.device("cpu"))
pipe.set_lora_device(["adapter-1"], 0)
for n, m in pipe.unet.named_modules():
if "adapter-1" in n and not isinstance(m, (nn.Dropout, nn.Identity)):
self.assertTrue(m.weight.device != torch.device("cpu"))
for n, m in pipe.text_encoder.named_modules():
if "adapter-1" in n and not isinstance(m, (nn.Dropout, nn.Identity)):
self.assertTrue(m.weight.device != torch.device("cpu"))
pipe.set_lora_device(["adapter-1", "adapter-2"], torch_device)
for n, m in pipe.unet.named_modules():
if ("adapter-1" in n or "adapter-2" in n) and not isinstance(m, (nn.Dropout, nn.Identity)):
self.assertTrue(m.weight.device != torch.device("cpu"))
for n, m in pipe.text_encoder.named_modules():
if ("adapter-1" in n or "adapter-2" in n) and not isinstance(m, (nn.Dropout, nn.Identity)):
self.assertTrue(m.weight.device != torch.device("cpu"))
@slow
@require_torch_gpu
@require_peft_backend
class LoraIntegrationTests(unittest.TestCase):
def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
def test_integration_logits_with_scale(self):
path = "runwayml/stable-diffusion-v1-5"
lora_id = "takuma104/lora-test-text-encoder-lora-target"
pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32)
pipe.load_lora_weights(lora_id)
pipe = pipe.to(torch_device)
self.assertTrue(
check_if_lora_correctly_set(pipe.text_encoder),
"Lora not correctly set in text encoder",
)
prompt = "a red sks dog"
images = pipe(
prompt=prompt,
num_inference_steps=15,
cross_attention_kwargs={"scale": 0.5},
generator=torch.manual_seed(0),
output_type="np",
).images
expected_slice_scale = np.array([0.307, 0.283, 0.310, 0.310, 0.300, 0.314, 0.336, 0.314, 0.321])
predicted_slice = images[0, -3:, -3:, -1].flatten()
max_diff = numpy_cosine_similarity_distance(expected_slice_scale, predicted_slice)
assert max_diff < 1e-3
pipe.unload_lora_weights()
release_memory(pipe)
def test_integration_logits_no_scale(self):
path = "runwayml/stable-diffusion-v1-5"
lora_id = "takuma104/lora-test-text-encoder-lora-target"
pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32)
pipe.load_lora_weights(lora_id)
pipe = pipe.to(torch_device)
self.assertTrue(
check_if_lora_correctly_set(pipe.text_encoder),
"Lora not correctly set in text encoder",
)
prompt = "a red sks dog"
images = pipe(prompt=prompt, num_inference_steps=30, generator=torch.manual_seed(0), output_type="np").images
expected_slice_scale = np.array([0.074, 0.064, 0.073, 0.0842, 0.069, 0.0641, 0.0794, 0.076, 0.084])
predicted_slice = images[0, -3:, -3:, -1].flatten()
max_diff = numpy_cosine_similarity_distance(expected_slice_scale, predicted_slice)
assert max_diff < 1e-3
pipe.unload_lora_weights()
release_memory(pipe)
def test_dreambooth_old_format(self):
generator = torch.Generator("cpu").manual_seed(0)
lora_model_id = "hf-internal-testing/lora_dreambooth_dog_example"
card = RepoCard.load(lora_model_id)
base_model_id = card.data.to_dict()["base_model"]
pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None)
pipe = pipe.to(torch_device)
pipe.load_lora_weights(lora_model_id)
images = pipe(
"A photo of a sks dog floating in the river", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.7207, 0.6787, 0.6010, 0.7478, 0.6838, 0.6064, 0.6984, 0.6443, 0.5785])
max_diff = numpy_cosine_similarity_distance(expected, images)
assert max_diff < 1e-4
pipe.unload_lora_weights()
release_memory(pipe)
def test_dreambooth_text_encoder_new_format(self):
generator = torch.Generator().manual_seed(0)
lora_model_id = "hf-internal-testing/lora-trained"
card = RepoCard.load(lora_model_id)
base_model_id = card.data.to_dict()["base_model"]
pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None)
pipe = pipe.to(torch_device)
pipe.load_lora_weights(lora_model_id)
images = pipe("A photo of a sks dog", output_type="np", generator=generator, num_inference_steps=2).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.6628, 0.6138, 0.5390, 0.6625, 0.6130, 0.5463, 0.6166, 0.5788, 0.5359])
max_diff = numpy_cosine_similarity_distance(expected, images)
assert max_diff < 1e-4
pipe.unload_lora_weights()
release_memory(pipe)
def test_a1111(self):
generator = torch.Generator().manual_seed(0)
pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None).to(
torch_device
)
lora_model_id = "hf-internal-testing/civitai-light-shadow-lora"
lora_filename = "light_and_shadow.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292])
max_diff = numpy_cosine_similarity_distance(expected, images)
assert max_diff < 1e-3
pipe.unload_lora_weights()
release_memory(pipe)
def test_lycoris(self):
generator = torch.Generator().manual_seed(0)
pipe = StableDiffusionPipeline.from_pretrained(
"hf-internal-testing/Amixx", safety_checker=None, use_safetensors=True, variant="fp16"
).to(torch_device)
lora_model_id = "hf-internal-testing/edgLycorisMugler-light"
lora_filename = "edgLycorisMugler-light.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.6463, 0.658, 0.599, 0.6542, 0.6512, 0.6213, 0.658, 0.6485, 0.6017])
max_diff = numpy_cosine_similarity_distance(expected, images)
assert max_diff < 1e-3
pipe.unload_lora_weights()
release_memory(pipe)
def test_a1111_with_model_cpu_offload(self):
generator = torch.Generator().manual_seed(0)
pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None)
pipe.enable_model_cpu_offload()
lora_model_id = "hf-internal-testing/civitai-light-shadow-lora"
lora_filename = "light_and_shadow.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292])
max_diff = numpy_cosine_similarity_distance(expected, images)
assert max_diff < 1e-3
pipe.unload_lora_weights()
release_memory(pipe)
def test_a1111_with_sequential_cpu_offload(self):
generator = torch.Generator().manual_seed(0)
pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None)
pipe.enable_sequential_cpu_offload()
lora_model_id = "hf-internal-testing/civitai-light-shadow-lora"
lora_filename = "light_and_shadow.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292])
max_diff = numpy_cosine_similarity_distance(expected, images)
assert max_diff < 1e-3
pipe.unload_lora_weights()
release_memory(pipe)
def test_kohya_sd_v15_with_higher_dimensions(self):
generator = torch.Generator().manual_seed(0)
pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to(
torch_device
)
lora_model_id = "hf-internal-testing/urushisato-lora"
lora_filename = "urushisato_v15.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.7165, 0.6616, 0.5833, 0.7504, 0.6718, 0.587, 0.6871, 0.6361, 0.5694])
max_diff = numpy_cosine_similarity_distance(expected, images)
assert max_diff < 1e-3
pipe.unload_lora_weights()
release_memory(pipe)
def test_vanilla_funetuning(self):
generator = torch.Generator().manual_seed(0)
lora_model_id = "hf-internal-testing/sd-model-finetuned-lora-t4"
card = RepoCard.load(lora_model_id)
base_model_id = card.data.to_dict()["base_model"]
pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None)
pipe = pipe.to(torch_device)
pipe.load_lora_weights(lora_model_id)
images = pipe("A pokemon with blue eyes.", output_type="np", generator=generator, num_inference_steps=2).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.7406, 0.699, 0.5963, 0.7493, 0.7045, 0.6096, 0.6886, 0.6388, 0.583])
max_diff = numpy_cosine_similarity_distance(expected, images)
assert max_diff < 1e-4
pipe.unload_lora_weights()
release_memory(pipe)
def test_unload_kohya_lora(self):
generator = torch.manual_seed(0)
prompt = "masterpiece, best quality, mountain"
num_inference_steps = 2
pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to(
torch_device
)
initial_images = pipe(
prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
).images
initial_images = initial_images[0, -3:, -3:, -1].flatten()
lora_model_id = "hf-internal-testing/civitai-colored-icons-lora"
lora_filename = "Colored_Icons_by_vizsumit.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
generator = torch.manual_seed(0)
lora_images = pipe(
prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
).images
lora_images = lora_images[0, -3:, -3:, -1].flatten()
pipe.unload_lora_weights()
generator = torch.manual_seed(0)
unloaded_lora_images = pipe(
prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
).images
unloaded_lora_images = unloaded_lora_images[0, -3:, -3:, -1].flatten()
self.assertFalse(np.allclose(initial_images, lora_images))
self.assertTrue(np.allclose(initial_images, unloaded_lora_images, atol=1e-3))
release_memory(pipe)
def test_load_unload_load_kohya_lora(self):
# This test ensures that a Kohya-style LoRA can be safely unloaded and then loaded
# without introducing any side-effects. Even though the test uses a Kohya-style
# LoRA, the underlying adapter handling mechanism is format-agnostic.
generator = torch.manual_seed(0)
prompt = "masterpiece, best quality, mountain"
num_inference_steps = 2
pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to(
torch_device
)
initial_images = pipe(
prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
).images
initial_images = initial_images[0, -3:, -3:, -1].flatten()
lora_model_id = "hf-internal-testing/civitai-colored-icons-lora"
lora_filename = "Colored_Icons_by_vizsumit.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
generator = torch.manual_seed(0)
lora_images = pipe(
prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
).images
lora_images = lora_images[0, -3:, -3:, -1].flatten()
pipe.unload_lora_weights()
generator = torch.manual_seed(0)
unloaded_lora_images = pipe(
prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
).images
unloaded_lora_images = unloaded_lora_images[0, -3:, -3:, -1].flatten()
self.assertFalse(np.allclose(initial_images, lora_images))
self.assertTrue(np.allclose(initial_images, unloaded_lora_images, atol=1e-3))
# make sure we can load a LoRA again after unloading and they don't have
# any undesired effects.
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
generator = torch.manual_seed(0)
lora_images_again = pipe(
prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
).images
lora_images_again = lora_images_again[0, -3:, -3:, -1].flatten()
self.assertTrue(np.allclose(lora_images, lora_images_again, atol=1e-3))
release_memory(pipe)
def test_not_empty_state_dict(self):
# Makes sure https://github.com/huggingface/diffusers/issues/7054 does not happen again
pipe = AutoPipelineForText2Image.from_pretrained(
"runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to(torch_device)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
cached_file = hf_hub_download("hf-internal-testing/lcm-lora-test-sd-v1-5", "test_lora.safetensors")
lcm_lora = load_file(cached_file)
pipe.load_lora_weights(lcm_lora, adapter_name="lcm")
self.assertTrue(lcm_lora != {})
release_memory(pipe)
def test_load_unload_load_state_dict(self):
# Makes sure https://github.com/huggingface/diffusers/issues/7054 does not happen again
pipe = AutoPipelineForText2Image.from_pretrained(
"runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to(torch_device)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
cached_file = hf_hub_download("hf-internal-testing/lcm-lora-test-sd-v1-5", "test_lora.safetensors")
lcm_lora = load_file(cached_file)
previous_state_dict = lcm_lora.copy()
pipe.load_lora_weights(lcm_lora, adapter_name="lcm")
self.assertDictEqual(lcm_lora, previous_state_dict)
pipe.unload_lora_weights()
pipe.load_lora_weights(lcm_lora, adapter_name="lcm")
self.assertDictEqual(lcm_lora, previous_state_dict)
release_memory(pipe)
def test_sdv1_5_lcm_lora(self):
pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
pipe.to(torch_device)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
generator = torch.Generator("cpu").manual_seed(0)
lora_model_id = "latent-consistency/lcm-lora-sdv1-5"
pipe.load_lora_weights(lora_model_id)
image = pipe(
"masterpiece, best quality, mountain", generator=generator, num_inference_steps=4, guidance_scale=0.5
).images[0]
expected_image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/lcm_lora/sdv15_lcm_lora.png"
)
image_np = pipe.image_processor.pil_to_numpy(image)
expected_image_np = pipe.image_processor.pil_to_numpy(expected_image)
max_diff = numpy_cosine_similarity_distance(image_np.flatten(), expected_image_np.flatten())
assert max_diff < 1e-4
pipe.unload_lora_weights()
release_memory(pipe)
def test_sdv1_5_lcm_lora_img2img(self):
pipe = AutoPipelineForImage2Image.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
pipe.to(torch_device)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
init_image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/img2img/fantasy_landscape.png"
)
generator = torch.Generator("cpu").manual_seed(0)
lora_model_id = "latent-consistency/lcm-lora-sdv1-5"
pipe.load_lora_weights(lora_model_id)
image = pipe(
"snowy mountain",
generator=generator,
image=init_image,
strength=0.5,
num_inference_steps=4,
guidance_scale=0.5,
).images[0]
expected_image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/lcm_lora/sdv15_lcm_lora_img2img.png"
)
image_np = pipe.image_processor.pil_to_numpy(image)
expected_image_np = pipe.image_processor.pil_to_numpy(expected_image)
max_diff = numpy_cosine_similarity_distance(image_np.flatten(), expected_image_np.flatten())
assert max_diff < 1e-4
pipe.unload_lora_weights()
release_memory(pipe)
def test_sd_load_civitai_empty_network_alpha(self):
"""
This test simply checks that loading a LoRA with an empty network alpha works fine
See: https://github.com/huggingface/diffusers/issues/5606
"""
pipeline = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5").to(torch_device)
pipeline.enable_sequential_cpu_offload()
civitai_path = hf_hub_download("ybelkada/test-ahi-civitai", "ahi_lora_weights.safetensors")
pipeline.load_lora_weights(civitai_path, adapter_name="ahri")
images = pipeline(
"ahri, masterpiece, league of legends",
output_type="np",
generator=torch.manual_seed(156),
num_inference_steps=5,
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.0, 0.0, 0.0, 0.002557, 0.020954, 0.001792, 0.006581, 0.00591, 0.002995])
max_diff = numpy_cosine_similarity_distance(expected, images)
assert max_diff < 1e-3
pipeline.unload_lora_weights()
release_memory(pipeline)
# coding=utf-8
# Copyright 2024 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import gc
import importlib
import sys
import time
import unittest
import numpy as np
import torch
from packaging import version
from diffusers import (
ControlNetModel,
EulerDiscreteScheduler,
LCMScheduler,
StableDiffusionXLAdapterPipeline,
StableDiffusionXLControlNetPipeline,
StableDiffusionXLPipeline,
T2IAdapter,
)
from diffusers.utils.import_utils import is_accelerate_available
from diffusers.utils.testing_utils import (
load_image,
nightly,
numpy_cosine_similarity_distance,
require_peft_backend,
require_torch_gpu,
slow,
torch_device,
)
sys.path.append(".")
from utils import PeftLoraLoaderMixinTests, check_if_lora_correctly_set, state_dicts_almost_equal # noqa: E402
if is_accelerate_available():
from accelerate.utils import release_memory
class StableDiffusionXLLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
has_two_text_encoders = True
pipeline_class = StableDiffusionXLPipeline
scheduler_cls = EulerDiscreteScheduler
scheduler_kwargs = {
"beta_start": 0.00085,
"beta_end": 0.012,
"beta_schedule": "scaled_linear",
"timestep_spacing": "leading",
"steps_offset": 1,
}
unet_kwargs = {
"block_out_channels": (32, 64),
"layers_per_block": 2,
"sample_size": 32,
"in_channels": 4,
"out_channels": 4,
"down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"),
"up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"),
"attention_head_dim": (2, 4),
"use_linear_projection": True,
"addition_embed_type": "text_time",
"addition_time_embed_dim": 8,
"transformer_layers_per_block": (1, 2),
"projection_class_embeddings_input_dim": 80, # 6 * 8 + 32
"cross_attention_dim": 64,
}
vae_kwargs = {
"block_out_channels": [32, 64],
"in_channels": 3,
"out_channels": 3,
"down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"],
"up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"],
"latent_channels": 4,
"sample_size": 128,
}
def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
@slow
@require_torch_gpu
@require_peft_backend
class LoraSDXLIntegrationTests(unittest.TestCase):
def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
def test_sdxl_0_9_lora_one(self):
generator = torch.Generator().manual_seed(0)
pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9")
lora_model_id = "hf-internal-testing/sdxl-0.9-daiton-lora"
lora_filename = "daiton-xl-lora-test.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
pipe.enable_model_cpu_offload()
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.3838, 0.3482, 0.3588, 0.3162, 0.319, 0.3369, 0.338, 0.3366, 0.3213])
max_diff = numpy_cosine_similarity_distance(expected, images)
assert max_diff < 1e-3
pipe.unload_lora_weights()
release_memory(pipe)
def test_sdxl_0_9_lora_two(self):
generator = torch.Generator().manual_seed(0)
pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9")
lora_model_id = "hf-internal-testing/sdxl-0.9-costumes-lora"
lora_filename = "saijo.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
pipe.enable_model_cpu_offload()
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.3137, 0.3269, 0.3355, 0.255, 0.2577, 0.2563, 0.2679, 0.2758, 0.2626])
max_diff = numpy_cosine_similarity_distance(expected, images)
assert max_diff < 1e-3
pipe.unload_lora_weights()
release_memory(pipe)
def test_sdxl_0_9_lora_three(self):
generator = torch.Generator().manual_seed(0)
pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9")
lora_model_id = "hf-internal-testing/sdxl-0.9-kamepan-lora"
lora_filename = "kame_sdxl_v2-000020-16rank.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
pipe.enable_model_cpu_offload()
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.4015, 0.3761, 0.3616, 0.3745, 0.3462, 0.3337, 0.3564, 0.3649, 0.3468])
max_diff = numpy_cosine_similarity_distance(expected, images)
assert max_diff < 5e-3
pipe.unload_lora_weights()
release_memory(pipe)
def test_sdxl_1_0_lora(self):
generator = torch.Generator("cpu").manual_seed(0)
pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
pipe.enable_model_cpu_offload()
lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535])
max_diff = numpy_cosine_similarity_distance(expected, images)
assert max_diff < 1e-4
pipe.unload_lora_weights()
release_memory(pipe)
def test_sdxl_lcm_lora(self):
pipe = StableDiffusionXLPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()
generator = torch.Generator("cpu").manual_seed(0)
lora_model_id = "latent-consistency/lcm-lora-sdxl"
pipe.load_lora_weights(lora_model_id)
image = pipe(
"masterpiece, best quality, mountain", generator=generator, num_inference_steps=4, guidance_scale=0.5
).images[0]
expected_image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/lcm_lora/sdxl_lcm_lora.png"
)
image_np = pipe.image_processor.pil_to_numpy(image)
expected_image_np = pipe.image_processor.pil_to_numpy(expected_image)
max_diff = numpy_cosine_similarity_distance(image_np.flatten(), expected_image_np.flatten())
assert max_diff < 1e-4
pipe.unload_lora_weights()
release_memory(pipe)
def test_sdxl_1_0_lora_fusion(self):
generator = torch.Generator().manual_seed(0)
pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
pipe.fuse_lora()
# We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being
# silently deleted - otherwise this will CPU OOM
pipe.unload_lora_weights()
pipe.enable_model_cpu_offload()
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
# This way we also test equivalence between LoRA fusion and the non-fusion behaviour.
expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535])
max_diff = numpy_cosine_similarity_distance(expected, images)
assert max_diff < 1e-4
release_memory(pipe)
def test_sdxl_1_0_lora_unfusion(self):
generator = torch.Generator("cpu").manual_seed(0)
pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
pipe.fuse_lora()
pipe.enable_model_cpu_offload()
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=3
).images
images_with_fusion = images.flatten()
pipe.unfuse_lora()
generator = torch.Generator("cpu").manual_seed(0)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=3
).images
images_without_fusion = images.flatten()
max_diff = numpy_cosine_similarity_distance(images_with_fusion, images_without_fusion)
assert max_diff < 1e-4
release_memory(pipe)
def test_sdxl_1_0_lora_unfusion_effectivity(self):
pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
pipe.enable_model_cpu_offload()
generator = torch.Generator().manual_seed(0)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
original_image_slice = images[0, -3:, -3:, -1].flatten()
lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
pipe.fuse_lora()
generator = torch.Generator().manual_seed(0)
_ = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
pipe.unfuse_lora()
# We need to unload the lora weights - in the old API unfuse led to unloading the adapter weights
pipe.unload_lora_weights()
generator = torch.Generator().manual_seed(0)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images_without_fusion_slice = images[0, -3:, -3:, -1].flatten()
max_diff = numpy_cosine_similarity_distance(images_without_fusion_slice, original_image_slice)
assert max_diff < 1e-3
release_memory(pipe)
def test_sdxl_1_0_lora_fusion_efficiency(self):
generator = torch.Generator().manual_seed(0)
lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
pipe = StableDiffusionXLPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
)
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()
start_time = time.time()
for _ in range(3):
pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
end_time = time.time()
elapsed_time_non_fusion = end_time - start_time
del pipe
pipe = StableDiffusionXLPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
)
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.float16)
pipe.fuse_lora()
# We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being
# silently deleted - otherwise this will CPU OOM
pipe.unload_lora_weights()
pipe.enable_model_cpu_offload()
generator = torch.Generator().manual_seed(0)
start_time = time.time()
for _ in range(3):
pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
end_time = time.time()
elapsed_time_fusion = end_time - start_time
self.assertTrue(elapsed_time_fusion < elapsed_time_non_fusion)
release_memory(pipe)
def test_sdxl_1_0_last_ben(self):
generator = torch.Generator().manual_seed(0)
pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
pipe.enable_model_cpu_offload()
lora_model_id = "TheLastBen/Papercut_SDXL"
lora_filename = "papercut.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
images = pipe("papercut.safetensors", output_type="np", generator=generator, num_inference_steps=2).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.5244, 0.4347, 0.4312, 0.4246, 0.4398, 0.4409, 0.4884, 0.4938, 0.4094])
max_diff = numpy_cosine_similarity_distance(expected, images)
assert max_diff < 1e-3
pipe.unload_lora_weights()
release_memory(pipe)
def test_sdxl_1_0_fuse_unfuse_all(self):
pipe = StableDiffusionXLPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
)
text_encoder_1_sd = copy.deepcopy(pipe.text_encoder.state_dict())
text_encoder_2_sd = copy.deepcopy(pipe.text_encoder_2.state_dict())
unet_sd = copy.deepcopy(pipe.unet.state_dict())
pipe.load_lora_weights(
"davizca87/sun-flower", weight_name="snfw3rXL-000004.safetensors", torch_dtype=torch.float16
)
fused_te_state_dict = pipe.text_encoder.state_dict()
fused_te_2_state_dict = pipe.text_encoder_2.state_dict()
unet_state_dict = pipe.unet.state_dict()
peft_ge_070 = version.parse(importlib.metadata.version("peft")) >= version.parse("0.7.0")
def remap_key(key, sd):
# some keys have moved around for PEFT >= 0.7.0, but they should still be loaded correctly
if (key in sd) or (not peft_ge_070):
return key
# instead of linear.weight, we now have linear.base_layer.weight, etc.
if key.endswith(".weight"):
key = key[:-7] + ".base_layer.weight"
elif key.endswith(".bias"):
key = key[:-5] + ".base_layer.bias"
return key
for key, value in text_encoder_1_sd.items():
key = remap_key(key, fused_te_state_dict)
self.assertTrue(torch.allclose(fused_te_state_dict[key], value))
for key, value in text_encoder_2_sd.items():
key = remap_key(key, fused_te_2_state_dict)
self.assertTrue(torch.allclose(fused_te_2_state_dict[key], value))
for key, value in unet_state_dict.items():
self.assertTrue(torch.allclose(unet_state_dict[key], value))
pipe.fuse_lora()
pipe.unload_lora_weights()
assert not state_dicts_almost_equal(text_encoder_1_sd, pipe.text_encoder.state_dict())
assert not state_dicts_almost_equal(text_encoder_2_sd, pipe.text_encoder_2.state_dict())
assert not state_dicts_almost_equal(unet_sd, pipe.unet.state_dict())
release_memory(pipe)
del unet_sd, text_encoder_1_sd, text_encoder_2_sd
def test_sdxl_1_0_lora_with_sequential_cpu_offloading(self):
generator = torch.Generator().manual_seed(0)
pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
pipe.enable_sequential_cpu_offload()
lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535])
max_diff = numpy_cosine_similarity_distance(expected, images)
assert max_diff < 1e-3
pipe.unload_lora_weights()
release_memory(pipe)
def test_controlnet_canny_lora(self):
controlnet = ControlNetModel.from_pretrained("diffusers/controlnet-canny-sdxl-1.0")
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet
)
pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors")
pipe.enable_sequential_cpu_offload()
generator = torch.Generator(device="cpu").manual_seed(0)
prompt = "corgi"
image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png"
)
images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=3).images
assert images[0].shape == (768, 512, 3)
original_image = images[0, -3:, -3:, -1].flatten()
expected_image = np.array([0.4574, 0.4461, 0.4435, 0.4462, 0.4396, 0.439, 0.4474, 0.4486, 0.4333])
max_diff = numpy_cosine_similarity_distance(expected_image, original_image)
assert max_diff < 1e-4
pipe.unload_lora_weights()
release_memory(pipe)
def test_sdxl_t2i_adapter_canny_lora(self):
adapter = T2IAdapter.from_pretrained("TencentARC/t2i-adapter-lineart-sdxl-1.0", torch_dtype=torch.float16).to(
"cpu"
)
pipe = StableDiffusionXLAdapterPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0",
adapter=adapter,
torch_dtype=torch.float16,
variant="fp16",
)
pipe.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors")
pipe.enable_model_cpu_offload()
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
prompt = "toy"
image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/t2i_adapter/toy_canny.png"
)
images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=3).images
assert images[0].shape == (768, 512, 3)
image_slice = images[0, -3:, -3:, -1].flatten()
expected_slice = np.array([0.4284, 0.4337, 0.4319, 0.4255, 0.4329, 0.4280, 0.4338, 0.4420, 0.4226])
assert numpy_cosine_similarity_distance(image_slice, expected_slice) < 1e-4
@nightly
def test_sequential_fuse_unfuse(self):
pipe = StableDiffusionXLPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
)
# 1. round
pipe.load_lora_weights("Pclanglais/TintinIA", torch_dtype=torch.float16)
pipe.to(torch_device)
pipe.fuse_lora()
generator = torch.Generator().manual_seed(0)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
image_slice = images[0, -3:, -3:, -1].flatten()
pipe.unfuse_lora()
# 2. round
pipe.load_lora_weights("ProomptEngineer/pe-balloon-diffusion-style", torch_dtype=torch.float16)
pipe.fuse_lora()
pipe.unfuse_lora()
# 3. round
pipe.load_lora_weights("ostris/crayon_style_lora_sdxl", torch_dtype=torch.float16)
pipe.fuse_lora()
pipe.unfuse_lora()
# 4. back to 1st round
pipe.load_lora_weights("Pclanglais/TintinIA", torch_dtype=torch.float16)
pipe.fuse_lora()
generator = torch.Generator().manual_seed(0)
images_2 = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
image_slice_2 = images_2[0, -3:, -3:, -1].flatten()
max_diff = numpy_cosine_similarity_distance(image_slice, image_slice_2)
assert max_diff < 1e-3
pipe.unload_lora_weights()
release_memory(pipe)
@nightly
def test_integration_logits_multi_adapter(self):
path = "stabilityai/stable-diffusion-xl-base-1.0"
lora_id = "CiroN2022/toy-face"
pipe = StableDiffusionXLPipeline.from_pretrained(path, torch_dtype=torch.float16)
pipe.load_lora_weights(lora_id, weight_name="toy_face_sdxl.safetensors", adapter_name="toy")
pipe = pipe.to(torch_device)
self.assertTrue(check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
prompt = "toy_face of a hacker with a hoodie"
lora_scale = 0.9
images = pipe(
prompt=prompt,
num_inference_steps=30,
generator=torch.manual_seed(0),
cross_attention_kwargs={"scale": lora_scale},
output_type="np",
).images
expected_slice_scale = np.array([0.538, 0.539, 0.540, 0.540, 0.542, 0.539, 0.538, 0.541, 0.539])
predicted_slice = images[0, -3:, -3:, -1].flatten()
max_diff = numpy_cosine_similarity_distance(expected_slice_scale, predicted_slice)
assert max_diff < 1e-3
pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel")
pipe.set_adapters("pixel")
prompt = "pixel art, a hacker with a hoodie, simple, flat colors"
images = pipe(
prompt,
num_inference_steps=30,
guidance_scale=7.5,
cross_attention_kwargs={"scale": lora_scale},
generator=torch.manual_seed(0),
output_type="np",
).images
predicted_slice = images[0, -3:, -3:, -1].flatten()
expected_slice_scale = np.array(
[0.61973065, 0.62018543, 0.62181497, 0.61933696, 0.6208608, 0.620576, 0.6200281, 0.62258327, 0.6259889]
)
max_diff = numpy_cosine_similarity_distance(expected_slice_scale, predicted_slice)
assert max_diff < 1e-3
# multi-adapter inference
pipe.set_adapters(["pixel", "toy"], adapter_weights=[0.5, 1.0])
images = pipe(
prompt,
num_inference_steps=30,
guidance_scale=7.5,
cross_attention_kwargs={"scale": 1.0},
generator=torch.manual_seed(0),
output_type="np",
).images
predicted_slice = images[0, -3:, -3:, -1].flatten()
expected_slice_scale = np.array([0.5888, 0.5897, 0.5946, 0.5888, 0.5935, 0.5946, 0.5857, 0.5891, 0.5909])
max_diff = numpy_cosine_similarity_distance(expected_slice_scale, predicted_slice)
assert max_diff < 1e-3
# Lora disabled
pipe.disable_lora()
images = pipe(
prompt,
num_inference_steps=30,
guidance_scale=7.5,
cross_attention_kwargs={"scale": lora_scale},
generator=torch.manual_seed(0),
output_type="np",
).images
predicted_slice = images[0, -3:, -3:, -1].flatten()
expected_slice_scale = np.array([0.5456, 0.5466, 0.5487, 0.5458, 0.5469, 0.5454, 0.5446, 0.5479, 0.5487])
max_diff = numpy_cosine_similarity_distance(expected_slice_scale, predicted_slice)
assert max_diff < 1e-3
...@@ -12,56 +12,29 @@ ...@@ -12,56 +12,29 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import copy
import gc
import importlib
import os import os
import tempfile import tempfile
import time
import unittest import unittest
import numpy as np import numpy as np
import torch import torch
import torch.nn as nn
from huggingface_hub import hf_hub_download
from huggingface_hub.repocard import RepoCard
from packaging import version
from safetensors.torch import load_file
from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer
from diffusers import ( from diffusers import (
AutoencoderKL, AutoencoderKL,
AutoPipelineForImage2Image,
AutoPipelineForText2Image,
ControlNetModel,
DDIMScheduler, DDIMScheduler,
DiffusionPipeline,
EulerDiscreteScheduler,
LCMScheduler, LCMScheduler,
StableDiffusionPipeline,
StableDiffusionXLAdapterPipeline,
StableDiffusionXLControlNetPipeline,
StableDiffusionXLPipeline,
T2IAdapter,
UNet2DConditionModel, UNet2DConditionModel,
) )
from diffusers.utils.import_utils import is_accelerate_available, is_peft_available from diffusers.utils.import_utils import is_peft_available
from diffusers.utils.testing_utils import ( from diffusers.utils.testing_utils import (
floats_tensor, floats_tensor,
load_image,
nightly,
numpy_cosine_similarity_distance,
require_peft_backend, require_peft_backend,
require_peft_version_greater, require_peft_version_greater,
require_torch_gpu,
slow,
torch_device, torch_device,
) )
if is_accelerate_available():
from accelerate.utils import release_memory
if is_peft_available(): if is_peft_available():
from peft import LoraConfig from peft import LoraConfig
from peft.tuners.tuners_utils import BaseTunerLayer from peft.tuners.tuners_utils import BaseTunerLayer
...@@ -80,9 +53,18 @@ def state_dicts_almost_equal(sd1, sd2): ...@@ -80,9 +53,18 @@ def state_dicts_almost_equal(sd1, sd2):
return models_are_equal return models_are_equal
def check_if_lora_correctly_set(model) -> bool:
"""
Checks if the LoRA layers are correctly set with peft
"""
for module in model.modules():
if isinstance(module, BaseTunerLayer):
return True
return False
@require_peft_backend @require_peft_backend
class PeftLoraLoaderMixinTests: class PeftLoraLoaderMixinTests:
torch_device = "cuda" if torch.cuda.is_available() else "cpu"
pipeline_class = None pipeline_class = None
scheduler_cls = None scheduler_cls = None
scheduler_kwargs = None scheduler_kwargs = None
...@@ -177,15 +159,6 @@ class PeftLoraLoaderMixinTests: ...@@ -177,15 +159,6 @@ class PeftLoraLoaderMixinTests:
prepared_inputs["input_ids"] = inputs prepared_inputs["input_ids"] = inputs
return prepared_inputs return prepared_inputs
def check_if_lora_correctly_set(self, model) -> bool:
"""
Checks if the LoRA layers are correctly set with peft
"""
for module in model.modules():
if isinstance(module, BaseTunerLayer):
return True
return False
def test_simple_inference(self): def test_simple_inference(self):
""" """
Tests a simple inference and makes sure it works as expected Tests a simple inference and makes sure it works as expected
...@@ -193,7 +166,7 @@ class PeftLoraLoaderMixinTests: ...@@ -193,7 +166,7 @@ class PeftLoraLoaderMixinTests:
for scheduler_cls in [DDIMScheduler, LCMScheduler]: for scheduler_cls in [DDIMScheduler, LCMScheduler]:
components, text_lora_config, _ = self.get_dummy_components(scheduler_cls) components, text_lora_config, _ = self.get_dummy_components(scheduler_cls)
pipe = self.pipeline_class(**components) pipe = self.pipeline_class(**components)
pipe = pipe.to(self.torch_device) pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None) pipe.set_progress_bar_config(disable=None)
_, _, inputs = self.get_dummy_inputs() _, _, inputs = self.get_dummy_inputs()
...@@ -208,7 +181,7 @@ class PeftLoraLoaderMixinTests: ...@@ -208,7 +181,7 @@ class PeftLoraLoaderMixinTests:
for scheduler_cls in [DDIMScheduler, LCMScheduler]: for scheduler_cls in [DDIMScheduler, LCMScheduler]:
components, text_lora_config, _ = self.get_dummy_components(scheduler_cls) components, text_lora_config, _ = self.get_dummy_components(scheduler_cls)
pipe = self.pipeline_class(**components) pipe = self.pipeline_class(**components)
pipe = pipe.to(self.torch_device) pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None) pipe.set_progress_bar_config(disable=None)
_, _, inputs = self.get_dummy_inputs(with_generator=False) _, _, inputs = self.get_dummy_inputs(with_generator=False)
...@@ -216,14 +189,12 @@ class PeftLoraLoaderMixinTests: ...@@ -216,14 +189,12 @@ class PeftLoraLoaderMixinTests:
self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) self.assertTrue(output_no_lora.shape == (1, 64, 64, 3))
pipe.text_encoder.add_adapter(text_lora_config) pipe.text_encoder.add_adapter(text_lora_config)
self.assertTrue( self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
)
if self.has_two_text_encoders: if self.has_two_text_encoders:
pipe.text_encoder_2.add_adapter(text_lora_config) pipe.text_encoder_2.add_adapter(text_lora_config)
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
) )
output_lora = pipe(**inputs, generator=torch.manual_seed(0)).images output_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
...@@ -239,7 +210,7 @@ class PeftLoraLoaderMixinTests: ...@@ -239,7 +210,7 @@ class PeftLoraLoaderMixinTests:
for scheduler_cls in [DDIMScheduler, LCMScheduler]: for scheduler_cls in [DDIMScheduler, LCMScheduler]:
components, text_lora_config, _ = self.get_dummy_components(scheduler_cls) components, text_lora_config, _ = self.get_dummy_components(scheduler_cls)
pipe = self.pipeline_class(**components) pipe = self.pipeline_class(**components)
pipe = pipe.to(self.torch_device) pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None) pipe.set_progress_bar_config(disable=None)
_, _, inputs = self.get_dummy_inputs(with_generator=False) _, _, inputs = self.get_dummy_inputs(with_generator=False)
...@@ -247,14 +218,12 @@ class PeftLoraLoaderMixinTests: ...@@ -247,14 +218,12 @@ class PeftLoraLoaderMixinTests:
self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) self.assertTrue(output_no_lora.shape == (1, 64, 64, 3))
pipe.text_encoder.add_adapter(text_lora_config) pipe.text_encoder.add_adapter(text_lora_config)
self.assertTrue( self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
)
if self.has_two_text_encoders: if self.has_two_text_encoders:
pipe.text_encoder_2.add_adapter(text_lora_config) pipe.text_encoder_2.add_adapter(text_lora_config)
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
) )
output_lora = pipe(**inputs, generator=torch.manual_seed(0)).images output_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
...@@ -286,7 +255,7 @@ class PeftLoraLoaderMixinTests: ...@@ -286,7 +255,7 @@ class PeftLoraLoaderMixinTests:
for scheduler_cls in [DDIMScheduler, LCMScheduler]: for scheduler_cls in [DDIMScheduler, LCMScheduler]:
components, text_lora_config, _ = self.get_dummy_components(scheduler_cls) components, text_lora_config, _ = self.get_dummy_components(scheduler_cls)
pipe = self.pipeline_class(**components) pipe = self.pipeline_class(**components)
pipe = pipe.to(self.torch_device) pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None) pipe.set_progress_bar_config(disable=None)
_, _, inputs = self.get_dummy_inputs(with_generator=False) _, _, inputs = self.get_dummy_inputs(with_generator=False)
...@@ -294,25 +263,21 @@ class PeftLoraLoaderMixinTests: ...@@ -294,25 +263,21 @@ class PeftLoraLoaderMixinTests:
self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) self.assertTrue(output_no_lora.shape == (1, 64, 64, 3))
pipe.text_encoder.add_adapter(text_lora_config) pipe.text_encoder.add_adapter(text_lora_config)
self.assertTrue( self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
)
if self.has_two_text_encoders: if self.has_two_text_encoders:
pipe.text_encoder_2.add_adapter(text_lora_config) pipe.text_encoder_2.add_adapter(text_lora_config)
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
) )
pipe.fuse_lora() pipe.fuse_lora()
# Fusing should still keep the LoRA layers # Fusing should still keep the LoRA layers
self.assertTrue( self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
)
if self.has_two_text_encoders: if self.has_two_text_encoders:
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
) )
ouput_fused = pipe(**inputs, generator=torch.manual_seed(0)).images ouput_fused = pipe(**inputs, generator=torch.manual_seed(0)).images
...@@ -328,7 +293,7 @@ class PeftLoraLoaderMixinTests: ...@@ -328,7 +293,7 @@ class PeftLoraLoaderMixinTests:
for scheduler_cls in [DDIMScheduler, LCMScheduler]: for scheduler_cls in [DDIMScheduler, LCMScheduler]:
components, text_lora_config, _ = self.get_dummy_components(scheduler_cls) components, text_lora_config, _ = self.get_dummy_components(scheduler_cls)
pipe = self.pipeline_class(**components) pipe = self.pipeline_class(**components)
pipe = pipe.to(self.torch_device) pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None) pipe.set_progress_bar_config(disable=None)
_, _, inputs = self.get_dummy_inputs(with_generator=False) _, _, inputs = self.get_dummy_inputs(with_generator=False)
...@@ -336,25 +301,23 @@ class PeftLoraLoaderMixinTests: ...@@ -336,25 +301,23 @@ class PeftLoraLoaderMixinTests:
self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) self.assertTrue(output_no_lora.shape == (1, 64, 64, 3))
pipe.text_encoder.add_adapter(text_lora_config) pipe.text_encoder.add_adapter(text_lora_config)
self.assertTrue( self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
)
if self.has_two_text_encoders: if self.has_two_text_encoders:
pipe.text_encoder_2.add_adapter(text_lora_config) pipe.text_encoder_2.add_adapter(text_lora_config)
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
) )
pipe.unload_lora_weights() pipe.unload_lora_weights()
# unloading should remove the LoRA layers # unloading should remove the LoRA layers
self.assertFalse( self.assertFalse(
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly unloaded in text encoder" check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly unloaded in text encoder"
) )
if self.has_two_text_encoders: if self.has_two_text_encoders:
self.assertFalse( self.assertFalse(
self.check_if_lora_correctly_set(pipe.text_encoder_2), check_if_lora_correctly_set(pipe.text_encoder_2),
"Lora not correctly unloaded in text encoder 2", "Lora not correctly unloaded in text encoder 2",
) )
...@@ -371,7 +334,7 @@ class PeftLoraLoaderMixinTests: ...@@ -371,7 +334,7 @@ class PeftLoraLoaderMixinTests:
for scheduler_cls in [DDIMScheduler, LCMScheduler]: for scheduler_cls in [DDIMScheduler, LCMScheduler]:
components, text_lora_config, _ = self.get_dummy_components(scheduler_cls) components, text_lora_config, _ = self.get_dummy_components(scheduler_cls)
pipe = self.pipeline_class(**components) pipe = self.pipeline_class(**components)
pipe = pipe.to(self.torch_device) pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None) pipe.set_progress_bar_config(disable=None)
_, _, inputs = self.get_dummy_inputs(with_generator=False) _, _, inputs = self.get_dummy_inputs(with_generator=False)
...@@ -379,14 +342,12 @@ class PeftLoraLoaderMixinTests: ...@@ -379,14 +342,12 @@ class PeftLoraLoaderMixinTests:
self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) self.assertTrue(output_no_lora.shape == (1, 64, 64, 3))
pipe.text_encoder.add_adapter(text_lora_config) pipe.text_encoder.add_adapter(text_lora_config)
self.assertTrue( self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
)
if self.has_two_text_encoders: if self.has_two_text_encoders:
pipe.text_encoder_2.add_adapter(text_lora_config) pipe.text_encoder_2.add_adapter(text_lora_config)
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
) )
images_lora = pipe(**inputs, generator=torch.manual_seed(0)).images images_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
...@@ -415,13 +376,11 @@ class PeftLoraLoaderMixinTests: ...@@ -415,13 +376,11 @@ class PeftLoraLoaderMixinTests:
pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin")) pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))
images_lora_from_pretrained = pipe(**inputs, generator=torch.manual_seed(0)).images images_lora_from_pretrained = pipe(**inputs, generator=torch.manual_seed(0)).images
self.assertTrue( self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
)
if self.has_two_text_encoders: if self.has_two_text_encoders:
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
) )
self.assertTrue( self.assertTrue(
...@@ -436,7 +395,7 @@ class PeftLoraLoaderMixinTests: ...@@ -436,7 +395,7 @@ class PeftLoraLoaderMixinTests:
for scheduler_cls in [DDIMScheduler, LCMScheduler]: for scheduler_cls in [DDIMScheduler, LCMScheduler]:
components, text_lora_config, _ = self.get_dummy_components(scheduler_cls) components, text_lora_config, _ = self.get_dummy_components(scheduler_cls)
pipe = self.pipeline_class(**components) pipe = self.pipeline_class(**components)
pipe = pipe.to(self.torch_device) pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None) pipe.set_progress_bar_config(disable=None)
_, _, inputs = self.get_dummy_inputs(with_generator=False) _, _, inputs = self.get_dummy_inputs(with_generator=False)
...@@ -444,14 +403,12 @@ class PeftLoraLoaderMixinTests: ...@@ -444,14 +403,12 @@ class PeftLoraLoaderMixinTests:
self.assertTrue(output_no_lora.shape == (1, 64, 64, 3)) self.assertTrue(output_no_lora.shape == (1, 64, 64, 3))
pipe.text_encoder.add_adapter(text_lora_config) pipe.text_encoder.add_adapter(text_lora_config)
self.assertTrue( self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
)
if self.has_two_text_encoders: if self.has_two_text_encoders:
pipe.text_encoder_2.add_adapter(text_lora_config) pipe.text_encoder_2.add_adapter(text_lora_config)
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
) )
images_lora = pipe(**inputs, generator=torch.manual_seed(0)).images images_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
...@@ -460,16 +417,16 @@ class PeftLoraLoaderMixinTests: ...@@ -460,16 +417,16 @@ class PeftLoraLoaderMixinTests:
pipe.save_pretrained(tmpdirname) pipe.save_pretrained(tmpdirname)
pipe_from_pretrained = self.pipeline_class.from_pretrained(tmpdirname) pipe_from_pretrained = self.pipeline_class.from_pretrained(tmpdirname)
pipe_from_pretrained.to(self.torch_device) pipe_from_pretrained.to(torch_device)
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe_from_pretrained.text_encoder), check_if_lora_correctly_set(pipe_from_pretrained.text_encoder),
"Lora not correctly set in text encoder", "Lora not correctly set in text encoder",
) )
if self.has_two_text_encoders: if self.has_two_text_encoders:
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe_from_pretrained.text_encoder_2), check_if_lora_correctly_set(pipe_from_pretrained.text_encoder_2),
"Lora not correctly set in text encoder 2", "Lora not correctly set in text encoder 2",
) )
...@@ -487,7 +444,7 @@ class PeftLoraLoaderMixinTests: ...@@ -487,7 +444,7 @@ class PeftLoraLoaderMixinTests:
for scheduler_cls in [DDIMScheduler, LCMScheduler]: for scheduler_cls in [DDIMScheduler, LCMScheduler]:
components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls) components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
pipe = self.pipeline_class(**components) pipe = self.pipeline_class(**components)
pipe = pipe.to(self.torch_device) pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None) pipe.set_progress_bar_config(disable=None)
_, _, inputs = self.get_dummy_inputs(with_generator=False) _, _, inputs = self.get_dummy_inputs(with_generator=False)
...@@ -497,15 +454,13 @@ class PeftLoraLoaderMixinTests: ...@@ -497,15 +454,13 @@ class PeftLoraLoaderMixinTests:
pipe.text_encoder.add_adapter(text_lora_config) pipe.text_encoder.add_adapter(text_lora_config)
pipe.unet.add_adapter(unet_lora_config) pipe.unet.add_adapter(unet_lora_config)
self.assertTrue( self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder" self.assertTrue(check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
)
self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
if self.has_two_text_encoders: if self.has_two_text_encoders:
pipe.text_encoder_2.add_adapter(text_lora_config) pipe.text_encoder_2.add_adapter(text_lora_config)
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
) )
images_lora = pipe(**inputs, generator=torch.manual_seed(0)).images images_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
...@@ -537,14 +492,12 @@ class PeftLoraLoaderMixinTests: ...@@ -537,14 +492,12 @@ class PeftLoraLoaderMixinTests:
pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin")) pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))
images_lora_from_pretrained = pipe(**inputs, generator=torch.manual_seed(0)).images images_lora_from_pretrained = pipe(**inputs, generator=torch.manual_seed(0)).images
self.assertTrue( self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder" self.assertTrue(check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
)
self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
if self.has_two_text_encoders: if self.has_two_text_encoders:
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
) )
self.assertTrue( self.assertTrue(
...@@ -560,7 +513,7 @@ class PeftLoraLoaderMixinTests: ...@@ -560,7 +513,7 @@ class PeftLoraLoaderMixinTests:
for scheduler_cls in [DDIMScheduler, LCMScheduler]: for scheduler_cls in [DDIMScheduler, LCMScheduler]:
components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls) components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
pipe = self.pipeline_class(**components) pipe = self.pipeline_class(**components)
pipe = pipe.to(self.torch_device) pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None) pipe.set_progress_bar_config(disable=None)
_, _, inputs = self.get_dummy_inputs(with_generator=False) _, _, inputs = self.get_dummy_inputs(with_generator=False)
...@@ -569,15 +522,13 @@ class PeftLoraLoaderMixinTests: ...@@ -569,15 +522,13 @@ class PeftLoraLoaderMixinTests:
pipe.text_encoder.add_adapter(text_lora_config) pipe.text_encoder.add_adapter(text_lora_config)
pipe.unet.add_adapter(unet_lora_config) pipe.unet.add_adapter(unet_lora_config)
self.assertTrue( self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder" self.assertTrue(check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
)
self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
if self.has_two_text_encoders: if self.has_two_text_encoders:
pipe.text_encoder_2.add_adapter(text_lora_config) pipe.text_encoder_2.add_adapter(text_lora_config)
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
) )
output_lora = pipe(**inputs, generator=torch.manual_seed(0)).images output_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
...@@ -589,7 +540,7 @@ class PeftLoraLoaderMixinTests: ...@@ -589,7 +540,7 @@ class PeftLoraLoaderMixinTests:
**inputs, generator=torch.manual_seed(0), cross_attention_kwargs={"scale": 0.5} **inputs, generator=torch.manual_seed(0), cross_attention_kwargs={"scale": 0.5}
).images ).images
self.assertTrue( self.assertTrue(
not np.allclose(output_lora, output_lora_scale, atol=1e-4, rtol=1e-4), not np.allclose(output_lora, output_lora_scale, atol=1e-3, rtol=1e-3),
"Lora + scale should change the output", "Lora + scale should change the output",
) )
...@@ -614,7 +565,7 @@ class PeftLoraLoaderMixinTests: ...@@ -614,7 +565,7 @@ class PeftLoraLoaderMixinTests:
for scheduler_cls in [DDIMScheduler, LCMScheduler]: for scheduler_cls in [DDIMScheduler, LCMScheduler]:
components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls) components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
pipe = self.pipeline_class(**components) pipe = self.pipeline_class(**components)
pipe = pipe.to(self.torch_device) pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None) pipe.set_progress_bar_config(disable=None)
_, _, inputs = self.get_dummy_inputs(with_generator=False) _, _, inputs = self.get_dummy_inputs(with_generator=False)
...@@ -624,27 +575,23 @@ class PeftLoraLoaderMixinTests: ...@@ -624,27 +575,23 @@ class PeftLoraLoaderMixinTests:
pipe.text_encoder.add_adapter(text_lora_config) pipe.text_encoder.add_adapter(text_lora_config)
pipe.unet.add_adapter(unet_lora_config) pipe.unet.add_adapter(unet_lora_config)
self.assertTrue( self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder" self.assertTrue(check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
)
self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
if self.has_two_text_encoders: if self.has_two_text_encoders:
pipe.text_encoder_2.add_adapter(text_lora_config) pipe.text_encoder_2.add_adapter(text_lora_config)
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
) )
pipe.fuse_lora() pipe.fuse_lora()
# Fusing should still keep the LoRA layers # Fusing should still keep the LoRA layers
self.assertTrue( self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder" self.assertTrue(check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in unet")
)
self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in unet")
if self.has_two_text_encoders: if self.has_two_text_encoders:
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
) )
ouput_fused = pipe(**inputs, generator=torch.manual_seed(0)).images ouput_fused = pipe(**inputs, generator=torch.manual_seed(0)).images
...@@ -660,7 +607,7 @@ class PeftLoraLoaderMixinTests: ...@@ -660,7 +607,7 @@ class PeftLoraLoaderMixinTests:
for scheduler_cls in [DDIMScheduler, LCMScheduler]: for scheduler_cls in [DDIMScheduler, LCMScheduler]:
components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls) components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
pipe = self.pipeline_class(**components) pipe = self.pipeline_class(**components)
pipe = pipe.to(self.torch_device) pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None) pipe.set_progress_bar_config(disable=None)
_, _, inputs = self.get_dummy_inputs(with_generator=False) _, _, inputs = self.get_dummy_inputs(with_generator=False)
...@@ -669,27 +616,25 @@ class PeftLoraLoaderMixinTests: ...@@ -669,27 +616,25 @@ class PeftLoraLoaderMixinTests:
pipe.text_encoder.add_adapter(text_lora_config) pipe.text_encoder.add_adapter(text_lora_config)
pipe.unet.add_adapter(unet_lora_config) pipe.unet.add_adapter(unet_lora_config)
self.assertTrue( self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder" self.assertTrue(check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
)
self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
if self.has_two_text_encoders: if self.has_two_text_encoders:
pipe.text_encoder_2.add_adapter(text_lora_config) pipe.text_encoder_2.add_adapter(text_lora_config)
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
) )
pipe.unload_lora_weights() pipe.unload_lora_weights()
# unloading should remove the LoRA layers # unloading should remove the LoRA layers
self.assertFalse( self.assertFalse(
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly unloaded in text encoder" check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly unloaded in text encoder"
) )
self.assertFalse(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly unloaded in Unet") self.assertFalse(check_if_lora_correctly_set(pipe.unet), "Lora not correctly unloaded in Unet")
if self.has_two_text_encoders: if self.has_two_text_encoders:
self.assertFalse( self.assertFalse(
self.check_if_lora_correctly_set(pipe.text_encoder_2), check_if_lora_correctly_set(pipe.text_encoder_2),
"Lora not correctly unloaded in text encoder 2", "Lora not correctly unloaded in text encoder 2",
) )
...@@ -707,22 +652,20 @@ class PeftLoraLoaderMixinTests: ...@@ -707,22 +652,20 @@ class PeftLoraLoaderMixinTests:
for scheduler_cls in [DDIMScheduler, LCMScheduler]: for scheduler_cls in [DDIMScheduler, LCMScheduler]:
components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls) components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
pipe = self.pipeline_class(**components) pipe = self.pipeline_class(**components)
pipe = pipe.to(self.torch_device) pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None) pipe.set_progress_bar_config(disable=None)
_, _, inputs = self.get_dummy_inputs(with_generator=False) _, _, inputs = self.get_dummy_inputs(with_generator=False)
pipe.text_encoder.add_adapter(text_lora_config) pipe.text_encoder.add_adapter(text_lora_config)
pipe.unet.add_adapter(unet_lora_config) pipe.unet.add_adapter(unet_lora_config)
self.assertTrue( self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder" self.assertTrue(check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
)
self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
if self.has_two_text_encoders: if self.has_two_text_encoders:
pipe.text_encoder_2.add_adapter(text_lora_config) pipe.text_encoder_2.add_adapter(text_lora_config)
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
) )
pipe.fuse_lora() pipe.fuse_lora()
...@@ -733,14 +676,12 @@ class PeftLoraLoaderMixinTests: ...@@ -733,14 +676,12 @@ class PeftLoraLoaderMixinTests:
output_unfused_lora = pipe(**inputs, generator=torch.manual_seed(0)).images output_unfused_lora = pipe(**inputs, generator=torch.manual_seed(0)).images
# unloading should remove the LoRA layers # unloading should remove the LoRA layers
self.assertTrue( self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Unfuse should still keep LoRA layers")
self.check_if_lora_correctly_set(pipe.text_encoder), "Unfuse should still keep LoRA layers" self.assertTrue(check_if_lora_correctly_set(pipe.unet), "Unfuse should still keep LoRA layers")
)
self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Unfuse should still keep LoRA layers")
if self.has_two_text_encoders: if self.has_two_text_encoders:
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Unfuse should still keep LoRA layers" check_if_lora_correctly_set(pipe.text_encoder_2), "Unfuse should still keep LoRA layers"
) )
# Fuse and unfuse should lead to the same results # Fuse and unfuse should lead to the same results
...@@ -757,7 +698,7 @@ class PeftLoraLoaderMixinTests: ...@@ -757,7 +698,7 @@ class PeftLoraLoaderMixinTests:
for scheduler_cls in [DDIMScheduler, LCMScheduler]: for scheduler_cls in [DDIMScheduler, LCMScheduler]:
components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls) components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
pipe = self.pipeline_class(**components) pipe = self.pipeline_class(**components)
pipe = pipe.to(self.torch_device) pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None) pipe.set_progress_bar_config(disable=None)
_, _, inputs = self.get_dummy_inputs(with_generator=False) _, _, inputs = self.get_dummy_inputs(with_generator=False)
...@@ -769,16 +710,14 @@ class PeftLoraLoaderMixinTests: ...@@ -769,16 +710,14 @@ class PeftLoraLoaderMixinTests:
pipe.unet.add_adapter(unet_lora_config, "adapter-1") pipe.unet.add_adapter(unet_lora_config, "adapter-1")
pipe.unet.add_adapter(unet_lora_config, "adapter-2") pipe.unet.add_adapter(unet_lora_config, "adapter-2")
self.assertTrue( self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder" self.assertTrue(check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
)
self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
if self.has_two_text_encoders: if self.has_two_text_encoders:
pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1") pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1")
pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2") pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2")
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
) )
pipe.set_adapters("adapter-1") pipe.set_adapters("adapter-1")
...@@ -825,7 +764,7 @@ class PeftLoraLoaderMixinTests: ...@@ -825,7 +764,7 @@ class PeftLoraLoaderMixinTests:
for scheduler_cls in [DDIMScheduler, LCMScheduler]: for scheduler_cls in [DDIMScheduler, LCMScheduler]:
components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls) components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
pipe = self.pipeline_class(**components) pipe = self.pipeline_class(**components)
pipe = pipe.to(self.torch_device) pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None) pipe.set_progress_bar_config(disable=None)
_, _, inputs = self.get_dummy_inputs(with_generator=False) _, _, inputs = self.get_dummy_inputs(with_generator=False)
...@@ -837,16 +776,14 @@ class PeftLoraLoaderMixinTests: ...@@ -837,16 +776,14 @@ class PeftLoraLoaderMixinTests:
pipe.unet.add_adapter(unet_lora_config, "adapter-1") pipe.unet.add_adapter(unet_lora_config, "adapter-1")
pipe.unet.add_adapter(unet_lora_config, "adapter-2") pipe.unet.add_adapter(unet_lora_config, "adapter-2")
self.assertTrue( self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder" self.assertTrue(check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
)
self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
if self.has_two_text_encoders: if self.has_two_text_encoders:
pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1") pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1")
pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2") pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2")
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
) )
pipe.set_adapters("adapter-1") pipe.set_adapters("adapter-1")
...@@ -915,7 +852,7 @@ class PeftLoraLoaderMixinTests: ...@@ -915,7 +852,7 @@ class PeftLoraLoaderMixinTests:
for scheduler_cls in [DDIMScheduler, LCMScheduler]: for scheduler_cls in [DDIMScheduler, LCMScheduler]:
components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls) components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
pipe = self.pipeline_class(**components) pipe = self.pipeline_class(**components)
pipe = pipe.to(self.torch_device) pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None) pipe.set_progress_bar_config(disable=None)
_, _, inputs = self.get_dummy_inputs(with_generator=False) _, _, inputs = self.get_dummy_inputs(with_generator=False)
...@@ -927,16 +864,14 @@ class PeftLoraLoaderMixinTests: ...@@ -927,16 +864,14 @@ class PeftLoraLoaderMixinTests:
pipe.unet.add_adapter(unet_lora_config, "adapter-1") pipe.unet.add_adapter(unet_lora_config, "adapter-1")
pipe.unet.add_adapter(unet_lora_config, "adapter-2") pipe.unet.add_adapter(unet_lora_config, "adapter-2")
self.assertTrue( self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder" self.assertTrue(check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
)
self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
if self.has_two_text_encoders: if self.has_two_text_encoders:
pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1") pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1")
pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2") pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2")
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
) )
pipe.set_adapters("adapter-1") pipe.set_adapters("adapter-1")
...@@ -987,7 +922,7 @@ class PeftLoraLoaderMixinTests: ...@@ -987,7 +922,7 @@ class PeftLoraLoaderMixinTests:
for scheduler_cls in [DDIMScheduler, LCMScheduler]: for scheduler_cls in [DDIMScheduler, LCMScheduler]:
components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls) components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
pipe = self.pipeline_class(**components) pipe = self.pipeline_class(**components)
pipe = pipe.to(self.torch_device) pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None) pipe.set_progress_bar_config(disable=None)
_, _, inputs = self.get_dummy_inputs(with_generator=False) _, _, inputs = self.get_dummy_inputs(with_generator=False)
...@@ -995,10 +930,8 @@ class PeftLoraLoaderMixinTests: ...@@ -995,10 +930,8 @@ class PeftLoraLoaderMixinTests:
pipe.unet.add_adapter(unet_lora_config, "adapter-1") pipe.unet.add_adapter(unet_lora_config, "adapter-1")
self.assertTrue( self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder" self.assertTrue(check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
)
self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
# corrupt one LoRA weight with `inf` values # corrupt one LoRA weight with `inf` values
with torch.no_grad(): with torch.no_grad():
...@@ -1025,7 +958,7 @@ class PeftLoraLoaderMixinTests: ...@@ -1025,7 +958,7 @@ class PeftLoraLoaderMixinTests:
for scheduler_cls in [DDIMScheduler, LCMScheduler]: for scheduler_cls in [DDIMScheduler, LCMScheduler]:
components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls) components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
pipe = self.pipeline_class(**components) pipe = self.pipeline_class(**components)
pipe = pipe.to(self.torch_device) pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None) pipe.set_progress_bar_config(disable=None)
_, _, inputs = self.get_dummy_inputs(with_generator=False) _, _, inputs = self.get_dummy_inputs(with_generator=False)
...@@ -1052,7 +985,7 @@ class PeftLoraLoaderMixinTests: ...@@ -1052,7 +985,7 @@ class PeftLoraLoaderMixinTests:
for scheduler_cls in [DDIMScheduler, LCMScheduler]: for scheduler_cls in [DDIMScheduler, LCMScheduler]:
components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls) components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
pipe = self.pipeline_class(**components) pipe = self.pipeline_class(**components)
pipe = pipe.to(self.torch_device) pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None) pipe.set_progress_bar_config(disable=None)
pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
...@@ -1090,7 +1023,7 @@ class PeftLoraLoaderMixinTests: ...@@ -1090,7 +1023,7 @@ class PeftLoraLoaderMixinTests:
for scheduler_cls in [DDIMScheduler, LCMScheduler]: for scheduler_cls in [DDIMScheduler, LCMScheduler]:
components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls) components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
pipe = self.pipeline_class(**components) pipe = self.pipeline_class(**components)
pipe = pipe.to(self.torch_device) pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None) pipe.set_progress_bar_config(disable=None)
_, _, inputs = self.get_dummy_inputs(with_generator=False) _, _, inputs = self.get_dummy_inputs(with_generator=False)
...@@ -1104,16 +1037,14 @@ class PeftLoraLoaderMixinTests: ...@@ -1104,16 +1037,14 @@ class PeftLoraLoaderMixinTests:
pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") pipe.text_encoder.add_adapter(text_lora_config, "adapter-2")
pipe.unet.add_adapter(unet_lora_config, "adapter-2") pipe.unet.add_adapter(unet_lora_config, "adapter-2")
self.assertTrue( self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder" self.assertTrue(check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
)
self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
if self.has_two_text_encoders: if self.has_two_text_encoders:
pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1") pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1")
pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2") pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2")
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
) )
# set them to multi-adapter inference mode # set them to multi-adapter inference mode
...@@ -1152,22 +1083,20 @@ class PeftLoraLoaderMixinTests: ...@@ -1152,22 +1083,20 @@ class PeftLoraLoaderMixinTests:
for scheduler_cls in [DDIMScheduler, LCMScheduler]: for scheduler_cls in [DDIMScheduler, LCMScheduler]:
components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls) components, text_lora_config, unet_lora_config = self.get_dummy_components(scheduler_cls)
pipe = self.pipeline_class(**components) pipe = self.pipeline_class(**components)
pipe = pipe.to(self.torch_device) pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None) pipe.set_progress_bar_config(disable=None)
_, _, inputs = self.get_dummy_inputs(with_generator=False) _, _, inputs = self.get_dummy_inputs(with_generator=False)
pipe.text_encoder.add_adapter(text_lora_config) pipe.text_encoder.add_adapter(text_lora_config)
pipe.unet.add_adapter(unet_lora_config) pipe.unet.add_adapter(unet_lora_config)
self.assertTrue( self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder" self.assertTrue(check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
)
self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet")
if self.has_two_text_encoders: if self.has_two_text_encoders:
pipe.text_encoder_2.add_adapter(text_lora_config) pipe.text_encoder_2.add_adapter(text_lora_config)
self.assertTrue( self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
) )
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
...@@ -1188,7 +1117,7 @@ class PeftLoraLoaderMixinTests: ...@@ -1188,7 +1117,7 @@ class PeftLoraLoaderMixinTests:
for scheduler_cls in [DDIMScheduler, LCMScheduler]: for scheduler_cls in [DDIMScheduler, LCMScheduler]:
components, _, _ = self.get_dummy_components(scheduler_cls) components, _, _ = self.get_dummy_components(scheduler_cls)
pipe = self.pipeline_class(**components) pipe = self.pipeline_class(**components)
pipe = pipe.to(self.torch_device) pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None) pipe.set_progress_bar_config(disable=None)
_pad_mode = "circular" _pad_mode = "circular"
set_pad_mode(pipe.vae, _pad_mode) set_pad_mode(pipe.vae, _pad_mode)
...@@ -1196,1134 +1125,3 @@ class PeftLoraLoaderMixinTests: ...@@ -1196,1134 +1125,3 @@ class PeftLoraLoaderMixinTests:
_, _, inputs = self.get_dummy_inputs() _, _, inputs = self.get_dummy_inputs()
_ = pipe(**inputs).images _ = pipe(**inputs).images
class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
pipeline_class = StableDiffusionPipeline
scheduler_cls = DDIMScheduler
scheduler_kwargs = {
"beta_start": 0.00085,
"beta_end": 0.012,
"beta_schedule": "scaled_linear",
"clip_sample": False,
"set_alpha_to_one": False,
"steps_offset": 1,
}
unet_kwargs = {
"block_out_channels": (32, 64),
"layers_per_block": 2,
"sample_size": 32,
"in_channels": 4,
"out_channels": 4,
"down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"),
"up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"),
"cross_attention_dim": 32,
}
vae_kwargs = {
"block_out_channels": [32, 64],
"in_channels": 3,
"out_channels": 3,
"down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"],
"up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"],
"latent_channels": 4,
}
def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
@slow
@require_torch_gpu
def test_integration_move_lora_cpu(self):
path = "runwayml/stable-diffusion-v1-5"
lora_id = "takuma104/lora-test-text-encoder-lora-target"
pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
pipe.load_lora_weights(lora_id, adapter_name="adapter-1")
pipe.load_lora_weights(lora_id, adapter_name="adapter-2")
pipe = pipe.to("cuda")
self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder),
"Lora not correctly set in text encoder",
)
self.assertTrue(
self.check_if_lora_correctly_set(pipe.unet),
"Lora not correctly set in text encoder",
)
# We will offload the first adapter in CPU and check if the offloading
# has been performed correctly
pipe.set_lora_device(["adapter-1"], "cpu")
for name, module in pipe.unet.named_modules():
if "adapter-1" in name and not isinstance(module, (nn.Dropout, nn.Identity)):
self.assertTrue(module.weight.device == torch.device("cpu"))
elif "adapter-2" in name and not isinstance(module, (nn.Dropout, nn.Identity)):
self.assertTrue(module.weight.device != torch.device("cpu"))
for name, module in pipe.text_encoder.named_modules():
if "adapter-1" in name and not isinstance(module, (nn.Dropout, nn.Identity)):
self.assertTrue(module.weight.device == torch.device("cpu"))
elif "adapter-2" in name and not isinstance(module, (nn.Dropout, nn.Identity)):
self.assertTrue(module.weight.device != torch.device("cpu"))
pipe.set_lora_device(["adapter-1"], 0)
for n, m in pipe.unet.named_modules():
if "adapter-1" in n and not isinstance(m, (nn.Dropout, nn.Identity)):
self.assertTrue(m.weight.device != torch.device("cpu"))
for n, m in pipe.text_encoder.named_modules():
if "adapter-1" in n and not isinstance(m, (nn.Dropout, nn.Identity)):
self.assertTrue(m.weight.device != torch.device("cpu"))
pipe.set_lora_device(["adapter-1", "adapter-2"], "cuda")
for n, m in pipe.unet.named_modules():
if ("adapter-1" in n or "adapter-2" in n) and not isinstance(m, (nn.Dropout, nn.Identity)):
self.assertTrue(m.weight.device != torch.device("cpu"))
for n, m in pipe.text_encoder.named_modules():
if ("adapter-1" in n or "adapter-2" in n) and not isinstance(m, (nn.Dropout, nn.Identity)):
self.assertTrue(m.weight.device != torch.device("cpu"))
@slow
@require_torch_gpu
def test_integration_logits_with_scale(self):
path = "runwayml/stable-diffusion-v1-5"
lora_id = "takuma104/lora-test-text-encoder-lora-target"
pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32)
pipe.load_lora_weights(lora_id)
pipe = pipe.to("cuda")
self.assertTrue(
self.check_if_lora_correctly_set(pipe.unet),
"Lora not correctly set in UNet",
)
self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder),
"Lora not correctly set in text encoder 2",
)
prompt = "a red sks dog"
images = pipe(
prompt=prompt,
num_inference_steps=15,
cross_attention_kwargs={"scale": 0.5},
generator=torch.manual_seed(0),
output_type="np",
).images
expected_slice_scale = np.array([0.307, 0.283, 0.310, 0.310, 0.300, 0.314, 0.336, 0.314, 0.321])
predicted_slice = images[0, -3:, -3:, -1].flatten()
self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3))
@slow
@require_torch_gpu
def test_integration_logits_no_scale(self):
path = "runwayml/stable-diffusion-v1-5"
lora_id = "takuma104/lora-test-text-encoder-lora-target"
pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32)
pipe.load_lora_weights(lora_id)
pipe = pipe.to("cuda")
self.assertTrue(
self.check_if_lora_correctly_set(pipe.text_encoder),
"Lora not correctly set in text encoder",
)
prompt = "a red sks dog"
images = pipe(prompt=prompt, num_inference_steps=30, generator=torch.manual_seed(0), output_type="np").images
expected_slice_scale = np.array([0.074, 0.064, 0.073, 0.0842, 0.069, 0.0641, 0.0794, 0.076, 0.084])
predicted_slice = images[0, -3:, -3:, -1].flatten()
self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3))
@nightly
@require_torch_gpu
def test_integration_logits_multi_adapter(self):
path = "stabilityai/stable-diffusion-xl-base-1.0"
lora_id = "CiroN2022/toy-face"
pipe = StableDiffusionXLPipeline.from_pretrained(path, torch_dtype=torch.float16)
pipe.load_lora_weights(lora_id, weight_name="toy_face_sdxl.safetensors", adapter_name="toy")
pipe = pipe.to("cuda")
self.assertTrue(
self.check_if_lora_correctly_set(pipe.unet),
"Lora not correctly set in Unet",
)
prompt = "toy_face of a hacker with a hoodie"
lora_scale = 0.9
images = pipe(
prompt=prompt,
num_inference_steps=30,
generator=torch.manual_seed(0),
cross_attention_kwargs={"scale": lora_scale},
output_type="np",
).images
expected_slice_scale = np.array([0.538, 0.539, 0.540, 0.540, 0.542, 0.539, 0.538, 0.541, 0.539])
predicted_slice = images[0, -3:, -3:, -1].flatten()
self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3))
pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel")
pipe.set_adapters("pixel")
prompt = "pixel art, a hacker with a hoodie, simple, flat colors"
images = pipe(
prompt,
num_inference_steps=30,
guidance_scale=7.5,
cross_attention_kwargs={"scale": lora_scale},
generator=torch.manual_seed(0),
output_type="np",
).images
predicted_slice = images[0, -3:, -3:, -1].flatten()
expected_slice_scale = np.array(
[0.61973065, 0.62018543, 0.62181497, 0.61933696, 0.6208608, 0.620576, 0.6200281, 0.62258327, 0.6259889]
)
self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3))
# multi-adapter inference
pipe.set_adapters(["pixel", "toy"], adapter_weights=[0.5, 1.0])
images = pipe(
prompt,
num_inference_steps=30,
guidance_scale=7.5,
cross_attention_kwargs={"scale": 1.0},
generator=torch.manual_seed(0),
output_type="np",
).images
predicted_slice = images[0, -3:, -3:, -1].flatten()
expected_slice_scale = np.array([0.5888, 0.5897, 0.5946, 0.5888, 0.5935, 0.5946, 0.5857, 0.5891, 0.5909])
self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3))
# Lora disabled
pipe.disable_lora()
images = pipe(
prompt,
num_inference_steps=30,
guidance_scale=7.5,
cross_attention_kwargs={"scale": lora_scale},
generator=torch.manual_seed(0),
output_type="np",
).images
predicted_slice = images[0, -3:, -3:, -1].flatten()
expected_slice_scale = np.array([0.5456, 0.5466, 0.5487, 0.5458, 0.5469, 0.5454, 0.5446, 0.5479, 0.5487])
self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3))
class StableDiffusionXLLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
has_two_text_encoders = True
pipeline_class = StableDiffusionXLPipeline
scheduler_cls = EulerDiscreteScheduler
scheduler_kwargs = {
"beta_start": 0.00085,
"beta_end": 0.012,
"beta_schedule": "scaled_linear",
"timestep_spacing": "leading",
"steps_offset": 1,
}
unet_kwargs = {
"block_out_channels": (32, 64),
"layers_per_block": 2,
"sample_size": 32,
"in_channels": 4,
"out_channels": 4,
"down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"),
"up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"),
"attention_head_dim": (2, 4),
"use_linear_projection": True,
"addition_embed_type": "text_time",
"addition_time_embed_dim": 8,
"transformer_layers_per_block": (1, 2),
"projection_class_embeddings_input_dim": 80, # 6 * 8 + 32
"cross_attention_dim": 64,
}
vae_kwargs = {
"block_out_channels": [32, 64],
"in_channels": 3,
"out_channels": 3,
"down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"],
"up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"],
"latent_channels": 4,
"sample_size": 128,
}
def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
@slow
@require_torch_gpu
class LoraIntegrationTests(PeftLoraLoaderMixinTests, unittest.TestCase):
pipeline_class = StableDiffusionPipeline
scheduler_cls = DDIMScheduler
scheduler_kwargs = {
"beta_start": 0.00085,
"beta_end": 0.012,
"beta_schedule": "scaled_linear",
"clip_sample": False,
"set_alpha_to_one": False,
"steps_offset": 1,
}
unet_kwargs = {
"block_out_channels": (32, 64),
"layers_per_block": 2,
"sample_size": 32,
"in_channels": 4,
"out_channels": 4,
"down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"),
"up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"),
"cross_attention_dim": 32,
}
vae_kwargs = {
"block_out_channels": [32, 64],
"in_channels": 3,
"out_channels": 3,
"down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"],
"up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"],
"latent_channels": 4,
}
def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
def test_dreambooth_old_format(self):
generator = torch.Generator("cpu").manual_seed(0)
lora_model_id = "hf-internal-testing/lora_dreambooth_dog_example"
card = RepoCard.load(lora_model_id)
base_model_id = card.data.to_dict()["base_model"]
pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None)
pipe = pipe.to(torch_device)
pipe.load_lora_weights(lora_model_id)
images = pipe(
"A photo of a sks dog floating in the river", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.7207, 0.6787, 0.6010, 0.7478, 0.6838, 0.6064, 0.6984, 0.6443, 0.5785])
self.assertTrue(np.allclose(images, expected, atol=1e-4))
release_memory(pipe)
def test_dreambooth_text_encoder_new_format(self):
generator = torch.Generator().manual_seed(0)
lora_model_id = "hf-internal-testing/lora-trained"
card = RepoCard.load(lora_model_id)
base_model_id = card.data.to_dict()["base_model"]
pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None)
pipe = pipe.to(torch_device)
pipe.load_lora_weights(lora_model_id)
images = pipe("A photo of a sks dog", output_type="np", generator=generator, num_inference_steps=2).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.6628, 0.6138, 0.5390, 0.6625, 0.6130, 0.5463, 0.6166, 0.5788, 0.5359])
self.assertTrue(np.allclose(images, expected, atol=1e-4))
release_memory(pipe)
def test_a1111(self):
generator = torch.Generator().manual_seed(0)
pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None).to(
torch_device
)
lora_model_id = "hf-internal-testing/civitai-light-shadow-lora"
lora_filename = "light_and_shadow.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292])
self.assertTrue(np.allclose(images, expected, atol=1e-3))
release_memory(pipe)
def test_lycoris(self):
generator = torch.Generator().manual_seed(0)
pipe = StableDiffusionPipeline.from_pretrained(
"hf-internal-testing/Amixx", safety_checker=None, use_safetensors=True, variant="fp16"
).to(torch_device)
lora_model_id = "hf-internal-testing/edgLycorisMugler-light"
lora_filename = "edgLycorisMugler-light.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.6463, 0.658, 0.599, 0.6542, 0.6512, 0.6213, 0.658, 0.6485, 0.6017])
self.assertTrue(np.allclose(images, expected, atol=1e-3))
release_memory(pipe)
def test_a1111_with_model_cpu_offload(self):
generator = torch.Generator().manual_seed(0)
pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None)
pipe.enable_model_cpu_offload()
lora_model_id = "hf-internal-testing/civitai-light-shadow-lora"
lora_filename = "light_and_shadow.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292])
self.assertTrue(np.allclose(images, expected, atol=1e-3))
release_memory(pipe)
def test_a1111_with_sequential_cpu_offload(self):
generator = torch.Generator().manual_seed(0)
pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None)
pipe.enable_sequential_cpu_offload()
lora_model_id = "hf-internal-testing/civitai-light-shadow-lora"
lora_filename = "light_and_shadow.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.3636, 0.3708, 0.3694, 0.3679, 0.3829, 0.3677, 0.3692, 0.3688, 0.3292])
self.assertTrue(np.allclose(images, expected, atol=1e-3))
release_memory(pipe)
def test_kohya_sd_v15_with_higher_dimensions(self):
generator = torch.Generator().manual_seed(0)
pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to(
torch_device
)
lora_model_id = "hf-internal-testing/urushisato-lora"
lora_filename = "urushisato_v15.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.7165, 0.6616, 0.5833, 0.7504, 0.6718, 0.587, 0.6871, 0.6361, 0.5694])
self.assertTrue(np.allclose(images, expected, atol=1e-3))
release_memory(pipe)
def test_vanilla_funetuning(self):
generator = torch.Generator().manual_seed(0)
lora_model_id = "hf-internal-testing/sd-model-finetuned-lora-t4"
card = RepoCard.load(lora_model_id)
base_model_id = card.data.to_dict()["base_model"]
pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None)
pipe = pipe.to(torch_device)
pipe.load_lora_weights(lora_model_id)
images = pipe("A pokemon with blue eyes.", output_type="np", generator=generator, num_inference_steps=2).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.7406, 0.699, 0.5963, 0.7493, 0.7045, 0.6096, 0.6886, 0.6388, 0.583])
self.assertTrue(np.allclose(images, expected, atol=1e-4))
release_memory(pipe)
def test_unload_kohya_lora(self):
generator = torch.manual_seed(0)
prompt = "masterpiece, best quality, mountain"
num_inference_steps = 2
pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to(
torch_device
)
initial_images = pipe(
prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
).images
initial_images = initial_images[0, -3:, -3:, -1].flatten()
lora_model_id = "hf-internal-testing/civitai-colored-icons-lora"
lora_filename = "Colored_Icons_by_vizsumit.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
generator = torch.manual_seed(0)
lora_images = pipe(
prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
).images
lora_images = lora_images[0, -3:, -3:, -1].flatten()
pipe.unload_lora_weights()
generator = torch.manual_seed(0)
unloaded_lora_images = pipe(
prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
).images
unloaded_lora_images = unloaded_lora_images[0, -3:, -3:, -1].flatten()
self.assertFalse(np.allclose(initial_images, lora_images))
self.assertTrue(np.allclose(initial_images, unloaded_lora_images, atol=1e-3))
release_memory(pipe)
def test_load_unload_load_kohya_lora(self):
# This test ensures that a Kohya-style LoRA can be safely unloaded and then loaded
# without introducing any side-effects. Even though the test uses a Kohya-style
# LoRA, the underlying adapter handling mechanism is format-agnostic.
generator = torch.manual_seed(0)
prompt = "masterpiece, best quality, mountain"
num_inference_steps = 2
pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to(
torch_device
)
initial_images = pipe(
prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
).images
initial_images = initial_images[0, -3:, -3:, -1].flatten()
lora_model_id = "hf-internal-testing/civitai-colored-icons-lora"
lora_filename = "Colored_Icons_by_vizsumit.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
generator = torch.manual_seed(0)
lora_images = pipe(
prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
).images
lora_images = lora_images[0, -3:, -3:, -1].flatten()
pipe.unload_lora_weights()
generator = torch.manual_seed(0)
unloaded_lora_images = pipe(
prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
).images
unloaded_lora_images = unloaded_lora_images[0, -3:, -3:, -1].flatten()
self.assertFalse(np.allclose(initial_images, lora_images))
self.assertTrue(np.allclose(initial_images, unloaded_lora_images, atol=1e-3))
# make sure we can load a LoRA again after unloading and they don't have
# any undesired effects.
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
generator = torch.manual_seed(0)
lora_images_again = pipe(
prompt, output_type="np", generator=generator, num_inference_steps=num_inference_steps
).images
lora_images_again = lora_images_again[0, -3:, -3:, -1].flatten()
self.assertTrue(np.allclose(lora_images, lora_images_again, atol=1e-3))
release_memory(pipe)
def test_not_empty_state_dict(self):
# Makes sure https://github.com/huggingface/diffusers/issues/7054 does not happen again
pipe = AutoPipelineForText2Image.from_pretrained(
"runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
cached_file = hf_hub_download("hf-internal-testing/lcm-lora-test-sd-v1-5", "test_lora.safetensors")
lcm_lora = load_file(cached_file)
pipe.load_lora_weights(lcm_lora, adapter_name="lcm")
self.assertTrue(lcm_lora != {})
release_memory(pipe)
def test_load_unload_load_state_dict(self):
# Makes sure https://github.com/huggingface/diffusers/issues/7054 does not happen again
pipe = AutoPipelineForText2Image.from_pretrained(
"runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
cached_file = hf_hub_download("hf-internal-testing/lcm-lora-test-sd-v1-5", "test_lora.safetensors")
lcm_lora = load_file(cached_file)
previous_state_dict = lcm_lora.copy()
pipe.load_lora_weights(lcm_lora, adapter_name="lcm")
self.assertDictEqual(lcm_lora, previous_state_dict)
pipe.unload_lora_weights()
pipe.load_lora_weights(lcm_lora, adapter_name="lcm")
self.assertDictEqual(lcm_lora, previous_state_dict)
release_memory(pipe)
@slow
@require_torch_gpu
class LoraSDXLIntegrationTests(PeftLoraLoaderMixinTests, unittest.TestCase):
has_two_text_encoders = True
pipeline_class = StableDiffusionXLPipeline
scheduler_cls = EulerDiscreteScheduler
scheduler_kwargs = {
"beta_start": 0.00085,
"beta_end": 0.012,
"beta_schedule": "scaled_linear",
"timestep_spacing": "leading",
"steps_offset": 1,
}
unet_kwargs = {
"block_out_channels": (32, 64),
"layers_per_block": 2,
"sample_size": 32,
"in_channels": 4,
"out_channels": 4,
"down_block_types": ("DownBlock2D", "CrossAttnDownBlock2D"),
"up_block_types": ("CrossAttnUpBlock2D", "UpBlock2D"),
"attention_head_dim": (2, 4),
"use_linear_projection": True,
"addition_embed_type": "text_time",
"addition_time_embed_dim": 8,
"transformer_layers_per_block": (1, 2),
"projection_class_embeddings_input_dim": 80, # 6 * 8 + 32
"cross_attention_dim": 64,
}
vae_kwargs = {
"block_out_channels": [32, 64],
"in_channels": 3,
"out_channels": 3,
"down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"],
"up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"],
"latent_channels": 4,
"sample_size": 128,
}
def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
def test_sdxl_0_9_lora_one(self):
generator = torch.Generator().manual_seed(0)
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9")
lora_model_id = "hf-internal-testing/sdxl-0.9-daiton-lora"
lora_filename = "daiton-xl-lora-test.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
pipe.enable_model_cpu_offload()
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.3838, 0.3482, 0.3588, 0.3162, 0.319, 0.3369, 0.338, 0.3366, 0.3213])
self.assertTrue(np.allclose(images, expected, atol=1e-3))
release_memory(pipe)
def test_sdxl_0_9_lora_two(self):
generator = torch.Generator().manual_seed(0)
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9")
lora_model_id = "hf-internal-testing/sdxl-0.9-costumes-lora"
lora_filename = "saijo.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
pipe.enable_model_cpu_offload()
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.3137, 0.3269, 0.3355, 0.255, 0.2577, 0.2563, 0.2679, 0.2758, 0.2626])
self.assertTrue(np.allclose(images, expected, atol=1e-3))
release_memory(pipe)
def test_sdxl_0_9_lora_three(self):
generator = torch.Generator().manual_seed(0)
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9")
lora_model_id = "hf-internal-testing/sdxl-0.9-kamepan-lora"
lora_filename = "kame_sdxl_v2-000020-16rank.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
pipe.enable_model_cpu_offload()
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.4015, 0.3761, 0.3616, 0.3745, 0.3462, 0.3337, 0.3564, 0.3649, 0.3468])
self.assertTrue(np.allclose(images, expected, atol=5e-3))
release_memory(pipe)
def test_sdxl_1_0_lora(self):
generator = torch.Generator("cpu").manual_seed(0)
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
pipe.enable_model_cpu_offload()
lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535])
self.assertTrue(np.allclose(images, expected, atol=1e-4))
release_memory(pipe)
def test_sdxl_lcm_lora(self):
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()
generator = torch.Generator("cpu").manual_seed(0)
lora_model_id = "latent-consistency/lcm-lora-sdxl"
pipe.load_lora_weights(lora_model_id)
image = pipe(
"masterpiece, best quality, mountain", generator=generator, num_inference_steps=4, guidance_scale=0.5
).images[0]
expected_image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/lcm_lora/sdxl_lcm_lora.png"
)
image_np = pipe.image_processor.pil_to_numpy(image)
expected_image_np = pipe.image_processor.pil_to_numpy(expected_image)
max_diff = numpy_cosine_similarity_distance(image_np.flatten(), expected_image_np.flatten())
assert max_diff < 1e-4
pipe.unload_lora_weights()
release_memory(pipe)
def test_sdv1_5_lcm_lora(self):
pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
pipe.to("cuda")
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
generator = torch.Generator("cpu").manual_seed(0)
lora_model_id = "latent-consistency/lcm-lora-sdv1-5"
pipe.load_lora_weights(lora_model_id)
image = pipe(
"masterpiece, best quality, mountain", generator=generator, num_inference_steps=4, guidance_scale=0.5
).images[0]
expected_image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/lcm_lora/sdv15_lcm_lora.png"
)
image_np = pipe.image_processor.pil_to_numpy(image)
expected_image_np = pipe.image_processor.pil_to_numpy(expected_image)
max_diff = numpy_cosine_similarity_distance(image_np.flatten(), expected_image_np.flatten())
assert max_diff < 1e-4
pipe.unload_lora_weights()
release_memory(pipe)
def test_sdv1_5_lcm_lora_img2img(self):
pipe = AutoPipelineForImage2Image.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
pipe.to("cuda")
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
init_image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/img2img/fantasy_landscape.png"
)
generator = torch.Generator("cpu").manual_seed(0)
lora_model_id = "latent-consistency/lcm-lora-sdv1-5"
pipe.load_lora_weights(lora_model_id)
image = pipe(
"snowy mountain",
generator=generator,
image=init_image,
strength=0.5,
num_inference_steps=4,
guidance_scale=0.5,
).images[0]
expected_image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/lcm_lora/sdv15_lcm_lora_img2img.png"
)
image_np = pipe.image_processor.pil_to_numpy(image)
expected_image_np = pipe.image_processor.pil_to_numpy(expected_image)
max_diff = numpy_cosine_similarity_distance(image_np.flatten(), expected_image_np.flatten())
assert max_diff < 1e-4
pipe.unload_lora_weights()
release_memory(pipe)
def test_sdxl_1_0_lora_fusion(self):
generator = torch.Generator().manual_seed(0)
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
pipe.fuse_lora()
# We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being
# silently deleted - otherwise this will CPU OOM
pipe.unload_lora_weights()
pipe.enable_model_cpu_offload()
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
# This way we also test equivalence between LoRA fusion and the non-fusion behaviour.
expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535])
self.assertTrue(np.allclose(images, expected, atol=1e-4))
release_memory(pipe)
def test_sdxl_1_0_lora_unfusion(self):
generator = torch.Generator("cpu").manual_seed(0)
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
pipe.fuse_lora()
pipe.enable_model_cpu_offload()
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=3
).images
images_with_fusion = images.flatten()
pipe.unfuse_lora()
generator = torch.Generator("cpu").manual_seed(0)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=3
).images
images_without_fusion = images.flatten()
max_diff = numpy_cosine_similarity_distance(images_with_fusion, images_without_fusion)
assert max_diff < 1e-4
release_memory(pipe)
def test_sdxl_1_0_lora_unfusion_effectivity(self):
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
pipe.enable_model_cpu_offload()
generator = torch.Generator().manual_seed(0)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
original_image_slice = images[0, -3:, -3:, -1].flatten()
lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
pipe.fuse_lora()
generator = torch.Generator().manual_seed(0)
_ = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
pipe.unfuse_lora()
# We need to unload the lora weights - in the old API unfuse led to unloading the adapter weights
pipe.unload_lora_weights()
generator = torch.Generator().manual_seed(0)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images_without_fusion_slice = images[0, -3:, -3:, -1].flatten()
self.assertTrue(np.allclose(original_image_slice, images_without_fusion_slice, atol=1e-3))
release_memory(pipe)
def test_sdxl_1_0_lora_fusion_efficiency(self):
generator = torch.Generator().manual_seed(0)
lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()
start_time = time.time()
for _ in range(3):
pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
end_time = time.time()
elapsed_time_non_fusion = end_time - start_time
del pipe
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.float16)
pipe.fuse_lora()
# We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being
# silently deleted - otherwise this will CPU OOM
pipe.unload_lora_weights()
pipe.enable_model_cpu_offload()
generator = torch.Generator().manual_seed(0)
start_time = time.time()
for _ in range(3):
pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
end_time = time.time()
elapsed_time_fusion = end_time - start_time
self.assertTrue(elapsed_time_fusion < elapsed_time_non_fusion)
release_memory(pipe)
def test_sdxl_1_0_last_ben(self):
generator = torch.Generator().manual_seed(0)
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
pipe.enable_model_cpu_offload()
lora_model_id = "TheLastBen/Papercut_SDXL"
lora_filename = "papercut.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
images = pipe("papercut.safetensors", output_type="np", generator=generator, num_inference_steps=2).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.5244, 0.4347, 0.4312, 0.4246, 0.4398, 0.4409, 0.4884, 0.4938, 0.4094])
self.assertTrue(np.allclose(images, expected, atol=1e-3))
release_memory(pipe)
def test_sdxl_1_0_fuse_unfuse_all(self):
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
text_encoder_1_sd = copy.deepcopy(pipe.text_encoder.state_dict())
text_encoder_2_sd = copy.deepcopy(pipe.text_encoder_2.state_dict())
unet_sd = copy.deepcopy(pipe.unet.state_dict())
pipe.load_lora_weights(
"davizca87/sun-flower", weight_name="snfw3rXL-000004.safetensors", torch_dtype=torch.float16
)
fused_te_state_dict = pipe.text_encoder.state_dict()
fused_te_2_state_dict = pipe.text_encoder_2.state_dict()
unet_state_dict = pipe.unet.state_dict()
peft_ge_070 = version.parse(importlib.metadata.version("peft")) >= version.parse("0.7.0")
def remap_key(key, sd):
# some keys have moved around for PEFT >= 0.7.0, but they should still be loaded correctly
if (key in sd) or (not peft_ge_070):
return key
# instead of linear.weight, we now have linear.base_layer.weight, etc.
if key.endswith(".weight"):
key = key[:-7] + ".base_layer.weight"
elif key.endswith(".bias"):
key = key[:-5] + ".base_layer.bias"
return key
for key, value in text_encoder_1_sd.items():
key = remap_key(key, fused_te_state_dict)
self.assertTrue(torch.allclose(fused_te_state_dict[key], value))
for key, value in text_encoder_2_sd.items():
key = remap_key(key, fused_te_2_state_dict)
self.assertTrue(torch.allclose(fused_te_2_state_dict[key], value))
for key, value in unet_state_dict.items():
self.assertTrue(torch.allclose(unet_state_dict[key], value))
pipe.fuse_lora()
pipe.unload_lora_weights()
assert not state_dicts_almost_equal(text_encoder_1_sd, pipe.text_encoder.state_dict())
assert not state_dicts_almost_equal(text_encoder_2_sd, pipe.text_encoder_2.state_dict())
assert not state_dicts_almost_equal(unet_sd, pipe.unet.state_dict())
release_memory(pipe)
del unet_sd, text_encoder_1_sd, text_encoder_2_sd
def test_sdxl_1_0_lora_with_sequential_cpu_offloading(self):
generator = torch.Generator().manual_seed(0)
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
pipe.enable_sequential_cpu_offload()
lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.4468, 0.4087, 0.4134, 0.366, 0.3202, 0.3505, 0.3786, 0.387, 0.3535])
self.assertTrue(np.allclose(images, expected, atol=1e-3))
release_memory(pipe)
def test_sd_load_civitai_empty_network_alpha(self):
"""
This test simply checks that loading a LoRA with an empty network alpha works fine
See: https://github.com/huggingface/diffusers/issues/5606
"""
pipeline = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5").to("cuda")
pipeline.enable_sequential_cpu_offload()
civitai_path = hf_hub_download("ybelkada/test-ahi-civitai", "ahi_lora_weights.safetensors")
pipeline.load_lora_weights(civitai_path, adapter_name="ahri")
images = pipeline(
"ahri, masterpiece, league of legends",
output_type="np",
generator=torch.manual_seed(156),
num_inference_steps=5,
).images
images = images[0, -3:, -3:, -1].flatten()
expected = np.array([0.0, 0.0, 0.0, 0.002557, 0.020954, 0.001792, 0.006581, 0.00591, 0.002995])
self.assertTrue(np.allclose(images, expected, atol=1e-3))
release_memory(pipeline)
def test_controlnet_canny_lora(self):
controlnet = ControlNetModel.from_pretrained("diffusers/controlnet-canny-sdxl-1.0")
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet
)
pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors")
pipe.enable_sequential_cpu_offload()
generator = torch.Generator(device="cpu").manual_seed(0)
prompt = "corgi"
image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png"
)
images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=3).images
assert images[0].shape == (768, 512, 3)
original_image = images[0, -3:, -3:, -1].flatten()
expected_image = np.array([0.4574, 0.4461, 0.4435, 0.4462, 0.4396, 0.439, 0.4474, 0.4486, 0.4333])
assert np.allclose(original_image, expected_image, atol=1e-04)
release_memory(pipe)
def test_sdxl_t2i_adapter_canny_lora(self):
adapter = T2IAdapter.from_pretrained("TencentARC/t2i-adapter-lineart-sdxl-1.0", torch_dtype=torch.float16).to(
"cpu"
)
pipe = StableDiffusionXLAdapterPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0",
adapter=adapter,
torch_dtype=torch.float16,
variant="fp16",
)
pipe.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors")
pipe.enable_model_cpu_offload()
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
prompt = "toy"
image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/t2i_adapter/toy_canny.png"
)
images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=3).images
assert images[0].shape == (768, 512, 3)
image_slice = images[0, -3:, -3:, -1].flatten()
expected_slice = np.array([0.4284, 0.4337, 0.4319, 0.4255, 0.4329, 0.4280, 0.4338, 0.4420, 0.4226])
assert numpy_cosine_similarity_distance(image_slice, expected_slice) < 1e-4
@nightly
def test_sequential_fuse_unfuse(self):
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
# 1. round
pipe.load_lora_weights("Pclanglais/TintinIA", torch_dtype=torch.float16)
pipe.to("cuda")
pipe.fuse_lora()
generator = torch.Generator().manual_seed(0)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
image_slice = images[0, -3:, -3:, -1].flatten()
pipe.unfuse_lora()
# 2. round
pipe.load_lora_weights("ProomptEngineer/pe-balloon-diffusion-style", torch_dtype=torch.float16)
pipe.fuse_lora()
pipe.unfuse_lora()
# 3. round
pipe.load_lora_weights("ostris/crayon_style_lora_sdxl", torch_dtype=torch.float16)
pipe.fuse_lora()
pipe.unfuse_lora()
# 4. back to 1st round
pipe.load_lora_weights("Pclanglais/TintinIA", torch_dtype=torch.float16)
pipe.fuse_lora()
generator = torch.Generator().manual_seed(0)
images_2 = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
).images
image_slice_2 = images_2[0, -3:, -3:, -1].flatten()
self.assertTrue(np.allclose(image_slice, image_slice_2, atol=1e-3))
release_memory(pipe)
...@@ -34,6 +34,7 @@ For a check only (as used in `make quality`) run: ...@@ -34,6 +34,7 @@ For a check only (as used in `make quality`) run:
python utils/custom_init_isort.py --check_only python utils/custom_init_isort.py --check_only
``` ```
""" """
import argparse import argparse
import os import os
import re import re
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
Script to close stale issue. Taken in part from the AllenNLP repository. Script to close stale issue. Taken in part from the AllenNLP repository.
https://github.com/allenai/allennlp. https://github.com/allenai/allennlp.
""" """
import os import os
from datetime import datetime as dt from datetime import datetime as dt
from datetime import timezone from datetime import timezone
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment