Unverified commit b3e5cd6b authored by Patrick von Platen, committed by GitHub

[Kandinsky] Add combined pipelines / Fix cpu model offload / Fix inpainting (#4207)



* Add combined pipeline

* Download readme

* Upload

* up

* up

* fix final

* Add enable model cpu offload kandinsky

* finish

* finish

* Fix

* fix more

* make style

* fix kandinsky mask

* fix inpainting test

* add callbacks

* add tests

* fix tests

* Apply suggestions from code review

Co-authored-by: YiYi Xu <yixu310@gmail.com>

* docs

* docs

* correct docs

* fix tests

* add warning

* correct docs

---------
Co-authored-by: YiYi Xu <yixu310@gmail.com>
parent b37dc3b3
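For context when reviewing, a minimal usage sketch of the new API added here. The checkpoint id, dtype, and prompt are illustrative assumptions, not taken from this diff:

import torch
from diffusers import KandinskyV22CombinedPipeline

# One combined pipeline runs the prior (text -> image embedding) and the
# decoder (image embedding -> image) in a single __call__.
pipe = KandinskyV22CombinedPipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()  # new: whole sub-models occupy the GPU only while they run

image = pipe(prompt="A photo of a cat wearing a hat", num_inference_steps=25).images[0]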
@@ -7,6 +7,8 @@ from transformers import CLIPImageProcessor, CLIPTextModelWithProjection, CLIPTokenizer
from ...models import PriorTransformer
from ...schedulers import UnCLIPScheduler
from ...utils import (
    is_accelerate_available,
    is_accelerate_version,
    logging,
    randn_tensor,
    replace_example_docstring,
@@ -162,7 +164,7 @@ class KandinskyV22PriorEmb2EmbPipeline(DiffusionPipeline):
        generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
        latents: Optional[torch.FloatTensor] = None,
        negative_prior_prompt: Optional[str] = None,
-        negative_prompt: Union[str] = "",
+        negative_prompt: str = "",
        guidance_scale: float = 4.0,
        device=None,
    ):
@@ -392,6 +394,35 @@ class KandinskyV22PriorEmb2EmbPipeline(DiffusionPipeline):
        return prompt_embeds, text_encoder_hidden_states, text_mask
    def enable_model_cpu_offload(self, gpu_id=0):
        r"""
        Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
        to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
        method is called, and the model remains on the GPU until the next model runs. Memory savings are lower than with
        `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
        """
        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
            from accelerate import cpu_offload_with_hook
        else:
            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")

        device = torch.device(f"cuda:{gpu_id}")

        if self.device.type != "cpu":
            self.to("cpu", silence_dtype_warnings=True)
            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)

        hook = None
        for cpu_offloaded_model in [self.text_encoder, self.prior]:
            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)

        # We'll offload the last model manually.
        self.prior_hook = hook

        _, hook = cpu_offload_with_hook(self.image_encoder, device, prev_module_hook=self.prior_hook)

        self.final_offload_hook = hook
    @torch.no_grad()
    @replace_example_docstring(EXAMPLE_DOC_STRING)
    def __call__(
@@ -549,8 +580,12 @@ class KandinskyV22PriorEmb2EmbPipeline(DiffusionPipeline):
        # if negative prompt has been defined, we split the image embedding into two
        if negative_prompt is None:
            zero_embeds = self.get_zero_embed(latents.shape[0], device=latents.device)

            if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
                self.final_offload_hook.offload()
        else:
            image_embeddings, zero_embeds = image_embeddings.chunk(2)

            if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
                self.prior_hook.offload()

        if output_type not in ["pt", "np"]:
            raise ValueError(f"Only the output types `pt` and `np` are supported not output_type={output_type}")
......
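The hook chaining in `enable_model_cpu_offload` above follows accelerate's model-offload pattern. A minimal standalone sketch of the idea (the two `nn.Linear` modules are stand-ins for the real `text_encoder` / `prior`):

import torch
from accelerate import cpu_offload_with_hook

device = torch.device("cuda:0")
model_a, model_b = torch.nn.Linear(8, 8), torch.nn.Linear(8, 8)

# Each wrapped module is moved to `device` right before its forward pass;
# passing the previous hook lets the earlier module be offloaded back to
# CPU as soon as the next one starts running.
hook = None
for module in (model_a, model_b):
    _, hook = cpu_offload_with_hook(module, device, prev_module_hook=hook)

out = model_b(model_a(torch.randn(1, 8)))
hook.offload()  # manually return the last module to CPU, as the pipeline does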
@@ -28,7 +28,7 @@ from typing import Any, Callable, Dict, List, Optional, Union
import numpy as np
import PIL
import torch
-from huggingface_hub import hf_hub_download, model_info, snapshot_download
+from huggingface_hub import ModelCard, hf_hub_download, model_info, snapshot_download
from packaging import version
from requests.exceptions import HTTPError
from tqdm.auto import tqdm
@@ -78,6 +78,7 @@ INDEX_FILE = "diffusion_pytorch_model.bin"
CUSTOM_PIPELINE_FILE_NAME = "pipeline.py"
DUMMY_MODULES_FOLDER = "diffusers.utils"
TRANSFORMERS_DUMMY_MODULES_FOLDER = "transformers.utils"
CONNECTED_PIPES_KEYS = ["prior"]

logger = logging.get_logger(__name__)
@@ -322,7 +323,9 @@ def get_class_obj_and_candidates(library_name, class_name, importable_classes, pipelines):
    return class_obj, class_candidates
-def _get_pipeline_class(class_obj, config, custom_pipeline=None, cache_dir=None, revision=None):
+def _get_pipeline_class(
+    class_obj, config, load_connected_pipeline=False, custom_pipeline=None, cache_dir=None, revision=None
+):
    if custom_pipeline is not None:
        if custom_pipeline.endswith(".py"):
            path = Path(custom_pipeline)
@@ -340,7 +343,22 @@ def _get_pipeline_class(class_obj, config, custom_pipeline=None, cache_dir=None, revision=None):
            return class_obj

    diffusers_module = importlib.import_module(class_obj.__module__.split(".")[0])
-    return getattr(diffusers_module, config["_class_name"])
+    pipeline_cls = getattr(diffusers_module, config["_class_name"])

    if load_connected_pipeline:
        from .auto_pipeline import _get_connected_pipeline

        connected_pipeline_cls = _get_connected_pipeline(pipeline_cls)
        if connected_pipeline_cls is not None:
            logger.info(
                f"Loading connected pipeline {connected_pipeline_cls.__name__} instead of {pipeline_cls.__name__} as specified via `load_connected_pipeline=True`"
            )
        else:
            logger.info(f"{pipeline_cls.__name__} has no connected pipeline class. Loading {pipeline_cls.__name__}.")

        pipeline_cls = connected_pipeline_cls or pipeline_cls

    return pipeline_cls
def load_sub_model(
@@ -475,6 +493,7 @@ class DiffusionPipeline(ConfigMixin):
    config_name = "model_index.json"
    _optional_components = []
    _exclude_from_cpu_offload = []
    _load_connected_pipes = False

    def register_modules(self, **kwargs):
        # import it here to avoid circular import
@@ -875,6 +894,7 @@
        low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT)
        variant = kwargs.pop("variant", None)
        use_safetensors = kwargs.pop("use_safetensors", None if is_safetensors_available() else False)
        load_connected_pipeline = kwargs.pop("load_connected_pipeline", False)

        # 1. Download the checkpoints and configs
        # use snapshot download here to get it working from from_pretrained
@@ -893,6 +913,7 @@
                custom_pipeline=custom_pipeline,
                custom_revision=custom_revision,
                variant=variant,
                load_connected_pipeline=load_connected_pipeline,
                **kwargs,
            )
        else:
@@ -920,7 +941,12 @@
        # 3. Load the pipeline class, if using custom module then load it from the hub
        # if we load from explicit class, let's use it
        pipeline_class = _get_pipeline_class(
-            cls, config_dict, custom_pipeline=custom_pipeline, cache_dir=cache_dir, revision=custom_revision
+            cls,
+            config_dict,
+            load_connected_pipeline=load_connected_pipeline,
+            custom_pipeline=custom_pipeline,
+            cache_dir=cache_dir,
+            revision=custom_revision,
        )

        # DEPRECATED: To be removed in 1.0.0
@@ -1061,6 +1087,42 @@
            init_kwargs[name] = loaded_sub_model  # UNet(...), # DiffusionSchedule(...)
        if pipeline_class._load_connected_pipes and os.path.isfile(os.path.join(cached_folder, "README.md")):
            modelcard = ModelCard.load(os.path.join(cached_folder, "README.md"))
            connected_pipes = {prefix: getattr(modelcard.data, prefix, [None])[0] for prefix in CONNECTED_PIPES_KEYS}
            load_kwargs = {
                "cache_dir": cache_dir,
                "resume_download": resume_download,
                "force_download": force_download,
                "proxies": proxies,
                "local_files_only": local_files_only,
                "use_auth_token": use_auth_token,
                "revision": revision,
                "torch_dtype": torch_dtype,
                "custom_pipeline": custom_pipeline,
                "custom_revision": custom_revision,
                "provider": provider,
                "sess_options": sess_options,
                "device_map": device_map,
                "max_memory": max_memory,
                "offload_folder": offload_folder,
                "offload_state_dict": offload_state_dict,
                "low_cpu_mem_usage": low_cpu_mem_usage,
                "variant": variant,
                "use_safetensors": use_safetensors,
            }
            connected_pipes = {
                prefix: DiffusionPipeline.from_pretrained(repo_id, **load_kwargs.copy())
                for prefix, repo_id in connected_pipes.items()
                if repo_id is not None
            }

            for prefix, connected_pipe in connected_pipes.items():
                # add connected pipes to `init_kwargs` with <prefix>_<component_name>, e.g. "prior_text_encoder"
                init_kwargs.update(
                    {"_".join([prefix, name]): component for name, component in connected_pipe.components.items()}
                )
        # 7. Potentially add passed objects if expected
        missing_modules = set(expected_modules) - set(init_kwargs.keys())
        passed_modules = list(passed_class_obj.keys())
@@ -1231,6 +1293,7 @@
        custom_revision = kwargs.pop("custom_revision", None)
        variant = kwargs.pop("variant", None)
        use_safetensors = kwargs.pop("use_safetensors", None)
        load_connected_pipeline = kwargs.pop("load_connected_pipeline", False)

        if use_safetensors and not is_safetensors_available():
            raise ValueError(
@@ -1242,7 +1305,6 @@
            use_safetensors = is_safetensors_available()
            allow_pickle = True

-        pipeline_is_cached = False
        allow_patterns = None
        ignore_patterns = None
@@ -1322,7 +1384,12 @@
        # retrieve passed components that should not be downloaded
        pipeline_class = _get_pipeline_class(
-            cls, config_dict, custom_pipeline=custom_pipeline, cache_dir=cache_dir, revision=custom_revision
+            cls,
+            config_dict,
+            load_connected_pipeline=load_connected_pipeline,
+            custom_pipeline=custom_pipeline,
+            cache_dir=cache_dir,
+            revision=custom_revision,
        )
        expected_components, _ = cls._get_signature_keys(pipeline_class)
        passed_components = [k for k in expected_components if k in kwargs]
@@ -1367,6 +1434,10 @@
            allow_patterns = [
                p for p in allow_patterns if not (len(p.split("/")) == 2 and p.split("/")[0] in passed_components)
            ]

            if pipeline_class._load_connected_pipes:
                allow_patterns.append("README.md")

            # Don't download index files of forbidden patterns either
            ignore_patterns = ignore_patterns + [f"{i}.index.*json" for i in ignore_patterns]
@@ -1390,7 +1461,7 @@
        # download all allow_patterns - ignore_patterns
        try:
-            return snapshot_download(
+            cached_folder = snapshot_download(
                pretrained_model_name,
                cache_dir=cache_dir,
                resume_download=resume_download,
@@ -1402,6 +1473,15 @@
                ignore_patterns=ignore_patterns,
                user_agent=user_agent,
            )

            if pipeline_class._load_connected_pipes:
                modelcard = ModelCard.load(os.path.join(cached_folder, "README.md"))
                connected_pipes = sum([getattr(modelcard.data, k, []) for k in CONNECTED_PIPES_KEYS], [])
                for connected_pipe_repo_id in connected_pipes:
                    DiffusionPipeline.download(connected_pipe_repo_id)

            return cached_folder

        except FileNotFoundError:
            # Means we tried to load pipeline with `local_files_only=True` but the files have not been found in local cache.
            # This can happen in two cases:
......
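In practice the new flag ties the pieces above together; a sketch (the repo id is an assumption, and the decoder checkpoint's model card must list a `prior` entry for anything to be attached):

from diffusers import DiffusionPipeline

# With load_connected_pipeline=True, _get_pipeline_class swaps in the
# connected combined class, and the prior's components are registered
# under a "prior_" prefix (e.g. pipe.prior_text_encoder).
pipe = DiffusionPipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-2-decoder", load_connected_pipeline=True
)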
@@ -167,6 +167,36 @@ class ImageTextPipelineOutput(metaclass=DummyObject):
        requires_backends(cls, ["torch", "transformers"])
class KandinskyCombinedPipeline(metaclass=DummyObject):
    _backends = ["torch", "transformers"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["torch", "transformers"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "transformers"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "transformers"])


class KandinskyImg2ImgCombinedPipeline(metaclass=DummyObject):
    _backends = ["torch", "transformers"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["torch", "transformers"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "transformers"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "transformers"])


class KandinskyImg2ImgPipeline(metaclass=DummyObject):
    _backends = ["torch", "transformers"]
@@ -182,6 +212,21 @@ class KandinskyImg2ImgPipeline(metaclass=DummyObject):
        requires_backends(cls, ["torch", "transformers"])


class KandinskyInpaintCombinedPipeline(metaclass=DummyObject):
    _backends = ["torch", "transformers"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["torch", "transformers"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "transformers"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "transformers"])


class KandinskyInpaintPipeline(metaclass=DummyObject):
    _backends = ["torch", "transformers"]
@@ -227,6 +272,21 @@ class KandinskyPriorPipeline(metaclass=DummyObject):
        requires_backends(cls, ["torch", "transformers"])


class KandinskyV22CombinedPipeline(metaclass=DummyObject):
    _backends = ["torch", "transformers"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["torch", "transformers"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "transformers"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "transformers"])


class KandinskyV22ControlnetImg2ImgPipeline(metaclass=DummyObject):
    _backends = ["torch", "transformers"]
@@ -257,6 +317,21 @@ class KandinskyV22ControlnetPipeline(metaclass=DummyObject):
        requires_backends(cls, ["torch", "transformers"])


class KandinskyV22Img2ImgCombinedPipeline(metaclass=DummyObject):
    _backends = ["torch", "transformers"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["torch", "transformers"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "transformers"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "transformers"])


class KandinskyV22Img2ImgPipeline(metaclass=DummyObject):
    _backends = ["torch", "transformers"]
@@ -272,6 +347,21 @@ class KandinskyV22Img2ImgPipeline(metaclass=DummyObject):
        requires_backends(cls, ["torch", "transformers"])


class KandinskyV22InpaintCombinedPipeline(metaclass=DummyObject):
    _backends = ["torch", "transformers"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["torch", "transformers"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "transformers"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "transformers"])


class KandinskyV22InpaintPipeline(metaclass=DummyObject):
    _backends = ["torch", "transformers"]
......
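All of the classes above follow the same dummy-object pattern; a simplified sketch of how such a placeholder works (not the exact library implementation):

class DummyObject(type):
    # Metaclass for placeholder classes: any attribute access on the class
    # raises an ImportError naming the missing backends, instead of failing
    # with an unrelated AttributeError somewhere deeper in user code.
    def __getattr__(cls, name):
        raise ImportError(f"{cls.__name__} requires the backends {cls._backends}")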
@@ -64,6 +64,10 @@ def randn_tensor(
    elif gen_device_type != device.type and gen_device_type == "cuda":
        raise ValueError(f"Cannot generate a {device} tensor from a generator of type {gen_device_type}.")

    # make sure generator list of length 1 is treated like a non-list
    if isinstance(generator, list) and len(generator) == 1:
        generator = generator[0]

    if isinstance(generator, list):
        shape = (1,) + shape[1:]
        latents = [
......
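The effect of the new guard in `randn_tensor`, as a quick check (import path assumed; `randn_tensor` lived in `diffusers.utils` at the time):

import torch
from diffusers.utils import randn_tensor

a = randn_tensor((2, 4), generator=[torch.Generator().manual_seed(0)])
b = randn_tensor((2, 4), generator=torch.Generator().manual_seed(0))
# Before this change, a length-1 generator list went down the per-batch-item
# branch (my reading: indexing generator[1] for the second item would fail);
# now the list is unwrapped first and both calls produce identical latents.
assert torch.equal(a, b)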
@@ -32,30 +32,7 @@ from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
enable_full_determinism()

-class KandinskyPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
-    pipeline_class = KandinskyPipeline
-    params = [
-        "prompt",
-        "image_embeds",
-        "negative_image_embeds",
-    ]
-    batch_params = ["prompt", "negative_prompt", "image_embeds", "negative_image_embeds"]
-    required_optional_params = [
-        "generator",
-        "height",
-        "width",
-        "latents",
-        "guidance_scale",
-        "negative_prompt",
-        "num_inference_steps",
-        "return_dict",
-        "guidance_scale",
-        "num_images_per_prompt",
-        "output_type",
-        "return_dict",
-    ]
-    test_xformers_attention = False
+class Dummies:
    @property
    def text_embedder_hidden_size(self):
        return 32
@@ -74,7 +51,7 @@ class KandinskyPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    @property
    def cross_attention_dim(self):
-        return 100
+        return 32

    @property
    def dummy_tokenizer(self):
@@ -196,6 +173,39 @@ class KandinskyPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
        }
        return inputs
class KandinskyPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = KandinskyPipeline
    params = [
        "prompt",
        "image_embeds",
        "negative_image_embeds",
    ]
    batch_params = ["prompt", "negative_prompt", "image_embeds", "negative_image_embeds"]
    required_optional_params = [
        "generator",
        "height",
        "width",
        "latents",
        "guidance_scale",
        "negative_prompt",
        "num_inference_steps",
        "return_dict",
        "guidance_scale",
        "num_images_per_prompt",
        "output_type",
        "return_dict",
    ]
    test_xformers_attention = False

    def get_dummy_components(self):
        dummy = Dummies()
        return dummy.get_dummy_components()

    def get_dummy_inputs(self, device, seed=0):
        dummy = Dummies()
        return dummy.get_dummy_inputs(device=device, seed=seed)
    def test_kandinsky(self):
        device = "cpu"
@@ -219,9 +229,7 @@ class KandinskyPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
        assert image.shape == (1, 64, 64, 3)

-        expected_slice = np.array(
-            [0.328663, 1.0, 0.23216873, 1.0, 0.92717564, 0.4639046, 0.96894777, 0.31713378, 0.6293953]
-        )
+        expected_slice = np.array([1.0000, 1.0000, 0.2766, 1.0000, 0.5447, 0.1737, 1.0000, 0.4316, 0.9024])
        assert (
            np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
......
# coding=utf-8
# Copyright 2023 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from diffusers import KandinskyCombinedPipeline, KandinskyImg2ImgCombinedPipeline, KandinskyInpaintCombinedPipeline
from diffusers.utils import torch_device
from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu
from ..test_pipelines_common import PipelineTesterMixin
from .test_kandinsky import Dummies
from .test_kandinsky_img2img import Dummies as Img2ImgDummies
from .test_kandinsky_inpaint import Dummies as InpaintDummies
from .test_kandinsky_prior import Dummies as PriorDummies
enable_full_determinism()
class KandinskyPipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = KandinskyCombinedPipeline
    params = [
        "prompt",
    ]
    batch_params = ["prompt", "negative_prompt"]
    required_optional_params = [
        "generator",
        "height",
        "width",
        "latents",
        "guidance_scale",
        "negative_prompt",
        "num_inference_steps",
        "return_dict",
        "guidance_scale",
        "num_images_per_prompt",
        "output_type",
        "return_dict",
    ]
    test_xformers_attention = False

    def get_dummy_components(self):
        dummy = Dummies()
        prior_dummy = PriorDummies()
        components = dummy.get_dummy_components()

        components.update({f"prior_{k}": v for k, v in prior_dummy.get_dummy_components().items()})
        return components

    def get_dummy_inputs(self, device, seed=0):
        prior_dummy = PriorDummies()
        inputs = prior_dummy.get_dummy_inputs(device=device, seed=seed)
        inputs.update(
            {
                "height": 64,
                "width": 64,
            }
        )
        return inputs

    def test_kandinsky(self):
        device = "cpu"

        components = self.get_dummy_components()

        pipe = self.pipeline_class(**components)
        pipe = pipe.to(device)

        pipe.set_progress_bar_config(disable=None)

        output = pipe(**self.get_dummy_inputs(device))
        image = output.images

        image_from_tuple = pipe(
            **self.get_dummy_inputs(device),
            return_dict=False,
        )[0]

        image_slice = image[0, -3:, -3:, -1]
        image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1]

        assert image.shape == (1, 64, 64, 3)

        expected_slice = np.array([0.0000, 0.0000, 0.6777, 0.1363, 0.3624, 0.7868, 0.3869, 0.3395, 0.5068])

        assert (
            np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
        ), f" expected_slice {expected_slice}, but got {image_slice.flatten()}"
        assert (
            np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
        ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

    @require_torch_gpu
    def test_offloads(self):
        pipes = []
        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components).to(torch_device)
        pipes.append(sd_pipe)

        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components)
        sd_pipe.enable_model_cpu_offload()
        pipes.append(sd_pipe)

        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components)
        sd_pipe.enable_sequential_cpu_offload()
        pipes.append(sd_pipe)

        image_slices = []
        for pipe in pipes:
            inputs = self.get_dummy_inputs(torch_device)
            image = pipe(**inputs).images
            image_slices.append(image[0, -3:, -3:, -1].flatten())

        assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3
        assert np.abs(image_slices[0] - image_slices[2]).max() < 1e-3

    def test_inference_batch_single_identical(self):
        super().test_inference_batch_single_identical(expected_max_diff=1e-2)


class KandinskyPipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = KandinskyImg2ImgCombinedPipeline
    params = ["prompt", "image"]
    batch_params = ["prompt", "negative_prompt", "image"]
    required_optional_params = [
        "generator",
        "height",
        "width",
        "latents",
        "guidance_scale",
        "negative_prompt",
        "num_inference_steps",
        "return_dict",
        "guidance_scale",
        "num_images_per_prompt",
        "output_type",
        "return_dict",
    ]
    test_xformers_attention = False

    def get_dummy_components(self):
        dummy = Img2ImgDummies()
        prior_dummy = PriorDummies()
        components = dummy.get_dummy_components()

        components.update({f"prior_{k}": v for k, v in prior_dummy.get_dummy_components().items()})
        return components

    def get_dummy_inputs(self, device, seed=0):
        prior_dummy = PriorDummies()
        dummy = Img2ImgDummies()
        inputs = prior_dummy.get_dummy_inputs(device=device, seed=seed)
        inputs.update(dummy.get_dummy_inputs(device=device, seed=seed))
        inputs.pop("image_embeds")
        inputs.pop("negative_image_embeds")
        return inputs

    def test_kandinsky(self):
        device = "cpu"

        components = self.get_dummy_components()

        pipe = self.pipeline_class(**components)
        pipe = pipe.to(device)

        pipe.set_progress_bar_config(disable=None)

        output = pipe(**self.get_dummy_inputs(device))
        image = output.images

        image_from_tuple = pipe(
            **self.get_dummy_inputs(device),
            return_dict=False,
        )[0]

        image_slice = image[0, -3:, -3:, -1]
        image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1]

        assert image.shape == (1, 64, 64, 3)

        expected_slice = np.array([0.4260, 0.3596, 0.4571, 0.3890, 0.4087, 0.5137, 0.4819, 0.4116, 0.5053])

        assert (
            np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
        ), f" expected_slice {expected_slice}, but got {image_slice.flatten()}"
        assert (
            np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
        ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

    @require_torch_gpu
    def test_offloads(self):
        pipes = []
        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components).to(torch_device)
        pipes.append(sd_pipe)

        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components)
        sd_pipe.enable_model_cpu_offload()
        pipes.append(sd_pipe)

        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components)
        sd_pipe.enable_sequential_cpu_offload()
        pipes.append(sd_pipe)

        image_slices = []
        for pipe in pipes:
            inputs = self.get_dummy_inputs(torch_device)
            image = pipe(**inputs).images
            image_slices.append(image[0, -3:, -3:, -1].flatten())

        assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3
        assert np.abs(image_slices[0] - image_slices[2]).max() < 1e-3

    def test_inference_batch_single_identical(self):
        super().test_inference_batch_single_identical(expected_max_diff=1e-2)


class KandinskyPipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = KandinskyInpaintCombinedPipeline
    params = ["prompt", "image", "mask_image"]
    batch_params = ["prompt", "negative_prompt", "image", "mask_image"]
    required_optional_params = [
        "generator",
        "height",
        "width",
        "latents",
        "guidance_scale",
        "negative_prompt",
        "num_inference_steps",
        "return_dict",
        "guidance_scale",
        "num_images_per_prompt",
        "output_type",
        "return_dict",
    ]
    test_xformers_attention = False

    def get_dummy_components(self):
        dummy = InpaintDummies()
        prior_dummy = PriorDummies()
        components = dummy.get_dummy_components()

        components.update({f"prior_{k}": v for k, v in prior_dummy.get_dummy_components().items()})
        return components

    def get_dummy_inputs(self, device, seed=0):
        prior_dummy = PriorDummies()
        dummy = InpaintDummies()
        inputs = prior_dummy.get_dummy_inputs(device=device, seed=seed)
        inputs.update(dummy.get_dummy_inputs(device=device, seed=seed))
        inputs.pop("image_embeds")
        inputs.pop("negative_image_embeds")
        return inputs

    def test_kandinsky(self):
        device = "cpu"

        components = self.get_dummy_components()

        pipe = self.pipeline_class(**components)
        pipe = pipe.to(device)

        pipe.set_progress_bar_config(disable=None)

        output = pipe(**self.get_dummy_inputs(device))
        image = output.images

        image_from_tuple = pipe(
            **self.get_dummy_inputs(device),
            return_dict=False,
        )[0]

        image_slice = image[0, -3:, -3:, -1]
        image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1]

        assert image.shape == (1, 64, 64, 3)

        expected_slice = np.array([0.0477, 0.0808, 0.2972, 0.2705, 0.3620, 0.6247, 0.4464, 0.2870, 0.3530])

        assert (
            np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
        ), f" expected_slice {expected_slice}, but got {image_slice.flatten()}"
        assert (
            np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
        ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

    @require_torch_gpu
    def test_offloads(self):
        pipes = []
        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components).to(torch_device)
        pipes.append(sd_pipe)

        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components)
        sd_pipe.enable_model_cpu_offload()
        pipes.append(sd_pipe)

        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components)
        sd_pipe.enable_sequential_cpu_offload()
        pipes.append(sd_pipe)

        image_slices = []
        for pipe in pipes:
            inputs = self.get_dummy_inputs(torch_device)
            image = pipe(**inputs).images
            image_slices.append(image[0, -3:, -3:, -1].flatten())

        assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3
        assert np.abs(image_slices[0] - image_slices[2]).max() < 1e-3

    def test_inference_batch_single_identical(self):
        super().test_inference_batch_single_identical(expected_max_diff=1e-2)
@@ -40,32 +40,7 @@ from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
enable_full_determinism()

-class KandinskyImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
-    pipeline_class = KandinskyImg2ImgPipeline
-    params = ["prompt", "image_embeds", "negative_image_embeds", "image"]
-    batch_params = [
-        "prompt",
-        "negative_prompt",
-        "image_embeds",
-        "negative_image_embeds",
-        "image",
-    ]
-    required_optional_params = [
-        "generator",
-        "height",
-        "width",
-        "strength",
-        "guidance_scale",
-        "negative_prompt",
-        "num_inference_steps",
-        "return_dict",
-        "guidance_scale",
-        "num_images_per_prompt",
-        "output_type",
-        "return_dict",
-    ]
-    test_xformers_attention = False
+class Dummies:
    @property
    def text_embedder_hidden_size(self):
        return 32
@@ -84,7 +59,7 @@ class KandinskyImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    @property
    def cross_attention_dim(self):
-        return 100
+        return 32

    @property
    def dummy_tokenizer(self):
@@ -216,6 +191,41 @@ class KandinskyImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
        }
        return inputs
class KandinskyImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = KandinskyImg2ImgPipeline
    params = ["prompt", "image_embeds", "negative_image_embeds", "image"]
    batch_params = [
        "prompt",
        "negative_prompt",
        "image_embeds",
        "negative_image_embeds",
        "image",
    ]
    required_optional_params = [
        "generator",
        "height",
        "width",
        "strength",
        "guidance_scale",
        "negative_prompt",
        "num_inference_steps",
        "return_dict",
        "guidance_scale",
        "num_images_per_prompt",
        "output_type",
        "return_dict",
    ]
    test_xformers_attention = False

    def get_dummy_components(self):
        dummies = Dummies()
        return dummies.get_dummy_components()

    def get_dummy_inputs(self, device, seed=0):
        dummies = Dummies()
        return dummies.get_dummy_inputs(device=device, seed=seed)
    def test_kandinsky_img2img(self):
        device = "cpu"
@@ -239,9 +249,7 @@ class KandinskyImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
        assert image.shape == (1, 64, 64, 3)

-        expected_slice = np.array(
-            [0.61474943, 0.6073539, 0.43308544, 0.5928269, 0.47493595, 0.46755973, 0.4613838, 0.45368797, 0.50119233]
-        )
+        expected_slice = np.array([0.5816, 0.5872, 0.4634, 0.5982, 0.4767, 0.4710, 0.4669, 0.4717, 0.4966])
        assert (
            np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
        ), f" expected_slice {expected_slice}, but got {image_slice.flatten()}"
......
@@ -33,33 +33,7 @@ from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
enable_full_determinism()

-class KandinskyInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
-    pipeline_class = KandinskyInpaintPipeline
-    params = ["prompt", "image_embeds", "negative_image_embeds", "image", "mask_image"]
-    batch_params = [
-        "prompt",
-        "negative_prompt",
-        "image_embeds",
-        "negative_image_embeds",
-        "image",
-        "mask_image",
-    ]
-    required_optional_params = [
-        "generator",
-        "height",
-        "width",
-        "latents",
-        "guidance_scale",
-        "negative_prompt",
-        "num_inference_steps",
-        "return_dict",
-        "guidance_scale",
-        "num_images_per_prompt",
-        "output_type",
-        "return_dict",
-    ]
-    test_xformers_attention = False
+class Dummies:
    @property
    def text_embedder_hidden_size(self):
        return 32
@@ -78,7 +52,7 @@ class KandinskyInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    @property
    def cross_attention_dim(self):
-        return 100
+        return 32

    @property
    def dummy_tokenizer(self):
@@ -189,8 +163,8 @@ class KandinskyInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
        image = image.cpu().permute(0, 2, 3, 1)[0]
        init_image = Image.fromarray(np.uint8(image)).convert("RGB").resize((256, 256))
        # create mask
-        mask = np.ones((64, 64), dtype=np.float32)
-        mask[:32, :32] = 0
+        mask = np.zeros((64, 64), dtype=np.float32)
+        mask[:32, :32] = 1

        if str(device).startswith("mps"):
            generator = torch.manual_seed(seed)
@@ -211,6 +185,42 @@ class KandinskyInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
        }
        return inputs
class KandinskyInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = KandinskyInpaintPipeline
    params = ["prompt", "image_embeds", "negative_image_embeds", "image", "mask_image"]
    batch_params = [
        "prompt",
        "negative_prompt",
        "image_embeds",
        "negative_image_embeds",
        "image",
        "mask_image",
    ]
    required_optional_params = [
        "generator",
        "height",
        "width",
        "latents",
        "guidance_scale",
        "negative_prompt",
        "num_inference_steps",
        "return_dict",
        "guidance_scale",
        "num_images_per_prompt",
        "output_type",
        "return_dict",
    ]
    test_xformers_attention = False

    def get_dummy_components(self):
        dummies = Dummies()
        return dummies.get_dummy_components()

    def get_dummy_inputs(self, device, seed=0):
        dummies = Dummies()
        return dummies.get_dummy_inputs(device=device, seed=seed)
    def test_kandinsky_inpaint(self):
        device = "cpu"
@@ -232,13 +242,9 @@ class KandinskyInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
        image_slice = image[0, -3:, -3:, -1]
        image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1]

-        print(f"image.shape {image.shape}")
        assert image.shape == (1, 64, 64, 3)

-        expected_slice = np.array(
-            [0.8326919, 0.73790467, 0.20918581, 0.9309612, 0.5511791, 0.43713328, 0.5513321, 0.49922934, 0.59497786]
-        )
+        expected_slice = np.array([0.8222, 0.8896, 0.4373, 0.8088, 0.4905, 0.2609, 0.6816, 0.4291, 0.5129])
        assert (
            np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
@@ -296,8 +302,8 @@ class KandinskyInpaintPipelineIntegrationTests(unittest.TestCase):
        init_image = load_image(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/kandinsky/cat.png"
        )
-        mask = np.ones((768, 768), dtype=np.float32)
-        mask[:250, 250:-250] = 0
+        mask = np.zeros((768, 768), dtype=np.float32)
+        mask[:250, 250:-250] = 1

        prompt = "a hat"
......
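The flipped mask initialization in the tests above encodes the new inpainting mask convention (my reading of this PR: 1 now marks pixels to repaint and 0 pixels to keep, matching the other diffusers inpaint pipelines; the warning added by this PR points users at the change):

import numpy as np

mask = np.zeros((768, 768), dtype=np.float32)  # keep everything by default
mask[:250, 250:-250] = 1  # repaint only the top-center strip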
@@ -37,22 +37,7 @@ from ..test_pipelines_common import PipelineTesterMixin
enable_full_determinism()

-class KandinskyPriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
-    pipeline_class = KandinskyPriorPipeline
-    params = ["prompt"]
-    batch_params = ["prompt", "negative_prompt"]
-    required_optional_params = [
-        "num_images_per_prompt",
-        "generator",
-        "num_inference_steps",
-        "latents",
-        "negative_prompt",
-        "guidance_scale",
-        "output_type",
-        "return_dict",
-    ]
-    test_xformers_attention = False
+class Dummies:
    @property
    def text_embedder_hidden_size(self):
        return 32
@@ -183,6 +168,31 @@ class KandinskyPriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
        }
        return inputs
class KandinskyPriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = KandinskyPriorPipeline
    params = ["prompt"]
    batch_params = ["prompt", "negative_prompt"]
    required_optional_params = [
        "num_images_per_prompt",
        "generator",
        "num_inference_steps",
        "latents",
        "negative_prompt",
        "guidance_scale",
        "output_type",
        "return_dict",
    ]
    test_xformers_attention = False

    def get_dummy_components(self):
        dummy = Dummies()
        return dummy.get_dummy_components()

    def get_dummy_inputs(self, device, seed=0):
        dummy = Dummies()
        return dummy.get_dummy_inputs(device=device, seed=seed)
    def test_kandinsky_prior(self):
        device = "cpu"
......
@@ -30,28 +30,7 @@ from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
enable_full_determinism()

-class KandinskyV22PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
-    pipeline_class = KandinskyV22Pipeline
-    params = [
-        "image_embeds",
-        "negative_image_embeds",
-    ]
-    batch_params = ["image_embeds", "negative_image_embeds"]
-    required_optional_params = [
-        "generator",
-        "height",
-        "width",
-        "latents",
-        "guidance_scale",
-        "num_inference_steps",
-        "return_dict",
-        "guidance_scale",
-        "num_images_per_prompt",
-        "output_type",
-        "return_dict",
-    ]
-    test_xformers_attention = False
+class Dummies:
    @property
    def text_embedder_hidden_size(self):
        return 32
@@ -70,7 +49,7 @@ class KandinskyV22PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    @property
    def cross_attention_dim(self):
-        return 100
+        return 32

    @property
    def dummy_unet(self):
@@ -166,6 +145,37 @@ class KandinskyV22PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
        }
        return inputs
class KandinskyV22PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = KandinskyV22Pipeline
    params = [
        "image_embeds",
        "negative_image_embeds",
    ]
    batch_params = ["image_embeds", "negative_image_embeds"]
    required_optional_params = [
        "generator",
        "height",
        "width",
        "latents",
        "guidance_scale",
        "num_inference_steps",
        "return_dict",
        "guidance_scale",
        "num_images_per_prompt",
        "output_type",
        "return_dict",
    ]
    test_xformers_attention = False

    def get_dummy_inputs(self, device, seed=0):
        dummies = Dummies()
        return dummies.get_dummy_inputs(device=device, seed=seed)

    def get_dummy_components(self):
        dummies = Dummies()
        return dummies.get_dummy_components()
    def test_kandinsky(self):
        device = "cpu"
@@ -189,9 +199,7 @@ class KandinskyV22PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
        assert image.shape == (1, 64, 64, 3)

-        expected_slice = np.array(
-            [0.6237976, 1.0, 0.36441332, 1.0, 0.70639634, 0.29877186, 0.85652125, 0.5216843, 0.54454046]
-        )
+        expected_slice = np.array([0.3420, 0.9505, 0.3919, 1.0000, 0.5188, 0.3109, 0.6139, 0.5624, 0.6811])
        assert (
            np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
......
# coding=utf-8
# Copyright 2023 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from diffusers import (
    KandinskyV22CombinedPipeline,
    KandinskyV22Img2ImgCombinedPipeline,
    KandinskyV22InpaintCombinedPipeline,
)
from diffusers.utils import torch_device
from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu
from ..test_pipelines_common import PipelineTesterMixin
from .test_kandinsky import Dummies
from .test_kandinsky_img2img import Dummies as Img2ImgDummies
from .test_kandinsky_inpaint import Dummies as InpaintDummies
from .test_kandinsky_prior import Dummies as PriorDummies
enable_full_determinism()
class KandinskyV22PipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = KandinskyV22CombinedPipeline
    params = [
        "prompt",
    ]
    batch_params = ["prompt", "negative_prompt"]
    required_optional_params = [
        "generator",
        "height",
        "width",
        "latents",
        "guidance_scale",
        "negative_prompt",
        "num_inference_steps",
        "return_dict",
        "guidance_scale",
        "num_images_per_prompt",
        "output_type",
        "return_dict",
    ]
    test_xformers_attention = False

    def get_dummy_components(self):
        dummy = Dummies()
        prior_dummy = PriorDummies()
        components = dummy.get_dummy_components()

        components.update({f"prior_{k}": v for k, v in prior_dummy.get_dummy_components().items()})
        return components

    def get_dummy_inputs(self, device, seed=0):
        prior_dummy = PriorDummies()
        inputs = prior_dummy.get_dummy_inputs(device=device, seed=seed)
        inputs.update(
            {
                "height": 64,
                "width": 64,
            }
        )
        return inputs

    def test_kandinsky(self):
        device = "cpu"

        components = self.get_dummy_components()

        pipe = self.pipeline_class(**components)
        pipe = pipe.to(device)

        pipe.set_progress_bar_config(disable=None)

        output = pipe(**self.get_dummy_inputs(device))
        image = output.images

        image_from_tuple = pipe(
            **self.get_dummy_inputs(device),
            return_dict=False,
        )[0]

        image_slice = image[0, -3:, -3:, -1]
        image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1]

        assert image.shape == (1, 64, 64, 3)

        expected_slice = np.array([0.3013, 0.0471, 0.5176, 0.1817, 0.2566, 0.7076, 0.6712, 0.4421, 0.7503])

        assert (
            np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
        ), f" expected_slice {expected_slice}, but got {image_slice.flatten()}"
        assert (
            np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
        ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

    @require_torch_gpu
    def test_offloads(self):
        pipes = []
        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components).to(torch_device)
        pipes.append(sd_pipe)

        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components)
        sd_pipe.enable_model_cpu_offload()
        pipes.append(sd_pipe)

        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components)
        sd_pipe.enable_sequential_cpu_offload()
        pipes.append(sd_pipe)

        image_slices = []
        for pipe in pipes:
            inputs = self.get_dummy_inputs(torch_device)
            image = pipe(**inputs).images
            image_slices.append(image[0, -3:, -3:, -1].flatten())

        assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3
        assert np.abs(image_slices[0] - image_slices[2]).max() < 1e-3

    def test_inference_batch_single_identical(self):
        super().test_inference_batch_single_identical(expected_max_diff=1e-2)


class KandinskyV22PipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = KandinskyV22Img2ImgCombinedPipeline
    params = ["prompt", "image"]
    batch_params = ["prompt", "negative_prompt", "image"]
    required_optional_params = [
        "generator",
        "height",
        "width",
        "latents",
        "guidance_scale",
        "negative_prompt",
        "num_inference_steps",
        "return_dict",
        "guidance_scale",
        "num_images_per_prompt",
        "output_type",
        "return_dict",
    ]
    test_xformers_attention = False

    def get_dummy_components(self):
        dummy = Img2ImgDummies()
        prior_dummy = PriorDummies()
        components = dummy.get_dummy_components()

        components.update({f"prior_{k}": v for k, v in prior_dummy.get_dummy_components().items()})
        return components

    def get_dummy_inputs(self, device, seed=0):
        prior_dummy = PriorDummies()
        dummy = Img2ImgDummies()
        inputs = prior_dummy.get_dummy_inputs(device=device, seed=seed)
        inputs.update(dummy.get_dummy_inputs(device=device, seed=seed))
        inputs.pop("image_embeds")
        inputs.pop("negative_image_embeds")
        return inputs

    def test_kandinsky(self):
        device = "cpu"

        components = self.get_dummy_components()

        pipe = self.pipeline_class(**components)
        pipe = pipe.to(device)

        pipe.set_progress_bar_config(disable=None)

        output = pipe(**self.get_dummy_inputs(device))
        image = output.images

        image_from_tuple = pipe(
            **self.get_dummy_inputs(device),
            return_dict=False,
        )[0]

        image_slice = image[0, -3:, -3:, -1]
        image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1]

        assert image.shape == (1, 64, 64, 3)

        expected_slice = np.array([0.4353, 0.4710, 0.5128, 0.4806, 0.5054, 0.5348, 0.5224, 0.4603, 0.5025])

        assert (
            np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
        ), f" expected_slice {expected_slice}, but got {image_slice.flatten()}"
        assert (
            np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
        ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

    @require_torch_gpu
    def test_offloads(self):
        pipes = []
        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components).to(torch_device)
        pipes.append(sd_pipe)

        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components)
        sd_pipe.enable_model_cpu_offload()
        pipes.append(sd_pipe)

        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components)
        sd_pipe.enable_sequential_cpu_offload()
        pipes.append(sd_pipe)

        image_slices = []
        for pipe in pipes:
            inputs = self.get_dummy_inputs(torch_device)
            image = pipe(**inputs).images
            image_slices.append(image[0, -3:, -3:, -1].flatten())

        assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3
        assert np.abs(image_slices[0] - image_slices[2]).max() < 1e-3

    def test_inference_batch_single_identical(self):
        super().test_inference_batch_single_identical(expected_max_diff=1e-2)


class KandinskyV22PipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
    pipeline_class = KandinskyV22InpaintCombinedPipeline
    params = ["prompt", "image", "mask_image"]
    batch_params = ["prompt", "negative_prompt", "image", "mask_image"]
    required_optional_params = [
        "generator",
        "height",
        "width",
        "latents",
        "guidance_scale",
        "negative_prompt",
        "num_inference_steps",
        "return_dict",
        "guidance_scale",
        "num_images_per_prompt",
        "output_type",
        "return_dict",
    ]
    test_xformers_attention = False

    def get_dummy_components(self):
        dummy = InpaintDummies()
        prior_dummy = PriorDummies()
        components = dummy.get_dummy_components()

        components.update({f"prior_{k}": v for k, v in prior_dummy.get_dummy_components().items()})
        return components

    def get_dummy_inputs(self, device, seed=0):
        prior_dummy = PriorDummies()
        dummy = InpaintDummies()
        inputs = prior_dummy.get_dummy_inputs(device=device, seed=seed)
        inputs.update(dummy.get_dummy_inputs(device=device, seed=seed))
        inputs.pop("image_embeds")
        inputs.pop("negative_image_embeds")
        return inputs

    def test_kandinsky(self):
        device = "cpu"

        components = self.get_dummy_components()

        pipe = self.pipeline_class(**components)
        pipe = pipe.to(device)

        pipe.set_progress_bar_config(disable=None)

        output = pipe(**self.get_dummy_inputs(device))
        image = output.images

        image_from_tuple = pipe(
            **self.get_dummy_inputs(device),
            return_dict=False,
        )[0]

        image_slice = image[0, -3:, -3:, -1]
        image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1]

        assert image.shape == (1, 64, 64, 3)

        expected_slice = np.array([0.5039, 0.4926, 0.4898, 0.4978, 0.4838, 0.4942, 0.4738, 0.4702, 0.4816])

        assert (
            np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
        ), f" expected_slice {expected_slice}, but got {image_slice.flatten()}"
        assert (
            np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
        ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

    @require_torch_gpu
    def test_offloads(self):
        pipes = []
        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components).to(torch_device)
        pipes.append(sd_pipe)

        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components)
        sd_pipe.enable_model_cpu_offload()
        pipes.append(sd_pipe)

        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components)
        sd_pipe.enable_sequential_cpu_offload()
        pipes.append(sd_pipe)

        image_slices = []
        for pipe in pipes:
            inputs = self.get_dummy_inputs(torch_device)
            image = pipe(**inputs).images
            image_slices.append(image[0, -3:, -3:, -1].flatten())

        assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3
        assert np.abs(image_slices[0] - image_slices[2]).max() < 1e-3

    def test_inference_batch_single_identical(self):
        super().test_inference_batch_single_identical(expected_max_diff=1e-2)
@@ -37,29 +37,7 @@ from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
enable_full_determinism()

-class KandinskyV22Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
-    pipeline_class = KandinskyV22Img2ImgPipeline
-    params = ["image_embeds", "negative_image_embeds", "image"]
-    batch_params = [
-        "image_embeds",
-        "negative_image_embeds",
-        "image",
-    ]
-    required_optional_params = [
-        "generator",
-        "height",
-        "width",
-        "strength",
-        "guidance_scale",
-        "num_inference_steps",
-        "return_dict",
-        "guidance_scale",
-        "num_images_per_prompt",
-        "output_type",
-        "return_dict",
-    ]
-    test_xformers_attention = False
+class Dummies:
    @property
    def text_embedder_hidden_size(self):
        return 32
@@ -78,7 +56,7 @@ class KandinskyV22Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    @property
    def cross_attention_dim(self):
-        return 100
+        return 32

    @property
    def dummy_unet(self):
@@ -184,6 +162,38 @@ class KandinskyV22Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
        }
        return inputs
class KandinskyV22Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
pipeline_class = KandinskyV22Img2ImgPipeline
params = ["image_embeds", "negative_image_embeds", "image"]
batch_params = [
"image_embeds",
"negative_image_embeds",
"image",
]
required_optional_params = [
"generator",
"height",
"width",
"strength",
"guidance_scale",
"num_inference_steps",
"return_dict",
"guidance_scale",
"num_images_per_prompt",
"output_type",
"return_dict",
]
test_xformers_attention = False
def get_dummy_components(self):
dummies = Dummies()
return dummies.get_dummy_components()
def get_dummy_inputs(self, device, seed=0):
dummies = Dummies()
return dummies.get_dummy_inputs(device=device, seed=seed)
def test_kandinsky_img2img(self): def test_kandinsky_img2img(self):
device = "cpu" device = "cpu"
...@@ -207,9 +217,7 @@ class KandinskyV22Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCas ...@@ -207,9 +217,7 @@ class KandinskyV22Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCas
assert image.shape == (1, 64, 64, 3) assert image.shape == (1, 64, 64, 3)
expected_slice = np.array( expected_slice = np.array([0.5712, 0.5443, 0.4725, 0.6195, 0.5184, 0.4651, 0.4473, 0.4590, 0.5016])
[0.6199778, 0.63984406, 0.46145785, 0.62944984, 0.5622215, 0.47306132, 0.47441456, 0.4607606, 0.48719263]
)
assert ( assert (
np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
), f" expected_slice {expected_slice}, but got {image_slice.flatten()}" ), f" expected_slice {expected_slice}, but got {image_slice.flatten()}"
......
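The hunk above is purely structural: the fixtures move out of the test class into a module-level `Dummies` helper so the new combined-pipeline tests can compose prior and decoder fixtures, and `cross_attention_dim` drops from 100 to 32 so the dummy decoder's attention width matches the 32-dim embeddings the dummy prior emits. A hedged sketch of the composition pattern, where `PriorDummies` and `Img2ImgDummies` stand in for the per-file `Dummies` classes:

# Hedged sketch of the fixture composition used by the combined tests.
def get_combined_dummy_components(prior_dummies, decoder_dummies):
    components = decoder_dummies.get_dummy_components()
    # Prior components are namespaced with "prior_" (e.g. prior_text_encoder),
    # mirroring the combined pipeline's __init__ arguments.
    components.update({f"prior_{k}": v for k, v in prior_dummies.get_dummy_components().items()})
    return components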
@@ -37,30 +37,7 @@ from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference

 enable_full_determinism()


-class KandinskyV22InpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
-    pipeline_class = KandinskyV22InpaintPipeline
-    params = ["image_embeds", "negative_image_embeds", "image", "mask_image"]
-    batch_params = [
-        "image_embeds",
-        "negative_image_embeds",
-        "image",
-        "mask_image",
-    ]
-    required_optional_params = [
-        "generator",
-        "height",
-        "width",
-        "latents",
-        "guidance_scale",
-        "num_inference_steps",
-        "return_dict",
-        "guidance_scale",
-        "num_images_per_prompt",
-        "output_type",
-        "return_dict",
-    ]
-    test_xformers_attention = False
-
+class Dummies:
     @property
     def text_embedder_hidden_size(self):
         return 32
@@ -79,7 +56,7 @@ class KandinskyV22InpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     @property
     def cross_attention_dim(self):
-        return 100
+        return 32

     @property
     def dummy_unet(self):
@@ -165,8 +142,8 @@ class KandinskyV22InpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         image = image.cpu().permute(0, 2, 3, 1)[0]
         init_image = Image.fromarray(np.uint8(image)).convert("RGB").resize((256, 256))
         # create mask
-        mask = np.ones((64, 64), dtype=np.float32)
-        mask[:32, :32] = 0
+        mask = np.zeros((64, 64), dtype=np.float32)
+        mask[:32, :32] = 1

         if str(device).startswith("mps"):
             generator = torch.manual_seed(seed)
@@ -186,6 +163,39 @@ class KandinskyV22InpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         }
         return inputs


+class KandinskyV22InpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
+    pipeline_class = KandinskyV22InpaintPipeline
+    params = ["image_embeds", "negative_image_embeds", "image", "mask_image"]
+    batch_params = [
+        "image_embeds",
+        "negative_image_embeds",
+        "image",
+        "mask_image",
+    ]
+    required_optional_params = [
+        "generator",
+        "height",
+        "width",
+        "latents",
+        "guidance_scale",
+        "num_inference_steps",
+        "return_dict",
+        "guidance_scale",
+        "num_images_per_prompt",
+        "output_type",
+        "return_dict",
+    ]
+    test_xformers_attention = False
+
+    def get_dummy_components(self):
+        dummies = Dummies()
+        return dummies.get_dummy_components()
+
+    def get_dummy_inputs(self, device, seed=0):
+        dummies = Dummies()
+        return dummies.get_dummy_inputs(device=device, seed=seed)
+
     def test_kandinsky_inpaint(self):
         device = "cpu"
@@ -207,8 +217,6 @@ class KandinskyV22InpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         image_slice = image[0, -3:, -3:, -1]
         image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1]

-        print(f"image.shape {image.shape}")
-
         assert image.shape == (1, 64, 64, 3)

         expected_slice = np.array(
@@ -244,8 +252,8 @@ class KandinskyV22InpaintPipelineIntegrationTests(unittest.TestCase):
         init_image = load_image(
             "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/kandinsky/cat.png"
         )
-        mask = np.ones((768, 768), dtype=np.float32)
-        mask[:250, 250:-250] = 0
+        mask = np.zeros((768, 768), dtype=np.float32)
+        mask[:250, 250:-250] = 1

         prompt = "a hat"
...
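The mask flip in the two hunks above is the user-facing part of the "fix kandinsky mask" change: Kandinsky inpainting now expects 1 (white) to mark the region to repaint and 0 to keep the original pixels, matching the other diffusers inpaint pipelines, where previously the convention was inverted. A minimal sketch of building a mask under the new convention; the shapes and the PIL conversion are illustrative assumptions:

# Hedged sketch: an inpaint mask under the new Kandinsky convention,
# where 1 (white) marks pixels to repaint and 0 keeps the original image.
import numpy as np
from PIL import Image

mask = np.zeros((768, 768), dtype=np.float32)
mask[:250, 250:-250] = 1  # repaint a strip at the top, e.g. to add "a hat"

# Callers migrating from the old convention can simply invert their masks:
old_style_mask = 1 - mask

# A float array in [0, 1] or an equivalent PIL image can be passed as mask_image.
mask_image = Image.fromarray((mask * 255).astype(np.uint8))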
@@ -37,22 +37,7 @@ from ..test_pipelines_common import PipelineTesterMixin

 enable_full_determinism()


-class KandinskyV22PriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
-    pipeline_class = KandinskyV22PriorPipeline
-    params = ["prompt"]
-    batch_params = ["prompt", "negative_prompt"]
-    required_optional_params = [
-        "num_images_per_prompt",
-        "generator",
-        "num_inference_steps",
-        "latents",
-        "negative_prompt",
-        "guidance_scale",
-        "output_type",
-        "return_dict",
-    ]
-    test_xformers_attention = False
-
+class Dummies:
     @property
     def text_embedder_hidden_size(self):
         return 32
@@ -183,6 +168,31 @@ class KandinskyV22PriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         }
         return inputs


+class KandinskyV22PriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
+    pipeline_class = KandinskyV22PriorPipeline
+    params = ["prompt"]
+    batch_params = ["prompt", "negative_prompt"]
+    required_optional_params = [
+        "num_images_per_prompt",
+        "generator",
+        "num_inference_steps",
+        "latents",
+        "negative_prompt",
+        "guidance_scale",
+        "output_type",
+        "return_dict",
+    ]
+    test_xformers_attention = False
+
+    def get_dummy_components(self):
+        dummies = Dummies()
+        return dummies.get_dummy_components()
+
+    def get_dummy_inputs(self, device, seed=0):
+        dummies = Dummies()
+        return dummies.get_dummy_inputs(device=device, seed=seed)
+
     def test_kandinsky_prior(self):
         device = "cpu"
...
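The prior tests cover the first half of the usual two-stage Kandinsky flow, which the combined pipelines added in this PR collapse into a single call. A hedged sketch of both flows; the checkpoint ids and the ability to load the combined pipeline directly from the decoder repo are assumptions, not taken from this diff:

# Hedged sketch: manual two-stage flow vs. the combined pipeline from this PR.
import torch
from diffusers import (
    KandinskyV22CombinedPipeline,
    KandinskyV22Pipeline,
    KandinskyV22PriorPipeline,
)

prompt = "a portrait of a cat, 4k photo"

# Two-stage flow: the prior turns the prompt into image embeddings,
# then the decoder renders those embeddings into pixels.
prior = KandinskyV22PriorPipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
).to("cuda")
decoder = KandinskyV22Pipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
).to("cuda")
image_embeds, negative_image_embeds = prior(prompt).to_tuple()
image = decoder(image_embeds=image_embeds, negative_image_embeds=negative_image_embeds).images[0]

# Combined flow: one object owns both stages and accepts the prompt directly.
pipe = KandinskyV22CombinedPipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
).to("cuda")
image = pipe(prompt=prompt).images[0]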