"vscode:/vscode.git/clone" did not exist on "fe8a163986672bbbec1a922231be229cc79dafe6"
Unverified Commit fee93c81 authored by Dhruv Nair's avatar Dhruv Nair Committed by GitHub
Browse files

[Refactor] Update from single file (#6428)

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* up

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* up

* update

* update

* update

* update

* update'

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* clean

* update

* update

* clean up

* clean up

* update

* clean

* clean

* update

* update

* clean up

* fix docs

* update

* update

* Revert "update"

This reverts commit dbfb8f1ea9c61a2b4e02f926245be2b3d387e577.

* update

* update

* update

* update

* fix controlnet

* fix scheduler

* fix controlnet tests
parent 5308cce9
...@@ -30,8 +30,8 @@ To learn more about how to load single file weights, see the [Load different Sta ...@@ -30,8 +30,8 @@ To learn more about how to load single file weights, see the [Load different Sta
## FromOriginalVAEMixin ## FromOriginalVAEMixin
[[autodoc]] loaders.single_file.FromOriginalVAEMixin [[autodoc]] loaders.autoencoder.FromOriginalVAEMixin
## FromOriginalControlnetMixin ## FromOriginalControlnetMixin
[[autodoc]] loaders.single_file.FromOriginalControlnetMixin [[autodoc]] loaders.controlnet.FromOriginalControlNetMixin
\ No newline at end of file \ No newline at end of file
...@@ -54,12 +54,13 @@ if is_transformers_available(): ...@@ -54,12 +54,13 @@ if is_transformers_available():
_import_structure = {} _import_structure = {}
if is_torch_available(): if is_torch_available():
_import_structure["single_file"] = ["FromOriginalControlnetMixin", "FromOriginalVAEMixin"] _import_structure["autoencoder"] = ["FromOriginalVAEMixin"]
_import_structure["controlnet"] = ["FromOriginalControlNetMixin"]
_import_structure["unet"] = ["UNet2DConditionLoadersMixin"] _import_structure["unet"] = ["UNet2DConditionLoadersMixin"]
_import_structure["utils"] = ["AttnProcsLayers"] _import_structure["utils"] = ["AttnProcsLayers"]
if is_transformers_available(): if is_transformers_available():
_import_structure["single_file"].extend(["FromSingleFileMixin"]) _import_structure["single_file"] = ["FromSingleFileMixin"]
_import_structure["lora"] = ["LoraLoaderMixin", "StableDiffusionXLLoraLoaderMixin"] _import_structure["lora"] = ["LoraLoaderMixin", "StableDiffusionXLLoraLoaderMixin"]
_import_structure["textual_inversion"] = ["TextualInversionLoaderMixin"] _import_structure["textual_inversion"] = ["TextualInversionLoaderMixin"]
_import_structure["ip_adapter"] = ["IPAdapterMixin"] _import_structure["ip_adapter"] = ["IPAdapterMixin"]
...@@ -69,7 +70,8 @@ _import_structure["peft"] = ["PeftAdapterMixin"] ...@@ -69,7 +70,8 @@ _import_structure["peft"] = ["PeftAdapterMixin"]
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT: if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
if is_torch_available(): if is_torch_available():
from .single_file import FromOriginalControlnetMixin, FromOriginalVAEMixin from .autoencoder import FromOriginalVAEMixin
from .controlnet import FromOriginalControlNetMixin
from .unet import UNet2DConditionLoadersMixin from .unet import UNet2DConditionLoadersMixin
from .utils import AttnProcsLayers from .utils import AttnProcsLayers
......
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from huggingface_hub.utils import validate_hf_hub_args
from .single_file_utils import (
create_diffusers_vae_model_from_ldm,
fetch_ldm_config_and_checkpoint,
)
class FromOriginalVAEMixin:
    """
    Load pretrained AutoencoderKL weights saved in the `.ckpt` or `.safetensors` format into an [`AutoencoderKL`].
    """

    @classmethod
    @validate_hf_hub_args
    def from_single_file(cls, pretrained_model_link_or_path, **kwargs):
        r"""
        Instantiate an [`AutoencoderKL`] from pretrained AutoencoderKL weights saved in the original `.ckpt` or
        `.safetensors` format. The model is set in evaluation mode (`model.eval()`) by default.

        Parameters:
            pretrained_model_link_or_path (`str` or `os.PathLike`, *optional*):
                Can be either:
                    - A link to the `.ckpt` file (for example
                      `"https://huggingface.co/<repo_id>/blob/main/<path_to_file>.ckpt"`) on the Hub.
                    - A path to a *file* containing all pipeline weights.
            torch_dtype (`str` or `torch.dtype`, *optional*):
                Override the default `torch.dtype` and load the model with another dtype. If `"auto"` is passed, the
                dtype is automatically derived from the model's weights.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download of the model weights and configuration files, overriding the
                cached versions if they exist.
            cache_dir (`Union[str, os.PathLike]`, *optional*):
                Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
                is not used.
            resume_download (`bool`, *optional*, defaults to `False`):
                Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
                incompletely downloaded files are deleted.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            local_files_only (`bool`, *optional*):
                Whether to only load local model weights and configuration files or not. If set to `True`, the model
                won't be downloaded from the Hub.
            token (`str` or *bool*, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
                `diffusers-cli login` (stored in `~/.huggingface`) is used.
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier
                allowed by Git.
            image_size (`int`, *optional*, defaults to 512):
                The image size the model was trained on. Use 512 for all Stable Diffusion v1 models and the Stable
                Diffusion v2 base model. Use 768 for Stable Diffusion v2.
            use_safetensors (`bool`, *optional*, defaults to `True`):
                If set to `True` (the default), the model is loaded from safetensors weights when they are available.
                If set to `False`, safetensors weights are not loaded.
            kwargs (remaining dictionary of keyword arguments, *optional*):
                Can be used to overwrite load and saveable variables (for example the pipeline components of the
                specific pipeline class). The overwritten components are directly passed to the pipelines `__init__`
                method. See example below for more information.

        <Tip warning={true}>

        Make sure to pass both `image_size` and `scaling_factor` to `from_single_file()` if you're loading
        a VAE from SDXL or a Stable Diffusion v2 model or higher.

        </Tip>

        Examples:

        ```py
        from diffusers import AutoencoderKL

        url = "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/blob/main/vae-ft-mse-840000-ema-pruned.safetensors"  # can also be local file
        model = AutoencoderKL.from_single_file(url)
        ```
        """
        # Options consumed by the Hub download / checkpoint-fetch step.
        original_config_file = kwargs.pop("original_config_file", None)
        resume_download = kwargs.pop("resume_download", False)
        force_download = kwargs.pop("force_download", False)
        proxies = kwargs.pop("proxies", None)
        token = kwargs.pop("token", None)
        cache_dir = kwargs.pop("cache_dir", None)
        local_files_only = kwargs.pop("local_files_only", None)
        revision = kwargs.pop("revision", None)
        torch_dtype = kwargs.pop("torch_dtype", None)
        use_safetensors = kwargs.pop("use_safetensors", True)

        class_name = cls.__name__
        original_config, checkpoint = fetch_ldm_config_and_checkpoint(
            pretrained_model_link_or_path=pretrained_model_link_or_path,
            class_name=class_name,
            original_config_file=original_config_file,
            resume_download=resume_download,
            force_download=force_download,
            proxies=proxies,
            token=token,
            revision=revision,
            local_files_only=local_files_only,
            use_safetensors=use_safetensors,
            cache_dir=cache_dir,
        )

        image_size = kwargs.pop("image_size", None)
        # NOTE(review): the Tip above mentions `scaling_factor`, but it is never popped or
        # forwarded here — confirm whether `create_diffusers_vae_model_from_ldm` derives it
        # from `original_config`, or whether it should be passed through explicitly.
        component = create_diffusers_vae_model_from_ldm(class_name, original_config, checkpoint, image_size=image_size)
        vae = component["vae"]
        if torch_dtype is not None:
            vae = vae.to(torch_dtype)

        return vae
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from huggingface_hub.utils import validate_hf_hub_args
from .single_file_utils import (
create_diffusers_controlnet_model_from_ldm,
fetch_ldm_config_and_checkpoint,
)
class FromOriginalControlNetMixin:
    """
    Load pretrained ControlNet weights saved in the `.ckpt` or `.safetensors` format into a [`ControlNetModel`].
    """

    @classmethod
    @validate_hf_hub_args
    def from_single_file(cls, pretrained_model_link_or_path, **kwargs):
        r"""
        Instantiate a [`ControlNetModel`] from pretrained ControlNet weights saved in the original `.ckpt` or
        `.safetensors` format. The pipeline is set in evaluation mode (`model.eval()`) by default.

        Parameters:
            pretrained_model_link_or_path (`str` or `os.PathLike`, *optional*):
                Can be either:
                    - A link to the `.ckpt` file (for example
                      `"https://huggingface.co/<repo_id>/blob/main/<path_to_file>.ckpt"`) on the Hub.
                    - A path to a *file* containing all pipeline weights.
            torch_dtype (`str` or `torch.dtype`, *optional*):
                Override the default `torch.dtype` and load the model with another dtype. If `"auto"` is passed, the
                dtype is automatically derived from the model's weights.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download of the model weights and configuration files, overriding the
                cached versions if they exist.
            cache_dir (`Union[str, os.PathLike]`, *optional*):
                Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
                is not used.
            resume_download (`bool`, *optional*, defaults to `False`):
                Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
                incompletely downloaded files are deleted.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            local_files_only (`bool`, *optional*):
                Whether to only load local model weights and configuration files or not. If set to `True`, the model
                won't be downloaded from the Hub.
            token (`str` or *bool*, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
                `diffusers-cli login` (stored in `~/.huggingface`) is used.
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier
                allowed by Git.
            use_safetensors (`bool`, *optional*, defaults to `True`):
                If set to `True` (the default), the model is loaded from safetensors weights when they are available.
                If set to `False`, safetensors weights are not loaded.
            image_size (`int`, *optional*, defaults to 512):
                The image size the model was trained on. Use 512 for all Stable Diffusion v1 models and the Stable
                Diffusion v2 base model. Use 768 for Stable Diffusion v2.
            upcast_attention (`bool`, *optional*, defaults to `None`):
                Whether the attention computation should always be upcasted.
            kwargs (remaining dictionary of keyword arguments, *optional*):
                Can be used to overwrite load and saveable variables (for example the pipeline components of the
                specific pipeline class). The overwritten components are directly passed to the pipelines `__init__`
                method. See example below for more information.

        Examples:

        ```py
        from diffusers import StableDiffusionControlNetPipeline, ControlNetModel

        url = "https://huggingface.co/lllyasviel/ControlNet-v1-1/blob/main/control_v11p_sd15_canny.pth"  # can also be a local path
        model = ControlNetModel.from_single_file(url)

        url = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned.safetensors"  # can also be a local path
        pipe = StableDiffusionControlNetPipeline.from_single_file(url, controlnet=controlnet)
        ```
        """
        # Pull every option this loader understands out of `kwargs` up front;
        # popping order has no observable effect since each key is independent.
        config_file = kwargs.pop("original_config_file", None)
        dl_resume = kwargs.pop("resume_download", False)
        dl_force = kwargs.pop("force_download", False)
        dl_proxies = kwargs.pop("proxies", None)
        hub_token = kwargs.pop("token", None)
        hub_cache_dir = kwargs.pop("cache_dir", None)
        offline_only = kwargs.pop("local_files_only", None)
        hub_revision = kwargs.pop("revision", None)
        dtype = kwargs.pop("torch_dtype", None)
        prefer_safetensors = kwargs.pop("use_safetensors", True)
        upcast_attn = kwargs.pop("upcast_attention", False)
        target_image_size = kwargs.pop("image_size", None)

        # Resolve the original LDM config and raw checkpoint (downloading if needed).
        original_config, checkpoint = fetch_ldm_config_and_checkpoint(
            pretrained_model_link_or_path=pretrained_model_link_or_path,
            class_name=cls.__name__,
            original_config_file=config_file,
            resume_download=dl_resume,
            force_download=dl_force,
            proxies=dl_proxies,
            token=hub_token,
            revision=hub_revision,
            local_files_only=offline_only,
            use_safetensors=prefer_safetensors,
            cache_dir=hub_cache_dir,
        )

        # Convert the LDM-format weights into a diffusers ControlNetModel.
        model = create_diffusers_controlnet_model_from_ldm(
            cls.__name__,
            original_config,
            checkpoint,
            upcast_attention=upcast_attn,
            image_size=target_image_size,
        )["controlnet"]

        if dtype is not None:
            model = model.to(dtype)

        return model
This diff is collapsed.
This diff is collapsed.
...@@ -17,7 +17,6 @@ import torch ...@@ -17,7 +17,6 @@ import torch
import torch.nn as nn import torch.nn as nn
from ...configuration_utils import ConfigMixin, register_to_config from ...configuration_utils import ConfigMixin, register_to_config
from ...loaders import FromOriginalVAEMixin
from ...utils import is_torch_version from ...utils import is_torch_version
from ...utils.accelerate_utils import apply_forward_hook from ...utils.accelerate_utils import apply_forward_hook
from ..attention_processor import CROSS_ATTENTION_PROCESSORS, AttentionProcessor, AttnProcessor from ..attention_processor import CROSS_ATTENTION_PROCESSORS, AttentionProcessor, AttnProcessor
...@@ -162,7 +161,7 @@ class TemporalDecoder(nn.Module): ...@@ -162,7 +161,7 @@ class TemporalDecoder(nn.Module):
return sample return sample
class AutoencoderKLTemporalDecoder(ModelMixin, ConfigMixin, FromOriginalVAEMixin): class AutoencoderKLTemporalDecoder(ModelMixin, ConfigMixin):
r""" r"""
A VAE model with KL loss for encoding images into latents and decoding latent representations into images. A VAE model with KL loss for encoding images into latents and decoding latent representations into images.
......
...@@ -19,7 +19,7 @@ from torch import nn ...@@ -19,7 +19,7 @@ from torch import nn
from torch.nn import functional as F from torch.nn import functional as F
from ..configuration_utils import ConfigMixin, register_to_config from ..configuration_utils import ConfigMixin, register_to_config
from ..loaders import FromOriginalControlnetMixin from ..loaders import FromOriginalControlNetMixin
from ..utils import BaseOutput, logging from ..utils import BaseOutput, logging
from .attention_processor import ( from .attention_processor import (
ADDED_KV_ATTENTION_PROCESSORS, ADDED_KV_ATTENTION_PROCESSORS,
...@@ -108,7 +108,7 @@ class ControlNetConditioningEmbedding(nn.Module): ...@@ -108,7 +108,7 @@ class ControlNetConditioningEmbedding(nn.Module):
return embedding return embedding
class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlnetMixin): class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlNetMixin):
""" """
A ControlNet model. A ControlNet model.
......
...@@ -32,6 +32,7 @@ from .. import __version__ ...@@ -32,6 +32,7 @@ from .. import __version__
from ..utils import ( from ..utils import (
CONFIG_NAME, CONFIG_NAME,
FLAX_WEIGHTS_NAME, FLAX_WEIGHTS_NAME,
SAFETENSORS_FILE_EXTENSION,
SAFETENSORS_WEIGHTS_NAME, SAFETENSORS_WEIGHTS_NAME,
WEIGHTS_NAME, WEIGHTS_NAME,
_add_variant, _add_variant,
...@@ -102,10 +103,11 @@ def load_state_dict(checkpoint_file: Union[str, os.PathLike], variant: Optional[ ...@@ -102,10 +103,11 @@ def load_state_dict(checkpoint_file: Union[str, os.PathLike], variant: Optional[
Reads a checkpoint file, returning properly formatted errors if they arise. Reads a checkpoint file, returning properly formatted errors if they arise.
""" """
try: try:
if os.path.basename(checkpoint_file) == _add_variant(WEIGHTS_NAME, variant): file_extension = os.path.basename(checkpoint_file).split(".")[-1]
return torch.load(checkpoint_file, map_location="cpu") if file_extension == SAFETENSORS_FILE_EXTENSION:
else:
return safetensors.torch.load_file(checkpoint_file, device="cpu") return safetensors.torch.load_file(checkpoint_file, device="cpu")
else:
return torch.load(checkpoint_file, map_location="cpu")
except Exception as e: except Exception as e:
try: try:
with open(checkpoint_file) as f: with open(checkpoint_file) as f:
......
...@@ -351,7 +351,7 @@ def get_class_obj_and_candidates( ...@@ -351,7 +351,7 @@ def get_class_obj_and_candidates(
def _get_pipeline_class( def _get_pipeline_class(
class_obj, class_obj,
config, config=None,
load_connected_pipeline=False, load_connected_pipeline=False,
custom_pipeline=None, custom_pipeline=None,
repo_id=None, repo_id=None,
...@@ -389,7 +389,12 @@ def _get_pipeline_class( ...@@ -389,7 +389,12 @@ def _get_pipeline_class(
return class_obj return class_obj
diffusers_module = importlib.import_module(class_obj.__module__.split(".")[0]) diffusers_module = importlib.import_module(class_obj.__module__.split(".")[0])
class_name = config["_class_name"] class_name = class_name or config["_class_name"]
if not class_name:
raise ValueError(
"The class name could not be found in the configuration file. Please make sure to pass the correct `class_name`."
)
class_name = class_name[4:] if class_name.startswith("Flax") else class_name class_name = class_name[4:] if class_name.startswith("Flax") else class_name
pipeline_cls = getattr(diffusers_module, class_name) pipeline_cls = getattr(diffusers_module, class_name)
......
...@@ -28,6 +28,7 @@ from .constants import ( ...@@ -28,6 +28,7 @@ from .constants import (
MIN_PEFT_VERSION, MIN_PEFT_VERSION,
ONNX_EXTERNAL_WEIGHTS_NAME, ONNX_EXTERNAL_WEIGHTS_NAME,
ONNX_WEIGHTS_NAME, ONNX_WEIGHTS_NAME,
SAFETENSORS_FILE_EXTENSION,
SAFETENSORS_WEIGHTS_NAME, SAFETENSORS_WEIGHTS_NAME,
USE_PEFT_BACKEND, USE_PEFT_BACKEND,
WEIGHTS_NAME, WEIGHTS_NAME,
......
...@@ -31,6 +31,7 @@ WEIGHTS_NAME = "diffusion_pytorch_model.bin" ...@@ -31,6 +31,7 @@ WEIGHTS_NAME = "diffusion_pytorch_model.bin"
FLAX_WEIGHTS_NAME = "diffusion_flax_model.msgpack" FLAX_WEIGHTS_NAME = "diffusion_flax_model.msgpack"
ONNX_WEIGHTS_NAME = "model.onnx" ONNX_WEIGHTS_NAME = "model.onnx"
SAFETENSORS_WEIGHTS_NAME = "diffusion_pytorch_model.safetensors" SAFETENSORS_WEIGHTS_NAME = "diffusion_pytorch_model.safetensors"
SAFETENSORS_FILE_EXTENSION = "safetensors"
ONNX_EXTERNAL_WEIGHTS_NAME = "weights.pb" ONNX_EXTERNAL_WEIGHTS_NAME = "weights.pb"
HUGGINGFACE_CO_RESOLVE_ENDPOINT = os.environ.get("HF_ENDPOINT", "https://huggingface.co") HUGGINGFACE_CO_RESOLVE_ENDPOINT = os.environ.get("HF_ENDPOINT", "https://huggingface.co")
DIFFUSERS_DYNAMIC_MODULE_NAME = "diffusers_modules" DIFFUSERS_DYNAMIC_MODULE_NAME = "diffusers_modules"
......
...@@ -244,15 +244,15 @@ def _get_model_file( ...@@ -244,15 +244,15 @@ def _get_model_file(
pretrained_model_name_or_path: Union[str, Path], pretrained_model_name_or_path: Union[str, Path],
*, *,
weights_name: str, weights_name: str,
subfolder: Optional[str], subfolder: Optional[str] = None,
cache_dir: Optional[str], cache_dir: Optional[str] = None,
force_download: bool, force_download: bool = False,
proxies: Optional[Dict], proxies: Optional[Dict] = None,
resume_download: bool, resume_download: bool = False,
local_files_only: bool, local_files_only: bool = False,
token: Optional[str], token: Optional[str] = None,
user_agent: Union[Dict, str, None], user_agent: Optional[Union[Dict, str]] = None,
revision: Optional[str], revision: Optional[str] = None,
commit_hash: Optional[str] = None, commit_hash: Optional[str] = None,
): ):
pretrained_model_name_or_path = str(pretrained_model_name_or_path) pretrained_model_name_or_path = str(pretrained_model_name_or_path)
......
...@@ -37,6 +37,7 @@ from diffusers.utils.testing_utils import ( ...@@ -37,6 +37,7 @@ from diffusers.utils.testing_utils import (
enable_full_determinism, enable_full_determinism,
load_image, load_image,
load_numpy, load_numpy,
numpy_cosine_similarity_distance,
require_python39_or_higher, require_python39_or_higher,
require_torch_2, require_torch_2,
require_torch_gpu, require_torch_gpu,
...@@ -1022,39 +1023,49 @@ class ControlNetPipelineSlowTests(unittest.TestCase): ...@@ -1022,39 +1023,49 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
def test_load_local(self): def test_load_local(self):
controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny") controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny")
pipe_1 = StableDiffusionControlNetPipeline.from_pretrained( pipe = StableDiffusionControlNetPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet "runwayml/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
) )
pipe.unet.set_default_attn_processor()
pipe.enable_model_cpu_offload()
controlnet = ControlNetModel.from_single_file( controlnet = ControlNetModel.from_single_file(
"https://huggingface.co/lllyasviel/ControlNet-v1-1/blob/main/control_v11p_sd15_canny.pth" "https://huggingface.co/lllyasviel/ControlNet-v1-1/blob/main/control_v11p_sd15_canny.pth"
) )
pipe_2 = StableDiffusionControlNetPipeline.from_single_file( pipe_sf = StableDiffusionControlNetPipeline.from_single_file(
"https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors", "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors",
safety_checker=None, safety_checker=None,
controlnet=controlnet, controlnet=controlnet,
scheduler_type="pndm",
) )
pipes = [pipe_1, pipe_2] pipe_sf.unet.set_default_attn_processor()
images = [] pipe_sf.enable_model_cpu_offload()
for pipe in pipes: control_image = load_image(
pipe.enable_model_cpu_offload()
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
prompt = "bird"
image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png" "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png"
) ).resize((512, 512))
prompt = "bird"
output = pipe(prompt, image, generator=generator, output_type="np", num_inference_steps=3) generator = torch.Generator(device="cpu").manual_seed(0)
images.append(output.images[0]) output = pipe(
prompt,
image=control_image,
generator=generator,
output_type="np",
num_inference_steps=3,
).images[0]
del pipe generator = torch.Generator(device="cpu").manual_seed(0)
gc.collect() output_sf = pipe_sf(
torch.cuda.empty_cache() prompt,
image=control_image,
generator=generator,
output_type="np",
num_inference_steps=3,
).images[0]
assert np.abs(images[0] - images[1]).max() < 1e-3 max_diff = numpy_cosine_similarity_distance(output_sf.flatten(), output.flatten())
assert max_diff < 1e-3
@slow @slow
......
...@@ -39,6 +39,7 @@ from diffusers.utils.testing_utils import ( ...@@ -39,6 +39,7 @@ from diffusers.utils.testing_utils import (
enable_full_determinism, enable_full_determinism,
floats_tensor, floats_tensor,
load_numpy, load_numpy,
numpy_cosine_similarity_distance,
require_torch_gpu, require_torch_gpu,
slow, slow,
torch_device, torch_device,
...@@ -421,33 +422,33 @@ class ControlNetImg2ImgPipelineSlowTests(unittest.TestCase): ...@@ -421,33 +422,33 @@ class ControlNetImg2ImgPipelineSlowTests(unittest.TestCase):
def test_load_local(self): def test_load_local(self):
controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny") controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny")
pipe_1 = StableDiffusionControlNetImg2ImgPipeline.from_pretrained( pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet "runwayml/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
) )
pipe.unet.set_default_attn_processor()
pipe.enable_model_cpu_offload()
controlnet = ControlNetModel.from_single_file( controlnet = ControlNetModel.from_single_file(
"https://huggingface.co/lllyasviel/ControlNet-v1-1/blob/main/control_v11p_sd15_canny.pth" "https://huggingface.co/lllyasviel/ControlNet-v1-1/blob/main/control_v11p_sd15_canny.pth"
) )
pipe_2 = StableDiffusionControlNetImg2ImgPipeline.from_single_file( pipe_sf = StableDiffusionControlNetImg2ImgPipeline.from_single_file(
"https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors", "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors",
safety_checker=None, safety_checker=None,
controlnet=controlnet, controlnet=controlnet,
scheduler_type="pndm",
) )
pipe_sf.unet.set_default_attn_processor()
pipe_sf.enable_model_cpu_offload()
control_image = load_image( control_image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png" "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png"
).resize((512, 512)) ).resize((512, 512))
image = load_image( image = load_image(
"https://huggingface.co/lllyasviel/sd-controlnet-canny/resolve/main/images/bird.png" "https://huggingface.co/lllyasviel/sd-controlnet-canny/resolve/main/images/bird.png"
).resize((512, 512)) ).resize((512, 512))
prompt = "bird"
pipes = [pipe_1, pipe_2]
images = []
for pipe in pipes:
pipe.enable_model_cpu_offload()
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0) generator = torch.Generator(device="cpu").manual_seed(0)
prompt = "bird"
output = pipe( output = pipe(
prompt, prompt,
image=image, image=image,
...@@ -456,11 +457,18 @@ class ControlNetImg2ImgPipelineSlowTests(unittest.TestCase): ...@@ -456,11 +457,18 @@ class ControlNetImg2ImgPipelineSlowTests(unittest.TestCase):
generator=generator, generator=generator,
output_type="np", output_type="np",
num_inference_steps=3, num_inference_steps=3,
) ).images[0]
images.append(output.images[0])
del pipe generator = torch.Generator(device="cpu").manual_seed(0)
gc.collect() output_sf = pipe_sf(
torch.cuda.empty_cache() prompt,
image=image,
control_image=control_image,
strength=0.9,
generator=generator,
output_type="np",
num_inference_steps=3,
).images[0]
assert np.abs(images[0] - images[1]).max() < 1e-3 max_diff = numpy_cosine_similarity_distance(output_sf.flatten(), output.flatten())
assert max_diff < 1e-3
...@@ -569,6 +569,7 @@ class ControlNetInpaintPipelineSlowTests(unittest.TestCase): ...@@ -569,6 +569,7 @@ class ControlNetInpaintPipelineSlowTests(unittest.TestCase):
"https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors", "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors",
safety_checker=None, safety_checker=None,
controlnet=controlnet, controlnet=controlnet,
scheduler_type="pndm",
) )
control_image = load_image( control_image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png" "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png"
...@@ -605,4 +606,5 @@ class ControlNetInpaintPipelineSlowTests(unittest.TestCase): ...@@ -605,4 +606,5 @@ class ControlNetInpaintPipelineSlowTests(unittest.TestCase):
gc.collect() gc.collect()
torch.cuda.empty_cache() torch.cuda.empty_cache()
assert np.abs(images[0] - images[1]).max() < 1e-3 max_diff = numpy_cosine_similarity_distance(images[0].flatten(), images[1].flatten())
assert max_diff < 1e-3
...@@ -31,7 +31,14 @@ from diffusers import ( ...@@ -31,7 +31,14 @@ from diffusers import (
from diffusers.models.unets.unet_2d_blocks import UNetMidBlock2D from diffusers.models.unets.unet_2d_blocks import UNetMidBlock2D
from diffusers.pipelines.controlnet.pipeline_controlnet import MultiControlNetModel from diffusers.pipelines.controlnet.pipeline_controlnet import MultiControlNetModel
from diffusers.utils.import_utils import is_xformers_available from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.testing_utils import enable_full_determinism, load_image, require_torch_gpu, slow, torch_device from diffusers.utils.testing_utils import (
enable_full_determinism,
load_image,
numpy_cosine_similarity_distance,
require_torch_gpu,
slow,
torch_device,
)
from diffusers.utils.torch_utils import randn_tensor from diffusers.utils.torch_utils import randn_tensor
from ..pipeline_params import ( from ..pipeline_params import (
...@@ -819,6 +826,41 @@ class ControlNetSDXLPipelineSlowTests(unittest.TestCase): ...@@ -819,6 +826,41 @@ class ControlNetSDXLPipelineSlowTests(unittest.TestCase):
expected_image = np.array([0.4399, 0.5112, 0.5478, 0.4314, 0.472, 0.4823, 0.4647, 0.4957, 0.4853]) expected_image = np.array([0.4399, 0.5112, 0.5478, 0.4314, 0.472, 0.4823, 0.4647, 0.4957, 0.4853])
assert np.allclose(original_image, expected_image, atol=1e-04) assert np.allclose(original_image, expected_image, atol=1e-04)
def test_download_ckpt_diff_format_is_same(self):
controlnet = ControlNetModel.from_pretrained("diffusers/controlnet-depth-sdxl-1.0", torch_dtype=torch.float16)
single_file_url = (
"https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/sd_xl_base_1.0.safetensors"
)
pipe_single_file = StableDiffusionXLControlNetPipeline.from_single_file(
single_file_url, controlnet=controlnet, torch_dtype=torch.float16
)
pipe_single_file.unet.set_default_attn_processor()
pipe_single_file.enable_model_cpu_offload()
pipe_single_file.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
prompt = "Stormtrooper's lecture"
image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/stormtrooper_depth.png"
)
single_file_images = pipe_single_file(
prompt, image=image, generator=generator, output_type="np", num_inference_steps=2
).images
generator = torch.Generator(device="cpu").manual_seed(0)
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet, torch_dtype=torch.float16
)
pipe.unet.set_default_attn_processor()
pipe.enable_model_cpu_offload()
images = pipe(prompt, image=image, generator=generator, output_type="np", num_inference_steps=2).images
assert images[0].shape == (512, 512, 3)
assert single_file_images[0].shape == (512, 512, 3)
max_diff = numpy_cosine_similarity_distance(images[0].flatten(), single_file_images[0].flatten())
assert max_diff < 5e-2
class StableDiffusionSSD1BControlNetPipelineFastTests(StableDiffusionXLControlNetPipelineFastTests): class StableDiffusionSSD1BControlNetPipelineFastTests(StableDiffusionXLControlNetPipelineFastTests):
def test_controlnet_sdxl_guess(self): def test_controlnet_sdxl_guess(self):
......
...@@ -1262,13 +1262,13 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase): ...@@ -1262,13 +1262,13 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
def test_download_ckpt_diff_format_is_same(self): def test_download_ckpt_diff_format_is_same(self):
ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt" ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt"
pipe = StableDiffusionPipeline.from_single_file(ckpt_path) sf_pipe = StableDiffusionPipeline.from_single_file(ckpt_path)
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config) sf_pipe.scheduler = DDIMScheduler.from_config(sf_pipe.scheduler.config)
pipe.unet.set_attn_processor(AttnProcessor()) sf_pipe.unet.set_attn_processor(AttnProcessor())
pipe.to("cuda") sf_pipe.to("cuda")
generator = torch.Generator(device="cpu").manual_seed(0) generator = torch.Generator(device="cpu").manual_seed(0)
image_ckpt = pipe("a turtle", num_inference_steps=2, generator=generator, output_type="np").images[0] image_single_file = sf_pipe("a turtle", num_inference_steps=2, generator=generator, output_type="np").images[0]
pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5") pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config) pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
...@@ -1278,7 +1278,7 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase): ...@@ -1278,7 +1278,7 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
generator = torch.Generator(device="cpu").manual_seed(0) generator = torch.Generator(device="cpu").manual_seed(0)
image = pipe("a turtle", num_inference_steps=2, generator=generator, output_type="np").images[0] image = pipe("a turtle", num_inference_steps=2, generator=generator, output_type="np").images[0]
max_diff = numpy_cosine_similarity_distance(image.flatten(), image_ckpt.flatten()) max_diff = numpy_cosine_similarity_distance(image.flatten(), image_single_file.flatten())
assert max_diff < 1e-3 assert max_diff < 1e-3
......
...@@ -43,6 +43,7 @@ from diffusers.utils.testing_utils import ( ...@@ -43,6 +43,7 @@ from diffusers.utils.testing_utils import (
load_image, load_image,
load_numpy, load_numpy,
nightly, nightly,
numpy_cosine_similarity_distance,
require_python39_or_higher, require_python39_or_higher,
require_torch_2, require_torch_2,
require_torch_gpu, require_torch_gpu,
...@@ -771,7 +772,9 @@ class StableDiffusionInpaintPipelineSlowTests(unittest.TestCase): ...@@ -771,7 +772,9 @@ class StableDiffusionInpaintPipelineSlowTests(unittest.TestCase):
inputs["num_inference_steps"] = 5 inputs["num_inference_steps"] = 5
image = pipe(**inputs).images[0] image = pipe(**inputs).images[0]
assert np.max(np.abs(image - image_ckpt)) < 5e-4 max_diff = numpy_cosine_similarity_distance(image.flatten(), image_ckpt.flatten())
assert max_diff < 1e-4
@slow @slow
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
# limitations under the License. # limitations under the License.
import copy import copy
import gc
import tempfile import tempfile
import unittest import unittest
...@@ -1024,6 +1025,11 @@ class StableDiffusionXLPipelineFastTests( ...@@ -1024,6 +1025,11 @@ class StableDiffusionXLPipelineFastTests(
@slow @slow
class StableDiffusionXLPipelineIntegrationTests(unittest.TestCase): class StableDiffusionXLPipelineIntegrationTests(unittest.TestCase):
def tearDown(self):
    # Integration tests load full SDXL pipelines onto the GPU; after each test
    # run the Python GC and release cached CUDA allocations so subsequent
    # tests do not fail with out-of-memory errors.
    super().tearDown()
    gc.collect()
    torch.cuda.empty_cache()
def test_stable_diffusion_lcm(self): def test_stable_diffusion_lcm(self):
torch.manual_seed(0) torch.manual_seed(0)
unet = UNet2DConditionModel.from_pretrained( unet = UNet2DConditionModel.from_pretrained(
...@@ -1049,3 +1055,30 @@ class StableDiffusionXLPipelineIntegrationTests(unittest.TestCase): ...@@ -1049,3 +1055,30 @@ class StableDiffusionXLPipelineIntegrationTests(unittest.TestCase):
max_diff = numpy_cosine_similarity_distance(image.flatten(), expected_image.flatten()) max_diff = numpy_cosine_similarity_distance(image.flatten(), expected_image.flatten())
assert max_diff < 1e-2 assert max_diff < 1e-2
def test_download_ckpt_diff_format_is_same(self):
    """An SDXL pipeline built from the single-file checkpoint must generate
    (near-)identical images to the same model loaded from the diffusers-format repo.
    """
    checkpoint_url = (
        "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/sd_xl_base_1.0.safetensors"
    )

    # Image from the pipeline restored out of the original .safetensors file.
    single_file_pipe = StableDiffusionXLPipeline.from_single_file(checkpoint_url, torch_dtype=torch.float16)
    single_file_pipe.scheduler = DDIMScheduler.from_config(single_file_pipe.scheduler.config)
    single_file_pipe.unet.set_default_attn_processor()
    single_file_pipe.enable_model_cpu_offload()
    image_ckpt = single_file_pipe(
        "a turtle",
        num_inference_steps=2,
        generator=torch.Generator(device="cpu").manual_seed(0),
        output_type="np",
    ).images[0]

    # Image from the equivalent diffusers-format hub pipeline, same seed.
    hub_pipe = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
    )
    hub_pipe.scheduler = DDIMScheduler.from_config(hub_pipe.scheduler.config)
    hub_pipe.unet.set_default_attn_processor()
    hub_pipe.enable_model_cpu_offload()
    image = hub_pipe(
        "a turtle",
        num_inference_steps=2,
        generator=torch.Generator(device="cpu").manual_seed(0),
        output_type="np",
    ).images[0]

    max_diff = numpy_cosine_similarity_distance(image.flatten(), image_ckpt.flatten())
    assert max_diff < 6e-3
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment