Unverified commit 189e9f01, authored by Dhruv Nair, committed by GitHub

Test Cleanup Precision issues (#4812)



* proposal for flaky tests

* more precision fixes

* move more tests to use cosine distance

* more test fixes

* clean up

* use default attn

* clean up

* update expected value

* make style

* make style

* Apply suggestions from code review

* Update src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py

* make style

* fix failing tests

---------
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
parent 38466c36
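Most of the test changes below replace element-wise `np.abs(a - b).max()` comparisons with `numpy_cosine_similarity_distance` from `diffusers.utils.testing_utils`, which is much less sensitive to the per-element noise that made these tests flaky across hardware. A minimal sketch of the idea, assuming the helper is essentially one minus the cosine similarity of the flattened arrays (the function below is an illustrative reimplementation, not the library source):

```python
import numpy as np

def cosine_similarity_distance(a: np.ndarray, b: np.ndarray) -> float:
    # Illustrative stand-in for diffusers' numpy_cosine_similarity_distance:
    # 1 - cos(a, b) on the flattened arrays is ~0 when two outputs agree in
    # direction, even if individual elements drift slightly.
    a, b = a.flatten().astype(np.float64), b.flatten().astype(np.float64)
    similarity = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
    return float(1.0 - similarity)

# Usage mirroring the updated tests: two near-identical images pass a tight bound.
image = np.random.rand(64, 64, 3)
image_noisy = image + 1e-4 * np.random.randn(64, 64, 3)
assert cosine_similarity_distance(image, image_noisy) < 1e-3
```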
@@ -1621,6 +1621,24 @@ LORA_ATTENTION_PROCESSORS = (
     LoRAAttnAddedKVProcessor,
 )
 
+ADDED_KV_ATTENTION_PROCESSORS = (
+    AttnAddedKVProcessor,
+    SlicedAttnAddedKVProcessor,
+    AttnAddedKVProcessor2_0,
+    XFormersAttnAddedKVProcessor,
+    LoRAAttnAddedKVProcessor,
+)
+
+CROSS_ATTENTION_PROCESSORS = (
+    AttnProcessor,
+    AttnProcessor2_0,
+    XFormersAttnProcessor,
+    SlicedAttnProcessor,
+    LoRAAttnProcessor,
+    LoRAAttnProcessor2_0,
+    LoRAXFormersAttnProcessor,
+)
+
 AttentionProcessor = Union[
     AttnProcessor,
     AttnProcessor2_0,
...
@@ -20,7 +20,13 @@ import torch.nn as nn
 from ..configuration_utils import ConfigMixin, register_to_config
 from ..loaders import FromOriginalVAEMixin
 from ..utils import BaseOutput, apply_forward_hook
-from .attention_processor import AttentionProcessor, AttnProcessor
+from .attention_processor import (
+    ADDED_KV_ATTENTION_PROCESSORS,
+    CROSS_ATTENTION_PROCESSORS,
+    AttentionProcessor,
+    AttnAddedKVProcessor,
+    AttnProcessor,
+)
 from .modeling_utils import ModelMixin
 from .vae import Decoder, DecoderOutput, DiagonalGaussianDistribution, Encoder
...
@@ -228,7 +234,16 @@ class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalVAEMixin):
         """
         Disables custom attention processors and sets the default attention implementation.
         """
-        self.set_attn_processor(AttnProcessor())
+        if all(proc.__class__ in ADDED_KV_ATTENTION_PROCESSORS for proc in self.attn_processors.values()):
+            processor = AttnAddedKVProcessor()
+        elif all(proc.__class__ in CROSS_ATTENTION_PROCESSORS for proc in self.attn_processors.values()):
+            processor = AttnProcessor()
+        else:
+            raise ValueError(
+                f"Cannot call `set_default_attn_processor` when attention processors are of type {next(iter(self.attn_processors.values()))}"
+            )
+        self.set_attn_processor(processor)
 
     @apply_forward_hook
     def encode(self, x: torch.FloatTensor, return_dict: bool = True) -> AutoencoderKLOutput:
...
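The `set_default_attn_processor` change above is repeated verbatim in each model file below: if every current processor is an added-KV variant the default becomes `AttnAddedKVProcessor`, if every one is a cross-attention variant it becomes `AttnProcessor`, and a mixed set raises. A minimal sketch of the behavior on a tiny UNet (the config mirrors the dummy configs used in the diffusers tests and is only meant to keep the model small):

```python
from diffusers import UNet2DConditionModel
from diffusers.models.attention_processor import AttnProcessor, AttnProcessor2_0

# Tiny UNet so the example runs quickly; the dimensions are arbitrary.
unet = UNet2DConditionModel(
    block_out_channels=(32, 64),
    layers_per_block=2,
    sample_size=32,
    in_channels=4,
    out_channels=4,
    down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
    up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
    cross_attention_dim=32,
)

# All processors are cross-attention variants, so the new logic resolves the
# default to plain AttnProcessor rather than raising.
unet.set_attn_processor(AttnProcessor2_0())
unet.set_default_attn_processor()
assert all(isinstance(p, AttnProcessor) for p in unet.attn_processors.values())
```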
@@ -21,7 +21,13 @@ from torch.nn import functional as F
 from ..configuration_utils import ConfigMixin, register_to_config
 from ..loaders import FromOriginalControlnetMixin
 from ..utils import BaseOutput, logging
-from .attention_processor import AttentionProcessor, AttnProcessor
+from .attention_processor import (
+    ADDED_KV_ATTENTION_PROCESSORS,
+    CROSS_ATTENTION_PROCESSORS,
+    AttentionProcessor,
+    AttnAddedKVProcessor,
+    AttnProcessor,
+)
 from .embeddings import TextImageProjection, TextImageTimeEmbedding, TextTimeEmbedding, TimestepEmbedding, Timesteps
 from .modeling_utils import ModelMixin
 from .unet_2d_blocks import (
...
@@ -550,7 +556,16 @@ class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlnetMixin):
         """
         Disables custom attention processors and sets the default attention implementation.
         """
-        self.set_attn_processor(AttnProcessor())
+        if all(proc.__class__ in ADDED_KV_ATTENTION_PROCESSORS for proc in self.attn_processors.values()):
+            processor = AttnAddedKVProcessor()
+        elif all(proc.__class__ in CROSS_ATTENTION_PROCESSORS for proc in self.attn_processors.values()):
+            processor = AttnProcessor()
+        else:
+            raise ValueError(
+                f"Cannot call `set_default_attn_processor` when attention processors are of type {next(iter(self.attn_processors.values()))}"
+            )
+        self.set_attn_processor(processor)
 
     # Copied from diffusers.models.unet_2d_condition.UNet2DConditionModel.set_attention_slice
     def set_attention_slice(self, slice_size):
...
@@ -8,7 +8,13 @@ from torch import nn
 from ..configuration_utils import ConfigMixin, register_to_config
 from ..utils import BaseOutput
 from .attention import BasicTransformerBlock
-from .attention_processor import AttentionProcessor, AttnProcessor
+from .attention_processor import (
+    ADDED_KV_ATTENTION_PROCESSORS,
+    CROSS_ATTENTION_PROCESSORS,
+    AttentionProcessor,
+    AttnAddedKVProcessor,
+    AttnProcessor,
+)
 from .embeddings import TimestepEmbedding, Timesteps
 from .modeling_utils import ModelMixin
...
@@ -224,7 +230,16 @@ class PriorTransformer(ModelMixin, ConfigMixin):
         """
         Disables custom attention processors and sets the default attention implementation.
         """
-        self.set_attn_processor(AttnProcessor())
+        if all(proc.__class__ in ADDED_KV_ATTENTION_PROCESSORS for proc in self.attn_processors.values()):
+            processor = AttnAddedKVProcessor()
+        elif all(proc.__class__ in CROSS_ATTENTION_PROCESSORS for proc in self.attn_processors.values()):
+            processor = AttnProcessor()
+        else:
+            raise ValueError(
+                f"Cannot call `set_default_attn_processor` when attention processors are of type {next(iter(self.attn_processors.values()))}"
+            )
+        self.set_attn_processor(processor)
 
     def forward(
         self,
...
@@ -22,7 +22,13 @@ from ..configuration_utils import ConfigMixin, register_to_config
 from ..loaders import UNet2DConditionLoadersMixin
 from ..utils import BaseOutput, logging
 from .activations import get_activation
-from .attention_processor import AttentionProcessor, AttnProcessor
+from .attention_processor import (
+    ADDED_KV_ATTENTION_PROCESSORS,
+    CROSS_ATTENTION_PROCESSORS,
+    AttentionProcessor,
+    AttnAddedKVProcessor,
+    AttnProcessor,
+)
 from .embeddings import (
     GaussianFourierProjection,
     ImageHintTimeEmbedding,
...
@@ -639,7 +645,16 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
         """
         Disables custom attention processors and sets the default attention implementation.
         """
-        self.set_attn_processor(AttnProcessor())
+        if all(proc.__class__ in ADDED_KV_ATTENTION_PROCESSORS for proc in self.attn_processors.values()):
+            processor = AttnAddedKVProcessor()
+        elif all(proc.__class__ in CROSS_ATTENTION_PROCESSORS for proc in self.attn_processors.values()):
+            processor = AttnProcessor()
+        else:
+            raise ValueError(
+                f"Cannot call `set_default_attn_processor` when attention processors are of type {next(iter(self.attn_processors.values()))}"
+            )
+        self.set_attn_processor(processor)
 
     def set_attention_slice(self, slice_size):
         r"""
...
@@ -22,7 +22,13 @@ import torch.utils.checkpoint
 from ..configuration_utils import ConfigMixin, register_to_config
 from ..loaders import UNet2DConditionLoadersMixin
 from ..utils import BaseOutput, logging
-from .attention_processor import AttentionProcessor, AttnProcessor
+from .attention_processor import (
+    ADDED_KV_ATTENTION_PROCESSORS,
+    CROSS_ATTENTION_PROCESSORS,
+    AttentionProcessor,
+    AttnAddedKVProcessor,
+    AttnProcessor,
+)
 from .embeddings import TimestepEmbedding, Timesteps
 from .modeling_utils import ModelMixin
 from .transformer_temporal import TransformerTemporalModel
...
@@ -439,7 +445,16 @@ class UNet3DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin)
         """
         Disables custom attention processors and sets the default attention implementation.
         """
-        self.set_attn_processor(AttnProcessor())
+        if all(proc.__class__ in ADDED_KV_ATTENTION_PROCESSORS for proc in self.attn_processors.values()):
+            processor = AttnAddedKVProcessor()
+        elif all(proc.__class__ in CROSS_ATTENTION_PROCESSORS for proc in self.attn_processors.values()):
+            processor = AttnProcessor()
+        else:
+            raise ValueError(
+                f"Cannot call `set_default_attn_processor` when attention processors are of type {next(iter(self.attn_processors.values()))}"
+            )
+        self.set_attn_processor(processor)
 
     def _set_gradient_checkpointing(self, module, value=False):
         if isinstance(module, (CrossAttnDownBlock3D, DownBlock3D, CrossAttnUpBlock3D, UpBlock3D)):
...
@@ -22,7 +22,13 @@ import torch.utils.checkpoint
 from ...configuration_utils import ConfigMixin, register_to_config
 from ...loaders import UNet2DConditionLoadersMixin
 from ...models.activations import get_activation
-from ...models.attention_processor import AttentionProcessor, AttnProcessor
+from ...models.attention_processor import (
+    ADDED_KV_ATTENTION_PROCESSORS,
+    CROSS_ATTENTION_PROCESSORS,
+    AttentionProcessor,
+    AttnAddedKVProcessor,
+    AttnProcessor,
+)
 from ...models.embeddings import (
     TimestepEmbedding,
     Timesteps,
...
@@ -571,7 +577,16 @@ class AudioLDM2UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoad
         """
         Disables custom attention processors and sets the default attention implementation.
         """
-        self.set_attn_processor(AttnProcessor())
+        if all(proc.__class__ in ADDED_KV_ATTENTION_PROCESSORS for proc in self.attn_processors.values()):
+            processor = AttnAddedKVProcessor()
+        elif all(proc.__class__ in CROSS_ATTENTION_PROCESSORS for proc in self.attn_processors.values()):
+            processor = AttnProcessor()
+        else:
+            raise ValueError(
+                f"Cannot call `set_default_attn_processor` when attention processors are of type {next(iter(self.attn_processors.values()))}"
+            )
+        self.set_attn_processor(processor)
 
     # Copied from diffusers.models.unet_2d_condition.UNet2DConditionModel.set_attention_slice
     def set_attention_slice(self, slice_size):
...
@@ -10,6 +10,8 @@ from ...models import ModelMixin
 from ...models.activations import get_activation
 from ...models.attention import Attention
 from ...models.attention_processor import (
+    ADDED_KV_ATTENTION_PROCESSORS,
+    CROSS_ATTENTION_PROCESSORS,
     AttentionProcessor,
     AttnAddedKVProcessor,
     AttnAddedKVProcessor2_0,
...
@@ -844,7 +846,17 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
         """
         Disables custom attention processors and sets the default attention implementation.
         """
-        self.set_attn_processor(AttnProcessor())
+        if all(proc.__class__ in ADDED_KV_ATTENTION_PROCESSORS for proc in self.attn_processors.values()):
+            processor = AttnAddedKVProcessor()
+        elif all(proc.__class__ in CROSS_ATTENTION_PROCESSORS for proc in self.attn_processors.values()):
+            processor = AttnProcessor()
+        else:
+            raise ValueError(
+                "Cannot call `set_default_attn_processor` when attention processors are of type"
+                f" {next(iter(self.attn_processors.values()))}"
+            )
+        self.set_attn_processor(processor)
 
     def set_attention_slice(self, slice_size):
         r"""
...
@@ -263,7 +263,7 @@ class NCSNppModelTests(ModelTesterMixin, UNetTesterMixin, unittest.TestCase):
         output_slice = output[0, -3:, -3:, -1].flatten().cpu()
         # fmt: off
-        expected_output_slice = torch.tensor([-4842.8691, -6499.6631, -3800.1953, -7978.2686, -10980.7129, -20028.8535, 8148.2822, 2342.2905, 567.7608])
+        expected_output_slice = torch.tensor([-4836.2178, -6487.1470, -3816.8196, -7964.9302, -10966.3037, -20043.5957, 8137.0513, 2340.3328, 544.6056])
         # fmt: on
 
         self.assertTrue(torch_all_close(output_slice, expected_output_slice, rtol=1e-2))
...
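For context on the tolerance used above: `torch_all_close(..., rtol=1e-2)` is a thin wrapper in `diffusers.utils.testing_utils` around `torch.allclose` (the wrapping is an assumption here; the allclose semantics are standard PyTorch):

```python
import torch

a = torch.tensor([-4842.8691, -6499.6631])  # old expected values
b = torch.tensor([-4836.2178, -6487.1470])  # new expected values
# torch.allclose passes when |a - b| <= atol + rtol * |b| elementwise, so with
# rtol=1e-2 values of magnitude ~5000 may differ by ~50 and still compare equal.
assert torch.allclose(a, b, rtol=1e-2)
```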
@@ -673,7 +673,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.Test
                 new_model.load_attn_procs(tmpdirname, use_safetensors=True)
             self.assertIn("Error no file named pytorch_lora_weights.safetensors", str(e.exception))
 
-    def test_lora_on_off(self):
+    def test_lora_on_off(self, expected_max_diff=1e-3):
         # enable deterministic behavior for gradient checkpointing
         init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
...
@@ -697,14 +697,17 @@
         with torch.no_grad():
             new_sample = model(**inputs_dict).sample
 
-        assert (sample - new_sample).abs().max() < 1e-4
-        assert (sample - old_sample).abs().max() < 3e-3
+        max_diff_new_sample = (sample - new_sample).abs().max()
+        max_diff_old_sample = (sample - old_sample).abs().max()
+
+        assert max_diff_new_sample < expected_max_diff
+        assert max_diff_old_sample < expected_max_diff
 
     @unittest.skipIf(
         torch_device != "cuda" or not is_xformers_available(),
         reason="XFormers attention is only available with CUDA and `xformers` installed",
     )
-    def test_lora_xformers_on_off(self):
+    def test_lora_xformers_on_off(self, expected_max_diff=1e-3):
         # enable deterministic behavior for gradient checkpointing
         init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
...
@@ -726,8 +729,11 @@
             model.disable_xformers_memory_efficient_attention()
             off_sample = model(**inputs_dict).sample
 
-        assert (sample - on_sample).abs().max() < 1e-4
-        assert (sample - off_sample).abs().max() < 1e-4
+        max_diff_on_sample = (sample - on_sample).abs().max()
+        max_diff_off_sample = (sample - off_sample).abs().max()
+
+        assert max_diff_on_sample < expected_max_diff
+        assert max_diff_off_sample < expected_max_diff
 
     def test_custom_diffusion_processors(self):
         # enable deterministic behavior for gradient checkpointing
...
@@ -218,6 +218,9 @@ class KandinskyV22ControlnetImg2ImgPipelineFastTests(PipelineTesterMixin, unitte
             np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
         ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"
 
+    def test_inference_batch_single_identical(self):
+        super().test_inference_batch_single_identical(expected_max_diff=1.75e-3)
+
 
 @slow
 @require_torch_gpu
...
@@ -42,6 +42,7 @@ from diffusers.utils import load_numpy, nightly, slow, torch_device
 from diffusers.utils.testing_utils import (
     CaptureLogger,
     enable_full_determinism,
+    numpy_cosine_similarity_distance,
     require_torch_2,
     require_torch_gpu,
     run_test_in_subprocess,
...
@@ -760,7 +761,8 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
         # make sure that more than 3.75 GB is allocated
         mem_bytes = torch.cuda.max_memory_allocated()
         assert mem_bytes > 3.75 * 10**9
-        assert np.abs(image_sliced - image).max() < 1e-3
+        max_diff = numpy_cosine_similarity_distance(image_sliced.flatten(), image.flatten())
+        assert max_diff < 1e-3
 
     def test_stable_diffusion_vae_slicing(self):
         torch.cuda.reset_peak_memory_stats()
...
@@ -792,7 +794,8 @@
         mem_bytes = torch.cuda.max_memory_allocated()
         assert mem_bytes > 4e9
         # There is a small discrepancy at the image borders vs. a fully batched version.
-        assert np.abs(image_sliced - image).max() < 1e-2
+        max_diff = numpy_cosine_similarity_distance(image_sliced.flatten(), image.flatten())
+        assert max_diff < 1e-2
 
     def test_stable_diffusion_vae_tiling(self):
         torch.cuda.reset_peak_memory_stats()
...
@@ -837,7 +840,8 @@
         image = output.images
 
         assert mem_bytes < 1e10
-        assert np.abs(image_chunked.flatten() - image.flatten()).max() < 1e-2
+        max_diff = numpy_cosine_similarity_distance(image_chunked.flatten(), image.flatten())
+        assert max_diff < 1e-2
 
     def test_stable_diffusion_fp16_vs_autocast(self):
         # this test makes sure that the original model with autocast
...
@@ -968,7 +972,11 @@
             outputs_offloaded = pipe(**inputs)
             mem_bytes_offloaded = torch.cuda.max_memory_allocated()
 
-        assert np.abs(outputs.images - outputs_offloaded.images).max() < 1e-3
+        images = outputs.images
+        offloaded_images = outputs_offloaded.images
+        max_diff = numpy_cosine_similarity_distance(images.flatten(), offloaded_images.flatten())
+
+        assert max_diff < 1e-3
         assert mem_bytes_offloaded < mem_bytes
         assert mem_bytes_offloaded < 3.5 * 10**9
         for module in pipe.text_encoder, pipe.unet, pipe.vae, pipe.safety_checker:
...
@@ -1075,7 +1083,9 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
         generator = torch.Generator(device="cpu").manual_seed(0)
         image = pipe("a turtle", num_inference_steps=2, generator=generator, output_type="np").images[0]
 
-        assert np.max(np.abs(image - image_ckpt)) < 1e-3
+        max_diff = numpy_cosine_similarity_distance(image.flatten(), image_ckpt.flatten())
+
+        assert max_diff < 1e-3
 
 
 @nightly
...
@@ -147,7 +147,10 @@ class StableDiffusionPanoramaPipelineFastTests(PipelineLatentTesterMixin, Pipeli
     # override to speed the overall test timing up.
     def test_inference_batch_single_identical(self):
-        super().test_inference_batch_single_identical(batch_size=2, expected_max_diff=3.25e-3)
+        super().test_inference_batch_single_identical(batch_size=2, expected_max_diff=5.0e-3)
+
+    def test_float16_inference(self):
+        super().test_float16_inference(expected_max_diff=1e-1)
 
     def test_stable_diffusion_panorama_negative_prompt(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
...
@@ -33,7 +33,12 @@ from diffusers import (
     logging,
 )
 from diffusers.utils import load_numpy, nightly, slow, torch_device
-from diffusers.utils.testing_utils import CaptureLogger, enable_full_determinism, require_torch_gpu
+from diffusers.utils.testing_utils import (
+    CaptureLogger,
+    enable_full_determinism,
+    numpy_cosine_similarity_distance,
+    require_torch_gpu,
+)
 
 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
 from ..test_pipelines_common import PipelineKarrasSchedulerTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin
...
@@ -473,7 +478,10 @@ class StableDiffusion2PipelineSlowTests(unittest.TestCase):
             outputs_offloaded = pipe(**inputs)
             mem_bytes_offloaded = torch.cuda.max_memory_allocated()
 
-        assert np.abs(outputs.images - outputs_offloaded.images).max() < 1e-3
+        images = outputs.images
+        images_offloaded = outputs_offloaded.images
+        max_diff = numpy_cosine_similarity_distance(images.flatten(), images_offloaded.flatten())
+        assert max_diff < 1e-3
         assert mem_bytes_offloaded < mem_bytes
         assert mem_bytes_offloaded < 3 * 10**9
         for module in pipe.text_encoder, pipe.unet, pipe.vae:
...
@@ -27,7 +27,7 @@ from diffusers import (
     UNet2DConditionModel,
 )
 from diffusers.utils import load_numpy, skip_mps, slow
-from diffusers.utils.testing_utils import require_torch_gpu
+from diffusers.utils.testing_utils import numpy_cosine_similarity_distance, require_torch_gpu
 
 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
 from ..test_pipelines_common import PipelineKarrasSchedulerTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin
...
@@ -226,4 +226,5 @@ class StableDiffusionAttendAndExcitePipelineIntegrationTests(unittest.TestCase):
         expected_image = load_numpy(
             "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/attend-and-excite/elephant_glasses.npy"
         )
-        assert np.abs((expected_image - image).max()) < 5e-1
+        max_diff = numpy_cosine_similarity_distance(image.flatten(), expected_image.flatten())
+        assert max_diff < 5e-1
...
@@ -32,7 +32,7 @@ from diffusers import (
 )
 from diffusers.models.attention_processor import AttnProcessor
 from diffusers.utils import load_numpy, slow, torch_device
-from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu
+from diffusers.utils.testing_utils import enable_full_determinism, numpy_cosine_similarity_distance, require_torch_gpu
 
 enable_full_determinism()
...
@@ -364,7 +364,8 @@ class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):
         # make sure that more than 5.5 GB is allocated
         mem_bytes = torch.cuda.max_memory_allocated()
         assert mem_bytes > 5.5 * 10**9
-        assert np.abs(image_chunked.flatten() - image.flatten()).max() < 1e-3
+        max_diff = numpy_cosine_similarity_distance(image.flatten(), image_chunked.flatten())
+        assert max_diff < 1e-3
 
     def test_stable_diffusion_text2img_pipeline_v_pred_default(self):
         expected_image = load_numpy(
...
@@ -384,7 +385,8 @@
         image = output.images[0]
 
         assert image.shape == (768, 768, 3)
-        assert np.abs(expected_image - image).max() < 9e-1
+        max_diff = numpy_cosine_similarity_distance(image.flatten(), expected_image.flatten())
+        assert max_diff < 1e-3
 
     def test_stable_diffusion_text2img_pipeline_unflawed(self):
         expected_image = load_numpy(
...
@@ -402,12 +404,13 @@
         prompt = "A lion in galaxies, spirals, nebulae, stars, smoke, iridescent, intricate detail, octane render, 8k"
-        generator = torch.manual_seed(0)
+        generator = torch.Generator("cpu").manual_seed(0)
         output = pipe(prompt=prompt, guidance_scale=7.5, guidance_rescale=0.7, generator=generator, output_type="np")
         image = output.images[0]
 
         assert image.shape == (768, 768, 3)
-        assert np.abs(expected_image - image).max() < 5e-1
+        max_diff = numpy_cosine_similarity_distance(image.flatten(), expected_image.flatten())
+        assert max_diff < 1e-2
 
     def test_stable_diffusion_text2img_pipeline_v_pred_fp16(self):
         expected_image = load_numpy(
...
@@ -426,7 +429,8 @@
         image = output.images[0]
 
         assert image.shape == (768, 768, 3)
-        assert np.abs(expected_image - image).max() < 7.5e-1
+        max_diff = numpy_cosine_similarity_distance(image.flatten(), expected_image.flatten())
+        assert max_diff < 1e-3
 
     def test_download_local(self):
         filename = hf_hub_download("stabilityai/stable-diffusion-2-1", filename="v2-1_768-ema-pruned.safetensors")
...
@@ -460,7 +464,8 @@
         generator = torch.Generator(device="cpu").manual_seed(0)
         image = pipe("a turtle", num_inference_steps=5, generator=generator, output_type="np").images[0]
 
-        assert np.max(np.abs(image - image_ckpt)) < 1e-3
+        max_diff = numpy_cosine_similarity_distance(image.flatten(), image_ckpt.flatten())
+        assert max_diff < 1e-3
 
     def test_stable_diffusion_text2img_intermediate_state_v_pred(self):
         number_of_steps = 0
...
@@ -24,7 +24,6 @@ from diffusers.utils import logging
 from diffusers.utils.import_utils import is_accelerate_available, is_accelerate_version, is_xformers_available
 from diffusers.utils.testing_utils import (
     CaptureLogger,
-    numpy_cosine_similarity_distance,
     require_torch,
     torch_device,
 )
...
@@ -304,6 +303,10 @@ class PipelineTesterMixin:
     def test_save_load_local(self, expected_max_difference=5e-4):
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
+        for component in pipe.components.values():
+            if hasattr(component, "set_default_attn_processor"):
+                component.set_default_attn_processor()
+
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
...
@@ -517,6 +520,10 @@
     def test_dict_tuple_outputs_equivalent(self, expected_max_difference=1e-4):
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
+        for component in pipe.components.values():
+            if hasattr(component, "set_default_attn_processor"):
+                component.set_default_attn_processor()
+
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
...
@@ -537,18 +544,26 @@
     def test_float16_inference(self, expected_max_diff=1e-2):
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
+        for component in pipe.components.values():
+            if hasattr(component, "set_default_attn_processor"):
+                component.set_default_attn_processor()
+
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
 
         components = self.get_dummy_components()
         pipe_fp16 = self.pipeline_class(**components)
+        for component in pipe_fp16.components.values():
+            if hasattr(component, "set_default_attn_processor"):
+                component.set_default_attn_processor()
+
         pipe_fp16.to(torch_device, torch.float16)
         pipe_fp16.set_progress_bar_config(disable=None)
 
         output = pipe(**self.get_dummy_inputs(torch_device))[0]
         output_fp16 = pipe_fp16(**self.get_dummy_inputs(torch_device))[0]
 
-        max_diff = numpy_cosine_similarity_distance(to_np(output).flatten(), to_np(output_fp16).flatten())
+        max_diff = np.abs(to_np(output) - to_np(output_fp16)).max()
         self.assertLess(max_diff, expected_max_diff, "The outputs of the fp16 and fp32 pipelines are too different.")
 
     @unittest.skipIf(torch_device != "cuda", reason="float16 requires CUDA")
...
@@ -557,7 +572,11 @@
         for name, module in components.items():
             if hasattr(module, "half"):
                 components[name] = module.to(torch_device).half()
+
         pipe = self.pipeline_class(**components)
+        for component in pipe.components.values():
+            if hasattr(component, "set_default_attn_processor"):
+                component.set_default_attn_processor()
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
...
@@ -567,6 +586,9 @@
         with tempfile.TemporaryDirectory() as tmpdir:
             pipe.save_pretrained(tmpdir)
             pipe_loaded = self.pipeline_class.from_pretrained(tmpdir, torch_dtype=torch.float16)
+            for component in pipe_loaded.components.values():
+                if hasattr(component, "set_default_attn_processor"):
+                    component.set_default_attn_processor()
             pipe_loaded.to(torch_device)
             pipe_loaded.set_progress_bar_config(disable=None)
...
@@ -579,7 +601,6 @@
         inputs = self.get_dummy_inputs(torch_device)
         output_loaded = pipe_loaded(**inputs)[0]
         max_diff = np.abs(to_np(output) - to_np(output_loaded)).max()
-
         self.assertLess(
             max_diff, expected_max_diff, "The output of the fp16 pipeline changed after saving and loading."
...
@@ -591,6 +612,9 @@
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
+        for component in pipe.components.values():
+            if hasattr(component, "set_default_attn_processor"):
+                component.set_default_attn_processor()
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
...
@@ -604,6 +628,9 @@
         with tempfile.TemporaryDirectory() as tmpdir:
             pipe.save_pretrained(tmpdir, safe_serialization=False)
             pipe_loaded = self.pipeline_class.from_pretrained(tmpdir)
+            for component in pipe_loaded.components.values():
+                if hasattr(component, "set_default_attn_processor"):
+                    component.set_default_attn_processor()
             pipe_loaded.to(torch_device)
             pipe_loaded.set_progress_bar_config(disable=None)
...
@@ -662,6 +689,9 @@
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
+        for component in pipe.components.values():
+            if hasattr(component, "set_default_attn_processor"):
+                component.set_default_attn_processor()
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
...
@@ -686,6 +716,9 @@
     def test_cpu_offload_forward_pass(self, expected_max_diff=1e-4):
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
+        for component in pipe.components.values():
+            if hasattr(component, "set_default_attn_processor"):
+                component.set_default_attn_processor()
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
...
@@ -714,6 +747,9 @@
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
+        for component in pipe.components.values():
+            if hasattr(component, "set_default_attn_processor"):
+                component.set_default_attn_processor()
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
...
@@ -731,7 +767,7 @@
         )
 
         if test_max_difference:
-            max_diff = np.abs(output_with_offload - output_without_offload).max()
+            max_diff = np.abs(to_np(output_with_offload) - to_np(output_without_offload)).max()
             self.assertLess(max_diff, expected_max_diff, "XFormers attention should not affect the inference results")
 
         if test_mean_pixel_difference:
...
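The per-test loops added to `PipelineTesterMixin` above all follow one pattern; pulled out as a standalone helper it would look like this (a hypothetical refactor for illustration, not part of the diff):

```python
def set_default_attn_processors(pipe):
    # Force every pipeline component that supports it back to the default
    # attention processor, so comparison tests run identical attention kernels.
    for component in pipe.components.values():
        if hasattr(component, "set_default_attn_processor"):
            component.set_default_attn_processor()
```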
@@ -150,6 +150,9 @@ class VideoToVideoSDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
 
+    def test_save_load_optional_components(self):
+        super().test_save_load_optional_components(expected_max_difference=0.001)
+
     @unittest.skipIf(
         torch_device != "cuda" or not is_xformers_available(),
         reason="XFormers attention is only available with CUDA and `xformers` installed",
...
@@ -406,7 +406,7 @@ class UnCLIPPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     @skip_mps
     def test_save_load_local(self):
-        return super().test_save_load_local()
+        return super().test_save_load_local(expected_max_difference=5e-3)
 
     @skip_mps
     def test_save_load_optional_components(self):
...
@@ -477,7 +477,7 @@ class UnCLIPImageVariationPipelineFastTests(PipelineTesterMixin, unittest.TestCa
     @skip_mps
     def test_save_load_local(self):
-        return super().test_save_load_local()
+        return super().test_save_load_local(expected_max_difference=4e-3)
 
     @skip_mps
     def test_save_load_optional_components(self):
...