Unverified Commit 9b638548 authored by Patrick von Platen's avatar Patrick von Platen Committed by GitHub
Browse files

Improve reproducibility 2/3 (#1906)

* [Repro] Correct reproducibility

* up

* up

* uP

* up

* need better image

* allow conversion from no state dict checkpoints

* up

* up

* up

* up

* check tensors

* check tensors

* check tensors

* check tensors

* next try

* up

* up

* better name

* up

* up

* Apply suggestions from code review

* correct more

* up

* replace all torch randn

* fix

* correct

* correct

* finish

* fix more

* up
parent 67e2f95c
...@@ -21,7 +21,7 @@ from typing import Optional, Tuple, Union ...@@ -21,7 +21,7 @@ from typing import Optional, Tuple, Union
import torch import torch
from ..configuration_utils import ConfigMixin, register_to_config from ..configuration_utils import ConfigMixin, register_to_config
from ..utils import BaseOutput from ..utils import BaseOutput, randn_tensor
from .scheduling_utils import SchedulerMixin, SchedulerOutput from .scheduling_utils import SchedulerMixin, SchedulerOutput
...@@ -201,7 +201,9 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin): ...@@ -201,7 +201,9 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
drift = drift - diffusion**2 * model_output drift = drift - diffusion**2 * model_output
# equation 6: sample noise for the diffusion term of # equation 6: sample noise for the diffusion term of
noise = torch.randn(sample.shape, layout=sample.layout, generator=generator).to(sample.device) noise = randn_tensor(
sample.shape, layout=sample.layout, generator=generator, device=sample.device, dtype=sample.dtype
)
prev_sample_mean = sample - drift # subtract because `dt` is a small negative timestep prev_sample_mean = sample - drift # subtract because `dt` is a small negative timestep
# TODO is the variable diffusion the correct scaling term for the noise? # TODO is the variable diffusion the correct scaling term for the noise?
prev_sample = prev_sample_mean + diffusion * noise # add impact of diffusion field g prev_sample = prev_sample_mean + diffusion * noise # add impact of diffusion field g
...@@ -241,7 +243,7 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin): ...@@ -241,7 +243,7 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
# For small batch sizes, the paper "suggest replacing norm(z) with sqrt(d), where d is the dim. of z" # For small batch sizes, the paper "suggest replacing norm(z) with sqrt(d), where d is the dim. of z"
# sample noise for correction # sample noise for correction
noise = torch.randn(sample.shape, layout=sample.layout, generator=generator).to(sample.device) noise = randn_tensor(sample.shape, layout=sample.layout, generator=generator).to(sample.device)
# compute step size from the model_output, the noise, and the snr # compute step size from the model_output, the noise, and the snr
grad_norm = torch.norm(model_output.reshape(model_output.shape[0], -1), dim=-1).mean() grad_norm = torch.norm(model_output.reshape(model_output.shape[0], -1), dim=-1).mean()
......
...@@ -20,6 +20,7 @@ from typing import Union ...@@ -20,6 +20,7 @@ from typing import Union
import torch import torch
from ..configuration_utils import ConfigMixin, register_to_config from ..configuration_utils import ConfigMixin, register_to_config
from ..utils import randn_tensor
from .scheduling_utils import SchedulerMixin from .scheduling_utils import SchedulerMixin
...@@ -80,7 +81,7 @@ class ScoreSdeVpScheduler(SchedulerMixin, ConfigMixin): ...@@ -80,7 +81,7 @@ class ScoreSdeVpScheduler(SchedulerMixin, ConfigMixin):
x_mean = x + drift * dt x_mean = x + drift * dt
# add noise # add noise
noise = torch.randn(x.shape, layout=x.layout, generator=generator).to(x.device) noise = randn_tensor(x.shape, layout=x.layout, generator=generator, device=x.device, dtype=x.dtype)
x = x_mean + diffusion * math.sqrt(-dt) * noise x = x_mean + diffusion * math.sqrt(-dt) * noise
return x, x_mean return x, x_mean
......
...@@ -20,7 +20,7 @@ import numpy as np ...@@ -20,7 +20,7 @@ import numpy as np
import torch import torch
from ..configuration_utils import ConfigMixin, register_to_config from ..configuration_utils import ConfigMixin, register_to_config
from ..utils import BaseOutput, torch_randn from ..utils import BaseOutput, randn_tensor
from .scheduling_utils import SchedulerMixin from .scheduling_utils import SchedulerMixin
...@@ -273,7 +273,7 @@ class UnCLIPScheduler(SchedulerMixin, ConfigMixin): ...@@ -273,7 +273,7 @@ class UnCLIPScheduler(SchedulerMixin, ConfigMixin):
# 6. Add noise # 6. Add noise
variance = 0 variance = 0
if t > 0: if t > 0:
variance_noise = torch_randn( variance_noise = randn_tensor(
model_output.shape, dtype=model_output.dtype, generator=generator, device=model_output.device model_output.shape, dtype=model_output.dtype, generator=generator, device=model_output.device
) )
......
...@@ -64,7 +64,7 @@ from .import_utils import ( ...@@ -64,7 +64,7 @@ from .import_utils import (
from .logging import get_logger from .logging import get_logger
from .outputs import BaseOutput from .outputs import BaseOutput
from .pil_utils import PIL_INTERPOLATION from .pil_utils import PIL_INTERPOLATION
from .torch_utils import torch_randn from .torch_utils import randn_tensor
if is_torch_available(): if is_torch_available():
......
...@@ -26,11 +26,12 @@ if is_torch_available(): ...@@ -26,11 +26,12 @@ if is_torch_available():
logger = logging.get_logger(__name__) # pylint: disable=invalid-name logger = logging.get_logger(__name__) # pylint: disable=invalid-name
def torch_randn( def randn_tensor(
shape: Union[Tuple, List], shape: Union[Tuple, List],
generator: Optional[Union[List["torch.Generator"], "torch.Generator"]] = None, generator: Optional[Union[List["torch.Generator"], "torch.Generator"]] = None,
device: Optional["torch.device"] = None, device: Optional["torch.device"] = None,
dtype: Optional["torch.dtype"] = None, dtype: Optional["torch.dtype"] = None,
layout: Optional["torch.layout"] = None,
): ):
"""This is a helper function that allows to create random tensors on the desired `device` with the desired `dtype`. When """This is a helper function that allows to create random tensors on the desired `device` with the desired `dtype`. When
passing a list of generators one can seed each batched size individually. If CPU generators are passed the tensor passing a list of generators one can seed each batched size individually. If CPU generators are passed the tensor
...@@ -40,8 +41,12 @@ def torch_randn( ...@@ -40,8 +41,12 @@ def torch_randn(
rand_device = device rand_device = device
batch_size = shape[0] batch_size = shape[0]
layout = layout or torch.strided
device = device or torch.device("cpu")
if generator is not None: if generator is not None:
if generator.device != device and generator.device.type == "cpu": gen_device_type = generator.device.type if not isinstance(generator, list) else generator[0].device.type
if gen_device_type != device.type and gen_device_type == "cpu":
rand_device = "cpu" rand_device = "cpu"
if device != "mps": if device != "mps":
logger.info( logger.info(
...@@ -49,16 +54,17 @@ def torch_randn( ...@@ -49,16 +54,17 @@ def torch_randn(
f" Tensors will be created on 'cpu' and then moved to {device}. Note that one can probably" f" Tensors will be created on 'cpu' and then moved to {device}. Note that one can probably"
f" slighly speed up this function by passing a generator that was created on the {device} device." f" slighly speed up this function by passing a generator that was created on the {device} device."
) )
elif generator.device.type != device.type and generator.device.type == "cuda": elif gen_device_type != device.type and gen_device_type == "cuda":
raise ValueError(f"Cannot generate a {device} tensor from a generator of type {generator.device.type}.") raise ValueError(f"Cannot generate a {device} tensor from a generator of type {gen_device_type}.")
if isinstance(generator, list): if isinstance(generator, list):
shape = (1,) + shape[1:] shape = (1,) + shape[1:]
latents = [ latents = [
torch.randn(shape, generator=generator[i], device=rand_device, dtype=dtype) for i in range(batch_size) torch.randn(shape, generator=generator[i], device=rand_device, dtype=dtype, layout=layout)
for i in range(batch_size)
] ]
latents = torch.cat(latents, dim=0).to(device) latents = torch.cat(latents, dim=0).to(device)
else: else:
latents = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype).to(device) latents = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype, layout=layout).to(device)
return latents return latents
...@@ -25,44 +25,6 @@ from diffusers.utils.testing_utils import require_torch, slow, torch_device ...@@ -25,44 +25,6 @@ from diffusers.utils.testing_utils import require_torch, slow, torch_device
torch.backends.cuda.matmul.allow_tf32 = False torch.backends.cuda.matmul.allow_tf32 = False
class KarrasVePipelineFastTests(unittest.TestCase):
    """Fast smoke tests for ``KarrasVePipeline`` using a tiny randomly-initialized UNet."""

    @property
    def dummy_uncond_unet(self):
        # Small unconditional UNet; seeding first makes the random weight init
        # deterministic, so the expected pixel slice below is stable.
        torch.manual_seed(0)
        model = UNet2DModel(
            block_out_channels=(32, 64),
            layers_per_block=2,
            sample_size=32,
            in_channels=3,
            out_channels=3,
            down_block_types=("DownBlock2D", "AttnDownBlock2D"),
            up_block_types=("AttnUpBlock2D", "UpBlock2D"),
        )
        return model

    def test_inference(self):
        # Assemble the pipeline from the dummy UNet and a default Karras VE scheduler.
        unet = self.dummy_uncond_unet
        scheduler = KarrasVeScheduler()
        pipe = KarrasVePipeline(unet=unet, scheduler=scheduler)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        # Re-seed before each call so the dict-style and tuple-style outputs
        # are generated from identical noise and can be compared.
        generator = torch.manual_seed(0)
        image = pipe(num_inference_steps=2, generator=generator, output_type="numpy").images

        generator = torch.manual_seed(0)
        image_from_tuple = pipe(num_inference_steps=2, generator=generator, output_type="numpy", return_dict=False)[0]

        # Compare only the bottom-right 3x3 patch of the last channel.
        image_slice = image[0, -3:, -3:, -1]
        image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1]

        assert image.shape == (1, 32, 32, 3)
        expected_slice = np.array([0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0])

        # Both output forms must match the reference slice within tolerance.
        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
        assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
@slow @slow
@require_torch @require_torch
class KarrasVePipelineIntegrationTests(unittest.TestCase): class KarrasVePipelineIntegrationTests(unittest.TestCase):
......
...@@ -132,7 +132,7 @@ class PaintByExamplePipelineFastTests(PipelineTesterMixin, unittest.TestCase): ...@@ -132,7 +132,7 @@ class PaintByExamplePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
image_slice = image[0, -3:, -3:, -1] image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 64, 64, 3) assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.4397, 0.5553, 0.3802, 0.5222, 0.5811, 0.4342, 0.494, 0.4577, 0.4428]) expected_slice = np.array([0.4701, 0.5555, 0.3994, 0.5107, 0.5691, 0.4517, 0.5125, 0.4769, 0.4539])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
......
...@@ -21,7 +21,7 @@ import torch ...@@ -21,7 +21,7 @@ import torch
from diffusers import PriorTransformer, UnCLIPPipeline, UnCLIPScheduler, UNet2DConditionModel, UNet2DModel from diffusers import PriorTransformer, UnCLIPPipeline, UnCLIPScheduler, UNet2DConditionModel, UNet2DModel
from diffusers.pipelines.unclip.text_proj import UnCLIPTextProjModel from diffusers.pipelines.unclip.text_proj import UnCLIPTextProjModel
from diffusers.utils import load_numpy, slow, torch_device from diffusers.utils import load_numpy, nightly, slow, torch_device
from diffusers.utils.testing_utils import require_torch_gpu from diffusers.utils.testing_utils import require_torch_gpu
from transformers import CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer from transformers import CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer
...@@ -363,6 +363,37 @@ class UnCLIPPipelineFastTests(unittest.TestCase): ...@@ -363,6 +363,37 @@ class UnCLIPPipelineFastTests(unittest.TestCase):
assert np.abs(image - image_from_text).max() < 1e-4 assert np.abs(image - image_from_text).max() < 1e-4
@nightly
class UnCLIPPipelineCPUIntegrationTests(unittest.TestCase):
    """Nightly integration test: full Karlo unCLIP text-to-image pipeline on CPU (fp32)."""

    def tearDown(self):
        # clean up the VRAM after each test
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    def test_unclip_karlo_cpu_fp32(self):
        # Reference image generated with the same seed, hosted on the HF Hub.
        expected_image = load_numpy(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
            "/unclip/karlo_v1_alpha_horse_cpu.npy"
        )

        pipeline = UnCLIPPipeline.from_pretrained("kakaobrain/karlo-v1-alpha")
        pipeline.set_progress_bar_config(disable=None)

        # Seed the default (CPU) generator for reproducibility.
        generator = torch.manual_seed(0)
        output = pipeline(
            "horse",
            num_images_per_prompt=1,
            generator=generator,
            output_type="np",
        )

        image = output.images[0]

        assert image.shape == (256, 256, 3)
        # Loose tolerance (0.1 in [0, 1] pixel space) — Karlo outputs can
        # deviate slightly depending on hardware/BLAS backend.
        assert np.abs(expected_image - image).max() < 1e-1
@slow @slow
@require_torch_gpu @require_torch_gpu
class UnCLIPPipelineIntegrationTests(unittest.TestCase): class UnCLIPPipelineIntegrationTests(unittest.TestCase):
...@@ -385,15 +416,19 @@ class UnCLIPPipelineIntegrationTests(unittest.TestCase): ...@@ -385,15 +416,19 @@ class UnCLIPPipelineIntegrationTests(unittest.TestCase):
generator = torch.Generator(device="cpu").manual_seed(0) generator = torch.Generator(device="cpu").manual_seed(0)
output = pipeline( output = pipeline(
"horse", "horse",
num_images_per_prompt=1,
generator=generator, generator=generator,
output_type="np", output_type="np",
) )
image = output.images[0] image = np.asarray(pipeline.numpy_to_pil(output.images)[0], dtype=np.float32)
expected_image = np.asarray(pipeline.numpy_to_pil(expected_image)[0], dtype=np.float32)
# Karlo is extremely likely to strongly deviate depending on which hardware is used
# Here we just check that the image doesn't deviate more than 10 pixels from the reference image on average
avg_diff = np.abs(image - expected_image).mean()
assert avg_diff < 10, f"Error image deviates {avg_diff} pixels on average"
assert image.shape == (256, 256, 3) assert image.shape == (256, 256, 3)
assert np.abs(expected_image - image).max() < 1e-2
def test_unclip_pipeline_with_sequential_cpu_offloading(self): def test_unclip_pipeline_with_sequential_cpu_offloading(self):
torch.cuda.empty_cache() torch.cuda.empty_cache()
......
...@@ -475,20 +475,25 @@ class UnCLIPImageVariationPipelineIntegrationTests(unittest.TestCase): ...@@ -475,20 +475,25 @@ class UnCLIPImageVariationPipelineIntegrationTests(unittest.TestCase):
"/unclip/karlo_v1_alpha_cat_variation_fp16.npy" "/unclip/karlo_v1_alpha_cat_variation_fp16.npy"
) )
pipeline = UnCLIPImageVariationPipeline.from_pretrained("fusing/karlo-image-variations-diffusers") pipeline = UnCLIPImageVariationPipeline.from_pretrained(
"fusing/karlo-image-variations-diffusers", torch_dtype=torch.float16
)
pipeline = pipeline.to(torch_device) pipeline = pipeline.to(torch_device)
pipeline.set_progress_bar_config(disable=None) pipeline.set_progress_bar_config(disable=None)
pipeline.enable_sequential_cpu_offload()
generator = torch.Generator(device="cpu").manual_seed(0) generator = torch.Generator(device="cpu").manual_seed(0)
output = pipeline( output = pipeline(
input_image, input_image,
num_images_per_prompt=1,
generator=generator, generator=generator,
output_type="np", output_type="np",
) )
image = output.images[0] image = np.asarray(pipeline.numpy_to_pil(output.images)[0], dtype=np.float32)
expected_image = np.asarray(pipeline.numpy_to_pil(expected_image)[0], dtype=np.float32)
# Karlo is extremely likely to strongly deviate depending on which hardware is used
# Here we just check that the image doesn't deviate more than 10 pixels from the reference image on average
avg_diff = np.abs(image - expected_image).mean()
assert avg_diff < 10, f"Error image deviates {avg_diff} pixels on average"
assert image.shape == (256, 256, 3) assert image.shape == (256, 256, 3)
assert np.abs(expected_image - image).max() < 5e-2
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment