Unverified Commit 9b638548 authored by Patrick von Platen's avatar Patrick von Platen Committed by GitHub
Browse files

Improve reproduceability 2/3 (#1906)

* [Repro] Correct reproducability

* up

* up

* uP

* up

* need better image

* allow conversion from no state dict checkpoints

* up

* up

* up

* up

* check tensors

* check tensors

* check tensors

* check tensors

* next try

* up

* up

* better name

* up

* up

* Apply suggestions from code review

* correct more

* up

* replace all torch randn

* fix

* correct

* correct

* finish

* fix more

* up
parent 67e2f95c
......@@ -21,7 +21,7 @@ from typing import Optional, Tuple, Union
import torch
from ..configuration_utils import ConfigMixin, register_to_config
from ..utils import BaseOutput
from ..utils import BaseOutput, randn_tensor
from .scheduling_utils import SchedulerMixin, SchedulerOutput
......@@ -201,7 +201,9 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
drift = drift - diffusion**2 * model_output
# equation 6: sample noise for the diffusion term of
noise = torch.randn(sample.shape, layout=sample.layout, generator=generator).to(sample.device)
noise = randn_tensor(
sample.shape, layout=sample.layout, generator=generator, device=sample.device, dtype=sample.dtype
)
prev_sample_mean = sample - drift # subtract because `dt` is a small negative timestep
# TODO is the variable diffusion the correct scaling term for the noise?
prev_sample = prev_sample_mean + diffusion * noise # add impact of diffusion field g
......@@ -241,7 +243,7 @@ class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin):
# For small batch sizes, the paper "suggest replacing norm(z) with sqrt(d), where d is the dim. of z"
# sample noise for correction
noise = torch.randn(sample.shape, layout=sample.layout, generator=generator).to(sample.device)
noise = randn_tensor(sample.shape, layout=sample.layout, generator=generator).to(sample.device)
# compute step size from the model_output, the noise, and the snr
grad_norm = torch.norm(model_output.reshape(model_output.shape[0], -1), dim=-1).mean()
......
......@@ -20,6 +20,7 @@ from typing import Union
import torch
from ..configuration_utils import ConfigMixin, register_to_config
from ..utils import randn_tensor
from .scheduling_utils import SchedulerMixin
......@@ -80,7 +81,7 @@ class ScoreSdeVpScheduler(SchedulerMixin, ConfigMixin):
x_mean = x + drift * dt
# add noise
noise = torch.randn(x.shape, layout=x.layout, generator=generator).to(x.device)
noise = randn_tensor(x.shape, layout=x.layout, generator=generator, device=x.device, dtype=x.dtype)
x = x_mean + diffusion * math.sqrt(-dt) * noise
return x, x_mean
......
......@@ -20,7 +20,7 @@ import numpy as np
import torch
from ..configuration_utils import ConfigMixin, register_to_config
from ..utils import BaseOutput, torch_randn
from ..utils import BaseOutput, randn_tensor
from .scheduling_utils import SchedulerMixin
......@@ -273,7 +273,7 @@ class UnCLIPScheduler(SchedulerMixin, ConfigMixin):
# 6. Add noise
variance = 0
if t > 0:
variance_noise = torch_randn(
variance_noise = randn_tensor(
model_output.shape, dtype=model_output.dtype, generator=generator, device=model_output.device
)
......
......@@ -64,7 +64,7 @@ from .import_utils import (
from .logging import get_logger
from .outputs import BaseOutput
from .pil_utils import PIL_INTERPOLATION
from .torch_utils import torch_randn
from .torch_utils import randn_tensor
if is_torch_available():
......
......@@ -26,11 +26,12 @@ if is_torch_available():
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
def torch_randn(
def randn_tensor(
shape: Union[Tuple, List],
generator: Optional[Union[List["torch.Generator"], "torch.Generator"]] = None,
device: Optional["torch.device"] = None,
dtype: Optional["torch.dtype"] = None,
layout: Optional["torch.layout"] = None,
):
"""This is a helper function that allows to create random tensors on the desired `device` with the desired `dtype`. When
passing a list of generators one can seed each batched size individually. If CPU generators are passed the tensor
......@@ -40,8 +41,12 @@ def torch_randn(
rand_device = device
batch_size = shape[0]
layout = layout or torch.strided
device = device or torch.device("cpu")
if generator is not None:
if generator.device != device and generator.device.type == "cpu":
gen_device_type = generator.device.type if not isinstance(generator, list) else generator[0].device.type
if gen_device_type != device.type and gen_device_type == "cpu":
rand_device = "cpu"
if device != "mps":
logger.info(
......@@ -49,16 +54,17 @@ def torch_randn(
f" Tensors will be created on 'cpu' and then moved to {device}. Note that one can probably"
f" slighly speed up this function by passing a generator that was created on the {device} device."
)
elif generator.device.type != device.type and generator.device.type == "cuda":
raise ValueError(f"Cannot generate a {device} tensor from a generator of type {generator.device.type}.")
elif gen_device_type != device.type and gen_device_type == "cuda":
raise ValueError(f"Cannot generate a {device} tensor from a generator of type {gen_device_type}.")
if isinstance(generator, list):
shape = (1,) + shape[1:]
latents = [
torch.randn(shape, generator=generator[i], device=rand_device, dtype=dtype) for i in range(batch_size)
torch.randn(shape, generator=generator[i], device=rand_device, dtype=dtype, layout=layout)
for i in range(batch_size)
]
latents = torch.cat(latents, dim=0).to(device)
else:
latents = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype).to(device)
latents = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype, layout=layout).to(device)
return latents
......@@ -25,44 +25,6 @@ from diffusers.utils.testing_utils import require_torch, slow, torch_device
torch.backends.cuda.matmul.allow_tf32 = False
class KarrasVePipelineFastTests(unittest.TestCase):
@property
def dummy_uncond_unet(self):
torch.manual_seed(0)
model = UNet2DModel(
block_out_channels=(32, 64),
layers_per_block=2,
sample_size=32,
in_channels=3,
out_channels=3,
down_block_types=("DownBlock2D", "AttnDownBlock2D"),
up_block_types=("AttnUpBlock2D", "UpBlock2D"),
)
return model
def test_inference(self):
unet = self.dummy_uncond_unet
scheduler = KarrasVeScheduler()
pipe = KarrasVePipeline(unet=unet, scheduler=scheduler)
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.manual_seed(0)
image = pipe(num_inference_steps=2, generator=generator, output_type="numpy").images
generator = torch.manual_seed(0)
image_from_tuple = pipe(num_inference_steps=2, generator=generator, output_type="numpy", return_dict=False)[0]
image_slice = image[0, -3:, -3:, -1]
image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1]
assert image.shape == (1, 32, 32, 3)
expected_slice = np.array([0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
@slow
@require_torch
class KarrasVePipelineIntegrationTests(unittest.TestCase):
......
......@@ -132,7 +132,7 @@ class PaintByExamplePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.4397, 0.5553, 0.3802, 0.5222, 0.5811, 0.4342, 0.494, 0.4577, 0.4428])
expected_slice = np.array([0.4701, 0.5555, 0.3994, 0.5107, 0.5691, 0.4517, 0.5125, 0.4769, 0.4539])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
......
......@@ -21,7 +21,7 @@ import torch
from diffusers import PriorTransformer, UnCLIPPipeline, UnCLIPScheduler, UNet2DConditionModel, UNet2DModel
from diffusers.pipelines.unclip.text_proj import UnCLIPTextProjModel
from diffusers.utils import load_numpy, slow, torch_device
from diffusers.utils import load_numpy, nightly, slow, torch_device
from diffusers.utils.testing_utils import require_torch_gpu
from transformers import CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer
......@@ -363,6 +363,37 @@ class UnCLIPPipelineFastTests(unittest.TestCase):
assert np.abs(image - image_from_text).max() < 1e-4
@nightly
class UnCLIPPipelineCPUIntegrationTests(unittest.TestCase):
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
def test_unclip_karlo_cpu_fp32(self):
expected_image = load_numpy(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/unclip/karlo_v1_alpha_horse_cpu.npy"
)
pipeline = UnCLIPPipeline.from_pretrained("kakaobrain/karlo-v1-alpha")
pipeline.set_progress_bar_config(disable=None)
generator = torch.manual_seed(0)
output = pipeline(
"horse",
num_images_per_prompt=1,
generator=generator,
output_type="np",
)
image = output.images[0]
assert image.shape == (256, 256, 3)
assert np.abs(expected_image - image).max() < 1e-1
@slow
@require_torch_gpu
class UnCLIPPipelineIntegrationTests(unittest.TestCase):
......@@ -385,15 +416,19 @@ class UnCLIPPipelineIntegrationTests(unittest.TestCase):
generator = torch.Generator(device="cpu").manual_seed(0)
output = pipeline(
"horse",
num_images_per_prompt=1,
generator=generator,
output_type="np",
)
image = output.images[0]
image = np.asarray(pipeline.numpy_to_pil(output.images)[0], dtype=np.float32)
expected_image = np.asarray(pipeline.numpy_to_pil(expected_image)[0], dtype=np.float32)
# Karlo is extremely likely to strongly deviate depending on which hardware is used
# Here we just check that the image doesn't deviate more than 10 pixels from the reference image on average
avg_diff = np.abs(image - expected_image).mean()
assert avg_diff < 10, f"Error image deviates {avg_diff} pixels on average"
assert image.shape == (256, 256, 3)
assert np.abs(expected_image - image).max() < 1e-2
def test_unclip_pipeline_with_sequential_cpu_offloading(self):
torch.cuda.empty_cache()
......
......@@ -475,20 +475,25 @@ class UnCLIPImageVariationPipelineIntegrationTests(unittest.TestCase):
"/unclip/karlo_v1_alpha_cat_variation_fp16.npy"
)
pipeline = UnCLIPImageVariationPipeline.from_pretrained("fusing/karlo-image-variations-diffusers")
pipeline = UnCLIPImageVariationPipeline.from_pretrained(
"fusing/karlo-image-variations-diffusers", torch_dtype=torch.float16
)
pipeline = pipeline.to(torch_device)
pipeline.set_progress_bar_config(disable=None)
pipeline.enable_sequential_cpu_offload()
generator = torch.Generator(device="cpu").manual_seed(0)
output = pipeline(
input_image,
num_images_per_prompt=1,
generator=generator,
output_type="np",
)
image = output.images[0]
image = np.asarray(pipeline.numpy_to_pil(output.images)[0], dtype=np.float32)
expected_image = np.asarray(pipeline.numpy_to_pil(expected_image)[0], dtype=np.float32)
# Karlo is extremely likely to strongly deviate depending on which hardware is used
# Here we just check that the image doesn't deviate more than 10 pixels from the reference image on average
avg_diff = np.abs(image - expected_image).mean()
assert avg_diff < 10, f"Error image deviates {avg_diff} pixels on average"
assert image.shape == (256, 256, 3)
assert np.abs(expected_image - image).max() < 5e-2
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment