Unverified commit 35099b20, authored by Patrick von Platen, committed by GitHub
Browse files

[Versatile Diffusion] Fix remaining tests (#1418)

fix all tests
parent 2c6bc0f1
...@@ -65,6 +65,8 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline): ...@@ -65,6 +65,8 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline):
vae: AutoencoderKL vae: AutoencoderKL
scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler] scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler]
_optional_components = ["text_unet"]
def __init__( def __init__(
self, self,
tokenizer: CLIPTokenizer, tokenizer: CLIPTokenizer,
...@@ -143,6 +145,8 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline): ...@@ -143,6 +145,8 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline):
index = int(index) index = int(index)
self.image_unet.get_submodule(parent_name)[index] = module.transformers[0] self.image_unet.get_submodule(parent_name)[index] = module.transformers[0]
self.image_unet.register_to_config(dual_cross_attention=False)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_xformers_memory_efficient_attention with unet->image_unet # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_xformers_memory_efficient_attention with unet->image_unet
def enable_xformers_memory_efficient_attention(self): def enable_xformers_memory_efficient_attention(self):
r""" r"""
......
...@@ -57,6 +57,8 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline): ...@@ -57,6 +57,8 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
vae: AutoencoderKL vae: AutoencoderKL
scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler] scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler]
_optional_components = ["text_unet"]
def __init__( def __init__(
self, self,
tokenizer: CLIPTokenizer, tokenizer: CLIPTokenizer,
......
...@@ -54,6 +54,5 @@ class VersatileDiffusionImageVariationPipelineIntegrationTests(unittest.TestCase ...@@ -54,6 +54,5 @@ class VersatileDiffusionImageVariationPipelineIntegrationTests(unittest.TestCase
image_slice = image[0, 253:256, 253:256, -1] image_slice = image[0, 253:256, 253:256, -1]
assert image.shape == (1, 512, 512, 3) assert image.shape == (1, 512, 512, 3)
print(torch.from_numpy(image_slice.flatten())) expected_slice = np.array([0.1205, 0.1914, 0.2289, 0.0883, 0.1595, 0.1683, 0.0703, 0.1493, 0.1298])
expected_slice = np.array([0.0113, 0.2241, 0.4024, 0.0839, 0.0871, 0.2725, 0.2581, 0.0, 0.1096])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
...@@ -104,7 +104,7 @@ class VersatileDiffusionMegaPipelineIntegrationTests(unittest.TestCase): ...@@ -104,7 +104,7 @@ class VersatileDiffusionMegaPipelineIntegrationTests(unittest.TestCase):
image_slice = image[0, 253:256, 253:256, -1] image_slice = image[0, 253:256, 253:256, -1]
assert image.shape == (1, 512, 512, 3) assert image.shape == (1, 512, 512, 3)
expected_slice = np.array([0.014, 0.0112, 0.0136, 0.0145, 0.0107, 0.0113, 0.0272, 0.0215, 0.0216]) expected_slice = np.array([0.0081, 0.0032, 0.0002, 0.0056, 0.0027, 0.0000, 0.0051, 0.0020, 0.0007])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
prompt = "A painting of a squirrel eating a burger " prompt = "A painting of a squirrel eating a burger "
...@@ -119,11 +119,10 @@ class VersatileDiffusionMegaPipelineIntegrationTests(unittest.TestCase): ...@@ -119,11 +119,10 @@ class VersatileDiffusionMegaPipelineIntegrationTests(unittest.TestCase):
expected_slice = np.array([0.0408, 0.0181, 0.0, 0.0388, 0.0046, 0.0461, 0.0411, 0.0, 0.0222]) expected_slice = np.array([0.0408, 0.0181, 0.0, 0.0388, 0.0046, 0.0461, 0.0411, 0.0, 0.0222])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
pipe = VersatileDiffusionPipeline.from_pretrained("shi-labs/versatile-diffusion", torch_dtype=torch.float16) image = pipe.image_variation(init_image, generator=generator, output_type="numpy").images
image = pipe.image_variation(init_image, generator=generator, output_type="numpy").images[0]
image_slice = image[0, 253:256, 253:256, -1] image_slice = image[0, 253:256, 253:256, -1]
assert image.shape == (1, 512, 512, 3) assert image.shape == (1, 512, 512, 3)
expected_slice = np.array([0.0657, 0.0529, 0.0455, 0.0802, 0.0570, 0.0179, 0.0267, 0.0483, 0.0769]) expected_slice = np.array([0.3479, 0.1943, 0.1060, 0.3894, 0.2537, 0.1394, 0.3989, 0.3191, 0.1987])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment