Unverified Commit 6bd55b54 authored by chuzh, committed by GitHub


Fix [core/GLIGEN]: TypeError when iterating over 0-d tensor with In-painting mode when EulerAncestralDiscreteScheduler is used (#5305)

* fix(gligen_inpaint_pipeline): 🐛

Wrap the 0-d timestep tensor in a list so that a 1-d tensor is passed to scheduler.add_noise(). This avoids the TypeError raised when the denoising stage tries to iterate directly over a 0-dimensional tensor (a minimal illustration follows the commit metadata below).

* test(gligen/gligen_text_image): unit test using the EulerAncestralDiscreteScheduler

---------
Co-authored-by: zhen-hao.chu <zhen-hao.chu@vitrox.com>
Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
parent 0513a8cf
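
As a minimal illustration of the error described in the commit message (plain PyTorch, independent of diffusers): iterating over a 0-d tensor raises a TypeError, while wrapping the value in a list yields a 1-d tensor that iterates fine, which is exactly what the patch below does with `torch.tensor([t])`.

```python
import torch

t = torch.tensor(981)  # 0-d tensor, like a single timestep taken from scheduler.timesteps

try:
    list(t)  # iterating over a 0-d tensor is not allowed
except TypeError as err:
    print(err)  # "iteration over a 0-d tensor"

t_1d = torch.tensor([t])  # wrap in a list -> 1-d tensor of shape (1,); recent PyTorch may warn about copy-construction
print(list(t_1d))  # [tensor(981)]
```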
```diff
@@ -207,7 +207,7 @@ class CustomDiffusionDataset(Dataset):
                     with open(concept["class_prompt"], "r") as f:
                         class_prompt = f.read().splitlines()

-                class_img_path = [(x, y) for (x, y) in zip(class_images_path, class_prompt)]
+                class_img_path = list(zip(class_images_path, class_prompt))
                 self.class_images_path.extend(class_img_path[:num_class_images])

         random.shuffle(self.instance_images_path)
```
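
The hunk above is an unrelated style cleanup bundled into the same commit; it does not change behavior. A quick standalone check (not part of the patch, with made-up example values) that the comprehension and `list(zip(...))` build the same list of pairs:

```python
# Illustrative values only; the real lists come from the dataset's concept config.
class_images_path = ["cat_0.png", "cat_1.png"]
class_prompt = ["photo of a cat", "photo of a cat"]

old_style = [(x, y) for (x, y) in zip(class_images_path, class_prompt)]
new_style = list(zip(class_images_path, class_prompt))
assert old_style == new_style  # identical pairs, so the refactor is behavior-preserving
```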
```diff
@@ -803,7 +803,9 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline):
                 if gligen_inpaint_image is not None:
                     gligen_inpaint_latent_with_noise = (
-                        self.scheduler.add_noise(gligen_inpaint_latent, torch.randn_like(gligen_inpaint_latent), t)
+                        self.scheduler.add_noise(
+                            gligen_inpaint_latent, torch.randn_like(gligen_inpaint_latent), torch.tensor([t])
+                        )
                         .expand(latents.shape[0], -1, -1, -1)
                         .clone()
                     )
```
```diff
@@ -965,7 +965,9 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline):
                 if gligen_inpaint_image is not None:
                     gligen_inpaint_latent_with_noise = (
-                        self.scheduler.add_noise(gligen_inpaint_latent, torch.randn_like(gligen_inpaint_latent), t)
+                        self.scheduler.add_noise(
+                            gligen_inpaint_latent, torch.randn_like(gligen_inpaint_latent), torch.tensor([t])
+                        )
                         .expand(latents.shape[0], -1, -1, -1)
                         .clone()
                     )
```
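
Both hunks above make the same change: the 0-d loop variable `t` is passed to `scheduler.add_noise` as the 1-d tensor `torch.tensor([t])`. Below is a sketch of that call site in isolation, using `EulerAncestralDiscreteScheduler` directly; the failing call is left commented out because whether it raises depends on the diffusers version.

```python
import torch
from diffusers import EulerAncestralDiscreteScheduler

scheduler = EulerAncestralDiscreteScheduler()
scheduler.set_timesteps(50)

latent = torch.randn(1, 4, 64, 64)  # stands in for gligen_inpaint_latent
noise = torch.randn_like(latent)
t = scheduler.timesteps[0]          # 0-d tensor, as seen inside the denoising loop

# noisy = scheduler.add_noise(latent, noise, t)  # could raise: iteration over a 0-d tensor
noisy = scheduler.add_noise(latent, noise, torch.tensor([t]))  # 1-d timestep tensor, as in the fix
print(noisy.shape)  # torch.Size([1, 4, 64, 64])
```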
```diff
@@ -22,6 +22,7 @@ from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer
 from diffusers import (
     AutoencoderKL,
     DDIMScheduler,
+    EulerAncestralDiscreteScheduler,
     StableDiffusionGLIGENPipeline,
     UNet2DConditionModel,
 )
```
```diff
@@ -120,7 +121,7 @@ class GligenPipelineFastTests(
         }
         return inputs

-    def test_gligen(self):
+    def test_stable_diffusion_gligen_default_case(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         components = self.get_dummy_components()
         sd_pipe = StableDiffusionGLIGENPipeline(**components)
```
```diff
@@ -136,6 +137,24 @@ class GligenPipelineFastTests(

         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

+    def test_stable_diffusion_gligen_k_euler_ancestral(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
+        components = self.get_dummy_components()
+        sd_pipe = StableDiffusionGLIGENPipeline(**components)
+        sd_pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(sd_pipe.scheduler.config)
+        sd_pipe = sd_pipe.to(device)
+        sd_pipe.set_progress_bar_config(disable=None)
+
+        inputs = self.get_dummy_inputs(device)
+        output = sd_pipe(**inputs)
+        image = output.images
+
+        image_slice = image[0, -3:, -3:, -1]
+
+        assert image.shape == (1, 64, 64, 3)
+        expected_slice = np.array([0.425, 0.494, 0.429, 0.469, 0.525, 0.417, 0.533, 0.5, 0.47])
+
+        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

     def test_attention_slicing_forward_pass(self):
         super().test_attention_slicing_forward_pass(expected_max_diff=3e-3)
```
```diff
@@ -29,6 +29,7 @@ from transformers import (
 from diffusers import (
     AutoencoderKL,
     DDIMScheduler,
+    EulerAncestralDiscreteScheduler,
     StableDiffusionGLIGENTextImagePipeline,
     UNet2DConditionModel,
 )
```
```diff
@@ -150,7 +151,7 @@ class GligenTextImagePipelineFastTests(
         }
         return inputs

-    def test_gligen(self):
+    def test_stable_diffusion_gligen_text_image_default_case(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         components = self.get_dummy_components()
         sd_pipe = StableDiffusionGLIGENTextImagePipeline(**components)
```
```diff
@@ -166,6 +167,24 @@ class GligenTextImagePipelineFastTests(

         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

+    def test_stable_diffusion_gligen_k_euler_ancestral(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
+        components = self.get_dummy_components()
+        sd_pipe = StableDiffusionGLIGENTextImagePipeline(**components)
+        sd_pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(sd_pipe.scheduler.config)
+        sd_pipe = sd_pipe.to(device)
+        sd_pipe.set_progress_bar_config(disable=None)
+
+        inputs = self.get_dummy_inputs(device)
+        image = sd_pipe(**inputs).images
+
+        image_slice = image[0, -3:, -3:, -1]
+
+        assert image.shape == (1, 64, 64, 3)
+        expected_slice = np.array([0.425, 0.494, 0.429, 0.469, 0.525, 0.417, 0.533, 0.5, 0.47])
+
+        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

     def test_attention_slicing_forward_pass(self):
         super().test_attention_slicing_forward_pass(expected_max_diff=3e-3)
```
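
At the user level, the scenario the new tests cover looks roughly like the sketch below: a GLIGEN in-painting pipeline with its default scheduler swapped for `EulerAncestralDiscreteScheduler`. The checkpoint id, the local image path, and the exact `__call__` arguments are assumptions for illustration, not part of this commit.

```python
import torch
from PIL import Image
from diffusers import EulerAncestralDiscreteScheduler, StableDiffusionGLIGENPipeline

# Assumed checkpoint id; substitute any GLIGEN in-painting checkpoint you actually use.
pipe = StableDiffusionGLIGENPipeline.from_pretrained(
    "masterful/gligen-1-4-inpainting-text-box", torch_dtype=torch.float16
).to("cuda")

# Swapping in EulerAncestralDiscreteScheduler is what used to trigger the 0-d tensor TypeError.
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)

# Any 512x512 RGB image to in-paint into; "room.png" is a placeholder path.
inpaint_image = Image.open("room.png").convert("RGB").resize((512, 512))

image = pipe(
    prompt="a birthday cake",
    gligen_phrases=["a birthday cake"],
    gligen_boxes=[[0.25, 0.5, 0.75, 0.9]],  # normalized xyxy box for the grounded object
    gligen_inpaint_image=inpaint_image,
    gligen_scheduled_sampling_beta=1.0,
    num_inference_steps=50,
).images[0]
image.save("gligen_inpaint_euler_a.png")
```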