Unverified Commit 8ac17cd2 authored by Dhruv Nair, committed by GitHub

[Modular] Some clean up for Modular tests (#12579)



* update

* update

---------
Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
parent e4393fa6
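The diff below touches the Flux tests, the SDXL tests, and the shared modular-pipeline test utilities. The recurring pattern: test classes stop inheriting from `unittest.TestCase` and become plain pytest-style classes built on `ModularPipelineTesterMixin`, which now owns `get_pipeline()`, a CPU-seeded `get_generator()`, and `setup_method`/`teardown_method` hooks; outputs switch from NumPy arrays (`output_type="np"`) to torch tensors (`output_type="pt"`), so assertions use `torch.abs`/`torch.isnan` instead of `np.abs`/`np.isnan`. A condensed, hypothetical sketch of that shared pattern (not the verbatim code from the diff):

```python
import gc

import torch


class ModularTesterSketch:
    """Condensed, hypothetical version of the shared tester mixin in this PR."""

    # Child classes declare what to test instead of overriding get_pipeline().
    pipeline_blocks_class = None
    repo = None

    def setup_method(self):
        # pytest hook; replaces unittest's setUp(), so there is no super() call.
        gc.collect()

    def get_generator(self, seed=0):
        # Seeding on CPU keeps results reproducible across devices.
        return torch.Generator("cpu").manual_seed(seed)

    def get_pipeline(self, components_manager=None, torch_dtype=torch.float32):
        pipeline = self.pipeline_blocks_class().init_pipeline(self.repo, components_manager=components_manager)
        pipeline.load_components(torch_dtype=torch_dtype)
        return pipeline
```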
@@ -15,7 +15,6 @@
 import random
 import tempfile
-import unittest

 import numpy as np
 import PIL
@@ -34,21 +33,16 @@ from ...testing_utils import floats_tensor, torch_device
 from ..test_modular_pipelines_common import ModularPipelineTesterMixin


-class FluxModularTests:
+class TestFluxModularPipelineFast(ModularPipelineTesterMixin):
     pipeline_class = FluxModularPipeline
     pipeline_blocks_class = FluxAutoBlocks
     repo = "hf-internal-testing/tiny-flux-modular"

-    def get_pipeline(self, components_manager=None, torch_dtype=torch.float32):
-        pipeline = self.pipeline_blocks_class().init_pipeline(self.repo, components_manager=components_manager)
-        pipeline.load_components(torch_dtype=torch_dtype)
-        return pipeline
+    params = frozenset(["prompt", "height", "width", "guidance_scale"])
+    batch_params = frozenset(["prompt"])

-    def get_dummy_inputs(self, device, seed=0):
-        if str(device).startswith("mps"):
-            generator = torch.manual_seed(seed)
-        else:
-            generator = torch.Generator(device=device).manual_seed(seed)
+    def get_dummy_inputs(self, seed=0):
+        generator = self.get_generator(seed)
         inputs = {
             "prompt": "A painting of a squirrel eating a burger",
             "generator": generator,
@@ -57,36 +51,47 @@ class FluxModularTests:
             "height": 8,
             "width": 8,
             "max_sequence_length": 48,
-            "output_type": "np",
+            "output_type": "pt",
         }
         return inputs


-class FluxModularPipelineFastTests(FluxModularTests, ModularPipelineTesterMixin, unittest.TestCase):
-    params = frozenset(["prompt", "height", "width", "guidance_scale"])
-    batch_params = frozenset(["prompt"])
-
-
-class FluxImg2ImgModularPipelineFastTests(FluxModularTests, ModularPipelineTesterMixin, unittest.TestCase):
+class TestFluxImg2ImgModularPipelineFast(ModularPipelineTesterMixin):
+    pipeline_class = FluxModularPipeline
+    pipeline_blocks_class = FluxAutoBlocks
+    repo = "hf-internal-testing/tiny-flux-modular"
     params = frozenset(["prompt", "height", "width", "guidance_scale", "image"])
     batch_params = frozenset(["prompt", "image"])

     def get_pipeline(self, components_manager=None, torch_dtype=torch.float32):
         pipeline = super().get_pipeline(components_manager, torch_dtype)
         # Override `vae_scale_factor` here as currently, `image_processor` is initialized with
         # fixed constants instead of
         # https://github.com/huggingface/diffusers/blob/d54622c2679d700b425ad61abce9b80fc36212c0/src/diffusers/pipelines/flux/pipeline_flux_img2img.py#L230C9-L232C10
         pipeline.image_processor = VaeImageProcessor(vae_scale_factor=2)
         return pipeline

-    def get_dummy_inputs(self, device, seed=0):
-        inputs = super().get_dummy_inputs(device, seed)
-        image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device)
-        image = image / 2 + 0.5
-        inputs["image"] = image
-        inputs["strength"] = 0.8
-        inputs["height"] = 8
-        inputs["width"] = 8
+    def get_dummy_inputs(self, seed=0):
+        generator = self.get_generator(seed)
+        inputs = {
+            "prompt": "A painting of a squirrel eating a burger",
+            "generator": generator,
+            "num_inference_steps": 4,
+            "guidance_scale": 5.0,
+            "height": 8,
+            "width": 8,
+            "max_sequence_length": 48,
+            "output_type": "pt",
+        }
+        image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(torch_device)
+        image = image.cpu().permute(0, 2, 3, 1)[0]
+        init_image = PIL.Image.fromarray(np.uint8(image)).convert("RGB")
+        inputs["image"] = init_image
+        inputs["strength"] = 0.5
         return inputs

     def test_save_from_pretrained(self):
@@ -96,6 +101,7 @@ class FluxImg2ImgModularPipelineFastTests(FluxModularTests, ModularPipelineTeste
         with tempfile.TemporaryDirectory() as tmpdirname:
             base_pipe.save_pretrained(tmpdirname)
             pipe = ModularPipeline.from_pretrained(tmpdirname).to(torch_device)
             pipe.load_components(torch_dtype=torch.float32)
             pipe.to(torch_device)
@@ -105,26 +111,62 @@ class FluxImg2ImgModularPipelineFastTests(FluxModularTests, ModularPipelineTeste
         image_slices = []
         for pipe in pipes:
-            inputs = self.get_dummy_inputs(torch_device)
+            inputs = self.get_dummy_inputs()
             image = pipe(**inputs, output="images")
             image_slices.append(image[0, -3:, -3:, -1].flatten())

-        assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3
+        assert torch.abs(image_slices[0] - image_slices[1]).max() < 1e-3


-class FluxKontextModularPipelineFastTests(FluxImg2ImgModularPipelineFastTests):
+class TestFluxKontextModularPipelineFast(ModularPipelineTesterMixin):
     pipeline_class = FluxKontextModularPipeline
     pipeline_blocks_class = FluxKontextAutoBlocks
     repo = "hf-internal-testing/tiny-flux-kontext-pipe"

-    def get_dummy_inputs(self, device, seed=0):
-        inputs = super().get_dummy_inputs(device, seed)
+    params = frozenset(["prompt", "height", "width", "guidance_scale", "image"])
+    batch_params = frozenset(["prompt", "image"])
+
+    def get_dummy_inputs(self, seed=0):
+        generator = self.get_generator(seed)
+        inputs = {
+            "prompt": "A painting of a squirrel eating a burger",
+            "generator": generator,
+            "num_inference_steps": 2,
+            "guidance_scale": 5.0,
+            "height": 8,
+            "width": 8,
+            "max_sequence_length": 48,
+            "output_type": "pt",
+        }
         image = PIL.Image.new("RGB", (32, 32), 0)
-        _ = inputs.pop("strength")
         inputs["image"] = image
-        inputs["height"] = 8
-        inputs["width"] = 8
-        inputs["max_area"] = 8 * 8
+        inputs["max_area"] = inputs["height"] * inputs["width"]
         inputs["_auto_resize"] = False
         return inputs
+
+    def test_save_from_pretrained(self):
+        pipes = []
+        base_pipe = self.get_pipeline().to(torch_device)
+        pipes.append(base_pipe)
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            base_pipe.save_pretrained(tmpdirname)
+            pipe = ModularPipeline.from_pretrained(tmpdirname).to(torch_device)
+            pipe.load_components(torch_dtype=torch.float32)
+            pipe.to(torch_device)
+            pipe.image_processor = VaeImageProcessor(vae_scale_factor=2)
+            pipes.append(pipe)
+
+        image_slices = []
+        for pipe in pipes:
+            inputs = self.get_dummy_inputs()
+            image = pipe(**inputs, output="images")
+            image_slices.append(image[0, -3:, -3:, -1].flatten())
+
+        assert torch.abs(image_slices[0] - image_slices[1]).max() < 1e-3
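One detail worth calling out before the SDXL file's diff: the Flux img2img and Kontext tests above now pass a small PIL image as input instead of a raw tensor. A standalone sketch of that tensor-to-PIL conversion, using `torch.rand` as a stand-in for diffusers' `floats_tensor` test helper (which returns random values in [0, 1)):

```python
import numpy as np
import PIL.Image
import torch


def make_init_image(seed=0, size=32):
    # Stand-in for floats_tensor((1, 3, size, size)): random floats in [0, 1).
    generator = torch.Generator("cpu").manual_seed(seed)
    image = torch.rand((1, 3, size, size), generator=generator)
    image = image.permute(0, 2, 3, 1)[0]  # (1, C, H, W) -> (H, W, C)
    # np.uint8 truncates the [0, 1) floats to 0, so the image is effectively
    # black -- fine for tests that only need a correctly shaped input.
    return PIL.Image.fromarray(np.uint8(image.numpy())).convert("RGB")


init_image = make_init_image()
assert init_image.size == (32, 32)
```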
@@ -14,7 +14,6 @@
 # limitations under the License.

 import random
-import unittest
 from typing import Any, Dict

 import numpy as np
@@ -32,63 +31,26 @@ from ..test_modular_pipelines_common import ModularPipelineTesterMixin
 enable_full_determinism()


-class SDXLModularTests:
+class SDXLModularTesterMixin:
     """
     This mixin defines method to create pipeline, base input and base test across all SDXL modular tests.
     """

-    pipeline_class = StableDiffusionXLModularPipeline
-    pipeline_blocks_class = StableDiffusionXLAutoBlocks
-    repo = "hf-internal-testing/tiny-sdxl-modular"
-    params = frozenset(
-        [
-            "prompt",
-            "height",
-            "width",
-            "negative_prompt",
-            "cross_attention_kwargs",
-            "image",
-            "mask_image",
-        ]
-    )
-    batch_params = frozenset(["prompt", "negative_prompt", "image", "mask_image"])
-
-    def get_pipeline(self, components_manager=None, torch_dtype=torch.float32):
-        pipeline = self.pipeline_blocks_class().init_pipeline(self.repo, components_manager=components_manager)
-        pipeline.load_components(torch_dtype=torch_dtype)
-        return pipeline
-
-    def get_dummy_inputs(self, device, seed=0):
-        if str(device).startswith("mps"):
-            generator = torch.manual_seed(seed)
-        else:
-            generator = torch.Generator(device=device).manual_seed(seed)
-        inputs = {
-            "prompt": "A painting of a squirrel eating a burger",
-            "generator": generator,
-            "num_inference_steps": 2,
-            "output_type": "np",
-        }
-        return inputs
-
     def _test_stable_diffusion_xl_euler(self, expected_image_shape, expected_slice, expected_max_diff=1e-2):
-        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         sd_pipe = self.get_pipeline()
-        sd_pipe = sd_pipe.to(device)
+        sd_pipe = sd_pipe.to(torch_device)
         sd_pipe.set_progress_bar_config(disable=None)

-        inputs = self.get_dummy_inputs(device)
+        inputs = self.get_dummy_inputs()
         image = sd_pipe(**inputs, output="images")
         image_slice = image[0, -3:, -3:, -1]

         assert image.shape == expected_image_shape
-        assert np.abs(image_slice.flatten() - expected_slice).max() < expected_max_diff, (
-            "Image Slice does not match expected slice"
-        )
+        max_diff = torch.abs(image_slice.flatten() - expected_slice).max()
+        assert max_diff < expected_max_diff, f"Image slice does not match expected slice. Max Difference: {max_diff}"


-class SDXLModularIPAdapterTests:
+class SDXLModularIPAdapterTesterMixin:
     """
     This mixin is designed to test IP Adapter.
     """
@@ -127,7 +89,7 @@ class SDXLModularIPAdapterTests:
         if "image" in parameters and "strength" in parameters:
             inputs["num_inference_steps"] = 4

-        inputs["output_type"] = "np"
+        inputs["output_type"] = "pt"
         return inputs

     def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=None):
@@ -152,7 +114,7 @@ class SDXLModularIPAdapterTests:
         cross_attention_dim = pipe.unet.config.get("cross_attention_dim")

         # forward pass without ip adapter
-        inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs(torch_device))
+        inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs())
         if expected_pipe_slice is None:
             output_without_adapter = pipe(**inputs, output="images")
         else:
@@ -163,7 +125,7 @@ class SDXLModularIPAdapterTests:
         pipe.unet._load_ip_adapter_weights(adapter_state_dict)

         # forward pass with single ip adapter, but scale=0 which should have no effect
-        inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs(torch_device))
+        inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs())
         inputs["ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)]
         inputs["negative_ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)]
         pipe.set_ip_adapter_scale(0.0)
@@ -172,7 +134,7 @@ class SDXLModularIPAdapterTests:
             output_without_adapter_scale = output_without_adapter_scale[0, -3:, -3:, -1].flatten()

         # forward pass with single ip adapter, but with scale of adapter weights
-        inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs(torch_device))
+        inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs())
         inputs["ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)]
         inputs["negative_ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)]
         pipe.set_ip_adapter_scale(42.0)
@@ -180,8 +142,8 @@ class SDXLModularIPAdapterTests:
         if expected_pipe_slice is not None:
             output_with_adapter_scale = output_with_adapter_scale[0, -3:, -3:, -1].flatten()

-        max_diff_without_adapter_scale = np.abs(output_without_adapter_scale - output_without_adapter).max()
-        max_diff_with_adapter_scale = np.abs(output_with_adapter_scale - output_without_adapter).max()
+        max_diff_without_adapter_scale = torch.abs(output_without_adapter_scale - output_without_adapter).max()
+        max_diff_with_adapter_scale = torch.abs(output_with_adapter_scale - output_without_adapter).max()

         assert max_diff_without_adapter_scale < expected_max_diff, (
             "Output without ip-adapter must be same as normal inference"
@@ -194,7 +156,7 @@ class SDXLModularIPAdapterTests:
         pipe.unet._load_ip_adapter_weights([adapter_state_dict_1, adapter_state_dict_2])

         # forward pass with multi ip adapter, but scale=0 which should have no effect
-        inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs(torch_device))
+        inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs())
         inputs["ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)] * 2
         inputs["negative_ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)] * 2
         pipe.set_ip_adapter_scale([0.0, 0.0])
@@ -203,7 +165,7 @@ class SDXLModularIPAdapterTests:
             output_without_multi_adapter_scale = output_without_multi_adapter_scale[0, -3:, -3:, -1].flatten()

         # forward pass with multi ip adapter, but with scale of adapter weights
-        inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs(torch_device))
+        inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs())
         inputs["ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)] * 2
         inputs["negative_ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)] * 2
         pipe.set_ip_adapter_scale([42.0, 42.0])
@@ -211,10 +173,10 @@ class SDXLModularIPAdapterTests:
         if expected_pipe_slice is not None:
             output_with_multi_adapter_scale = output_with_multi_adapter_scale[0, -3:, -3:, -1].flatten()

-        max_diff_without_multi_adapter_scale = np.abs(
+        max_diff_without_multi_adapter_scale = torch.abs(
             output_without_multi_adapter_scale - output_without_adapter
         ).max()
-        max_diff_with_multi_adapter_scale = np.abs(output_with_multi_adapter_scale - output_without_adapter).max()
+        max_diff_with_multi_adapter_scale = torch.abs(output_with_multi_adapter_scale - output_without_adapter).max()

         assert max_diff_without_multi_adapter_scale < expected_max_diff, (
             "Output without multi-ip-adapter must be same as normal inference"
         )
@@ -223,7 +185,7 @@ class SDXLModularIPAdapterTests:
         )


-class SDXLModularControlNetTests:
+class SDXLModularControlNetTesterMixin:
     """
     This mixin is designed to test ControlNet.
     """
@@ -262,24 +224,26 @@ class SDXLModularControlNetTests:
         pipe.set_progress_bar_config(disable=None)

         # forward pass without controlnet
-        inputs = self.get_dummy_inputs(torch_device)
+        inputs = self.get_dummy_inputs()
         output_without_controlnet = pipe(**inputs, output="images")
         output_without_controlnet = output_without_controlnet[0, -3:, -3:, -1].flatten()

         # forward pass with single controlnet, but scale=0 which should have no effect
-        inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs(torch_device))
+        inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs())
         inputs["controlnet_conditioning_scale"] = 0.0
         output_without_controlnet_scale = pipe(**inputs, output="images")
         output_without_controlnet_scale = output_without_controlnet_scale[0, -3:, -3:, -1].flatten()

         # forward pass with single controlnet, but with scale of adapter weights
-        inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs(torch_device))
+        inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs())
         inputs["controlnet_conditioning_scale"] = 42.0
         output_with_controlnet_scale = pipe(**inputs, output="images")
         output_with_controlnet_scale = output_with_controlnet_scale[0, -3:, -3:, -1].flatten()

-        max_diff_without_controlnet_scale = np.abs(output_without_controlnet_scale - output_without_controlnet).max()
-        max_diff_with_controlnet_scale = np.abs(output_with_controlnet_scale - output_without_controlnet).max()
+        max_diff_without_controlnet_scale = torch.abs(
+            output_without_controlnet_scale - output_without_controlnet
+        ).max()
+        max_diff_with_controlnet_scale = torch.abs(output_with_controlnet_scale - output_without_controlnet).max()

         assert max_diff_without_controlnet_scale < expected_max_diff, (
             "Output without controlnet must be same as normal inference"
@@ -295,21 +259,21 @@ class SDXLModularControlNetTests:
         guider = ClassifierFreeGuidance(guidance_scale=1.0)
         pipe.update_components(guider=guider)
-        inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs(torch_device))
+        inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs())
         out_no_cfg = pipe(**inputs, output="images")

         # forward pass with CFG applied
         guider = ClassifierFreeGuidance(guidance_scale=7.5)
         pipe.update_components(guider=guider)
-        inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs(torch_device))
+        inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs())
         out_cfg = pipe(**inputs, output="images")

         assert out_cfg.shape == out_no_cfg.shape
-        max_diff = np.abs(out_cfg - out_no_cfg).max()
+        max_diff = torch.abs(out_cfg - out_no_cfg).max()
         assert max_diff > 1e-2, "Output with CFG must be different from normal inference"


-class SDXLModularGuiderTests:
+class SDXLModularGuiderTesterMixin:
     def test_guider_cfg(self):
         pipe = self.get_pipeline()
         pipe = pipe.to(torch_device)
@@ -319,13 +283,13 @@ class SDXLModularGuiderTests:
         guider = ClassifierFreeGuidance(guidance_scale=1.0)
         pipe.update_components(guider=guider)
-        inputs = self.get_dummy_inputs(torch_device)
+        inputs = self.get_dummy_inputs()
         out_no_cfg = pipe(**inputs, output="images")

         # forward pass with CFG applied
         guider = ClassifierFreeGuidance(guidance_scale=7.5)
         pipe.update_components(guider=guider)
-        inputs = self.get_dummy_inputs(torch_device)
+        inputs = self.get_dummy_inputs()
         out_cfg = pipe(**inputs, output="images")

         assert out_cfg.shape == out_no_cfg.shape
@@ -333,30 +297,57 @@ class SDXLModularGuiderTests:
         assert max_diff > 1e-2, "Output with CFG must be different from normal inference"


-class SDXLModularPipelineFastTests(
-    SDXLModularTests,
-    SDXLModularIPAdapterTests,
-    SDXLModularControlNetTests,
-    SDXLModularGuiderTests,
+class TestSDXLModularPipelineFast(
+    SDXLModularTesterMixin,
+    SDXLModularIPAdapterTesterMixin,
+    SDXLModularControlNetTesterMixin,
+    SDXLModularGuiderTesterMixin,
     ModularPipelineTesterMixin,
-    unittest.TestCase,
 ):
     """Test cases for Stable Diffusion XL modular pipeline fast tests."""

+    pipeline_class = StableDiffusionXLModularPipeline
+    pipeline_blocks_class = StableDiffusionXLAutoBlocks
+    repo = "hf-internal-testing/tiny-sdxl-modular"
+    params = frozenset(
+        [
+            "prompt",
+            "height",
+            "width",
+            "negative_prompt",
+            "cross_attention_kwargs",
+        ]
+    )
+    batch_params = frozenset(["prompt", "negative_prompt"])
+    expected_image_output_shape = (1, 3, 64, 64)
+
+    def get_dummy_inputs(self, seed=0):
+        generator = self.get_generator(seed)
+        inputs = {
+            "prompt": "A painting of a squirrel eating a burger",
+            "generator": generator,
+            "num_inference_steps": 2,
+            "output_type": "pt",
+        }
+        return inputs
+
     def test_stable_diffusion_xl_euler(self):
         self._test_stable_diffusion_xl_euler(
-            expected_image_shape=(1, 64, 64, 3),
-            expected_slice=[
-                0.5966781,
-                0.62939394,
-                0.48465094,
-                0.51573336,
-                0.57593524,
-                0.47035995,
-                0.53410417,
-                0.51436996,
-                0.47313565,
-            ],
+            expected_image_shape=self.expected_image_output_shape,
+            expected_slice=torch.tensor(
+                [
+                    0.5966781,
+                    0.62939394,
+                    0.48465094,
+                    0.51573336,
+                    0.57593524,
+                    0.47035995,
+                    0.53410417,
+                    0.51436996,
+                    0.47313565,
+                ],
+                device=torch_device,
+            ),
             expected_max_diff=1e-2,
         )
@@ -364,39 +355,65 @@ class SDXLModularPipelineFastTests(
         super().test_inference_batch_single_identical(expected_max_diff=3e-3)


-class SDXLImg2ImgModularPipelineFastTests(
-    SDXLModularTests,
-    SDXLModularIPAdapterTests,
-    SDXLModularControlNetTests,
-    SDXLModularGuiderTests,
+class TestSDXLImg2ImgModularPipelineFast(
+    SDXLModularTesterMixin,
+    SDXLModularIPAdapterTesterMixin,
+    SDXLModularControlNetTesterMixin,
+    SDXLModularGuiderTesterMixin,
     ModularPipelineTesterMixin,
-    unittest.TestCase,
 ):
     """Test cases for Stable Diffusion XL image-to-image modular pipeline fast tests."""

-    def get_dummy_inputs(self, device, seed=0):
-        inputs = super().get_dummy_inputs(device, seed)
-        image = floats_tensor((1, 3, 64, 64), rng=random.Random(seed)).to(device)
-        image = image / 2 + 0.5
-        inputs["image"] = image
-        inputs["strength"] = 0.8
+    pipeline_class = StableDiffusionXLModularPipeline
+    pipeline_blocks_class = StableDiffusionXLAutoBlocks
+    repo = "hf-internal-testing/tiny-sdxl-modular"
+    params = frozenset(
+        [
+            "prompt",
+            "height",
+            "width",
+            "negative_prompt",
+            "cross_attention_kwargs",
+            "image",
+        ]
+    )
+    batch_params = frozenset(["prompt", "negative_prompt", "image"])
+    expected_image_output_shape = (1, 3, 64, 64)
+
+    def get_dummy_inputs(self, seed=0):
+        generator = self.get_generator(seed)
+        inputs = {
+            "prompt": "A painting of a squirrel eating a burger",
+            "generator": generator,
+            "num_inference_steps": 4,
+            "output_type": "pt",
+        }
+        image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(torch_device)
+        image = image.cpu().permute(0, 2, 3, 1)[0]
+        init_image = Image.fromarray(np.uint8(image)).convert("RGB").resize((64, 64))
+        inputs["image"] = init_image
+        inputs["strength"] = 0.5
         return inputs

     def test_stable_diffusion_xl_euler(self):
         self._test_stable_diffusion_xl_euler(
-            expected_image_shape=(1, 64, 64, 3),
-            expected_slice=[
-                0.56943184,
-                0.4702148,
-                0.48048905,
-                0.6235963,
-                0.551138,
-                0.49629188,
-                0.60031277,
-                0.5688907,
-                0.43996853,
-            ],
+            expected_image_shape=self.expected_image_output_shape,
+            expected_slice=torch.tensor(
+                [
+                    0.56943184,
+                    0.4702148,
+                    0.48048905,
+                    0.6235963,
+                    0.551138,
+                    0.49629188,
+                    0.60031277,
+                    0.5688907,
+                    0.43996853,
+                ],
+                device=torch_device,
+            ),
             expected_max_diff=1e-2,
         )
@@ -405,20 +422,43 @@ class SDXLImg2ImgModularPipelineFastTests(
 class SDXLInpaintingModularPipelineFastTests(
-    SDXLModularTests,
-    SDXLModularIPAdapterTests,
-    SDXLModularControlNetTests,
-    SDXLModularGuiderTests,
+    SDXLModularTesterMixin,
+    SDXLModularIPAdapterTesterMixin,
+    SDXLModularControlNetTesterMixin,
+    SDXLModularGuiderTesterMixin,
     ModularPipelineTesterMixin,
-    unittest.TestCase,
 ):
     """Test cases for Stable Diffusion XL inpainting modular pipeline fast tests."""

+    pipeline_class = StableDiffusionXLModularPipeline
+    pipeline_blocks_class = StableDiffusionXLAutoBlocks
+    repo = "hf-internal-testing/tiny-sdxl-modular"
+    params = frozenset(
+        [
+            "prompt",
+            "height",
+            "width",
+            "negative_prompt",
+            "cross_attention_kwargs",
+            "image",
+            "mask_image",
+        ]
+    )
+    batch_params = frozenset(["prompt", "negative_prompt", "image", "mask_image"])
+    expected_image_output_shape = (1, 3, 64, 64)
+
     def get_dummy_inputs(self, device, seed=0):
-        inputs = super().get_dummy_inputs(device, seed)
+        generator = self.get_generator(seed)
+        inputs = {
+            "prompt": "A painting of a squirrel eating a burger",
+            "generator": generator,
+            "num_inference_steps": 4,
+            "output_type": "pt",
+        }
         image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device)
         image = image.cpu().permute(0, 2, 3, 1)[0]
         init_image = Image.fromarray(np.uint8(image)).convert("RGB").resize((64, 64))
         # create mask
         image[8:, 8:, :] = 255
         mask_image = Image.fromarray(np.uint8(image)).convert("L").resize((64, 64))
@@ -431,18 +471,21 @@ class SDXLInpaintingModularPipelineFastTests(
     def test_stable_diffusion_xl_euler(self):
         self._test_stable_diffusion_xl_euler(
-            expected_image_shape=(1, 64, 64, 3),
-            expected_slice=[
-                0.40872607,
-                0.38842705,
-                0.34893104,
-                0.47837183,
-                0.43792963,
-                0.5332134,
-                0.3716843,
-                0.47274873,
-                0.45000193,
-            ],
+            expected_image_shape=self.expected_image_output_shape,
+            expected_slice=torch.tensor(
+                [
+                    0.40872607,
+                    0.38842705,
+                    0.34893104,
+                    0.47837183,
+                    0.43792963,
+                    0.5332134,
+                    0.3716843,
+                    0.47274873,
+                    0.45000193,
+                ],
+                device=torch_device,
+            ),
             expected_max_diff=1e-2,
         )
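The last file in the diff is the shared tester module the other two import from (`test_modular_pipelines_common`). One consequence of `output_type="pt"` visible above: images come back channels-first, so `expected_image_shape` becomes `(1, 3, 64, 64)` instead of `(1, 64, 64, 3)`, and the 9-value `image[0, -3:, -3:, -1]` slice now covers the last column of the last three rows across all three channels. A self-contained sketch of the torch-based slice comparison, with dummy values rather than real expected outputs:

```python
import torch


def assert_slice_close(image, expected_slice, expected_max_diff=1e-2):
    # image: (1, C, H, W) tensor, as returned with output_type="pt".
    image_slice = image[0, -3:, -3:, -1]  # 9 values: 3 channels x last 3 rows, last column
    max_diff = torch.abs(image_slice.flatten() - expected_slice).max()
    assert max_diff < expected_max_diff, f"Image slice does not match expected slice. Max Difference: {max_diff}"


# Dummy usage: a zero image trivially matches a zero slice.
assert_slice_close(torch.zeros(1, 3, 64, 64), torch.zeros(9))
```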
 import gc
 import tempfile
-import unittest
 from typing import Callable, Union

-import numpy as np
 import torch

 import diffusers
@@ -19,17 +17,9 @@ from ..testing_utils import (
 )


-def to_np(tensor):
-    if isinstance(tensor, torch.Tensor):
-        tensor = tensor.detach().cpu().numpy()
-    return tensor
-
-
 @require_torch
 class ModularPipelineTesterMixin:
     """
-    This mixin is designed to be used with unittest.TestCase classes.
     It provides a set of common tests for each modular pipeline,
     including:
     - test_pipeline_call_signature: check if the pipeline's __call__ method has all required parameters
@@ -57,9 +47,8 @@ class ModularPipelineTesterMixin:
         ]
     )

-    def get_generator(self, seed):
-        device = torch_device if torch_device != "mps" else "cpu"
-        generator = torch.Generator(device).manual_seed(seed)
+    def get_generator(self, seed=0):
+        generator = torch.Generator("cpu").manual_seed(seed)
         return generator

     @property
@@ -82,13 +71,7 @@ class ModularPipelineTesterMixin:
             "See existing pipeline tests for reference."
         )

-    def get_pipeline(self):
-        raise NotImplementedError(
-            "You need to implement `get_pipeline(self)` in the child test class. "
-            "See existing pipeline tests for reference."
-        )
-
-    def get_dummy_inputs(self, device, seed=0):
+    def get_dummy_inputs(self, seed=0):
         raise NotImplementedError(
             "You need to implement `get_dummy_inputs(self, device, seed)` in the child test class. "
             "See existing pipeline tests for reference."
@@ -123,20 +106,23 @@ class ModularPipelineTesterMixin:
             "See existing pipeline tests for reference."
         )

-    def setUp(self):
+    def setup_method(self):
         # clean up the VRAM before each test
-        super().setUp()
         torch.compiler.reset()
         gc.collect()
         backend_empty_cache(torch_device)

-    def tearDown(self):
+    def teardown_method(self):
         # clean up the VRAM after each test in case of CUDA runtime errors
-        super().tearDown()
         torch.compiler.reset()
         gc.collect()
         backend_empty_cache(torch_device)

+    def get_pipeline(self, components_manager=None, torch_dtype=torch.float32):
+        pipeline = self.pipeline_blocks_class().init_pipeline(self.repo, components_manager=components_manager)
+        pipeline.load_components(torch_dtype=torch_dtype)
+        return pipeline
+
     def test_pipeline_call_signature(self):
         pipe = self.get_pipeline()
         input_parameters = pipe.blocks.input_names
@@ -156,7 +142,7 @@ class ModularPipelineTesterMixin:
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)

-        inputs = self.get_dummy_inputs(torch_device)
+        inputs = self.get_dummy_inputs()
         inputs["generator"] = self.get_generator(0)

         logger = logging.get_logger(pipe.__module__)
@@ -196,7 +182,7 @@ class ModularPipelineTesterMixin:
         pipe = self.get_pipeline()
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
-        inputs = self.get_dummy_inputs(torch_device)
+        inputs = self.get_dummy_inputs()

         # Reset generator in case it is has been used in self.get_dummy_inputs
         inputs["generator"] = self.get_generator(0)
@@ -226,10 +212,9 @@ class ModularPipelineTesterMixin:
         assert output_batch.shape[0] == batch_size

-        max_diff = np.abs(to_np(output_batch[0]) - to_np(output[0])).max()
+        max_diff = torch.abs(output_batch[0] - output[0]).max()
         assert max_diff < expected_max_diff, "Batch inference results different from single inference results"

-    @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU")
     @require_accelerator
     def test_float16_inference(self, expected_max_diff=5e-2):
         pipe = self.get_pipeline()
@@ -240,13 +225,13 @@ class ModularPipelineTesterMixin:
         pipe_fp16.to(torch_device, torch.float16)
         pipe_fp16.set_progress_bar_config(disable=None)

-        inputs = self.get_dummy_inputs(torch_device)
+        inputs = self.get_dummy_inputs()
         # Reset generator in case it is used inside dummy inputs
         if "generator" in inputs:
             inputs["generator"] = self.get_generator(0)

         output = pipe(**inputs, output="images")

-        fp16_inputs = self.get_dummy_inputs(torch_device)
+        fp16_inputs = self.get_dummy_inputs()
         # Reset generator in case it is used inside dummy inputs
         if "generator" in fp16_inputs:
             fp16_inputs["generator"] = self.get_generator(0)
@@ -283,8 +268,8 @@ class ModularPipelineTesterMixin:
         pipe.set_progress_bar_config(disable=None)
         pipe.to("cpu")

-        output = pipe(**self.get_dummy_inputs("cpu"), output="images")
-        assert np.isnan(to_np(output)).sum() == 0, "CPU Inference returns NaN"
+        output = pipe(**self.get_dummy_inputs(), output="images")
+        assert torch.isnan(output).sum() == 0, "CPU Inference returns NaN"

     @require_accelerator
     def test_inference_is_not_nan(self):
@@ -292,8 +277,8 @@ class ModularPipelineTesterMixin:
         pipe.set_progress_bar_config(disable=None)
         pipe.to(torch_device)

-        output = pipe(**self.get_dummy_inputs(torch_device), output="images")
-        assert np.isnan(to_np(output)).sum() == 0, "Accelerator Inference returns NaN"
+        output = pipe(**self.get_dummy_inputs(), output="images")
+        assert torch.isnan(output).sum() == 0, "Accelerator Inference returns NaN"

     def test_num_images_per_prompt(self):
         pipe = self.get_pipeline()
@@ -309,7 +294,7 @@ class ModularPipelineTesterMixin:
         for batch_size in batch_sizes:
             for num_images_per_prompt in num_images_per_prompts:
-                inputs = self.get_dummy_inputs(torch_device)
+                inputs = self.get_dummy_inputs()

                 for key in inputs.keys():
                     if key in self.batch_params:
@@ -329,12 +314,12 @@ class ModularPipelineTesterMixin:
         image_slices = []
         for pipe in [base_pipe, offload_pipe]:
-            inputs = self.get_dummy_inputs(torch_device)
+            inputs = self.get_dummy_inputs()
             image = pipe(**inputs, output="images")
             image_slices.append(image[0, -3:, -3:, -1].flatten())

-        assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3
+        assert torch.abs(image_slices[0] - image_slices[1]).max() < 1e-3

     def test_save_from_pretrained(self):
         pipes = []
@@ -351,9 +336,9 @@ class ModularPipelineTesterMixin:
         image_slices = []
         for pipe in pipes:
-            inputs = self.get_dummy_inputs(torch_device)
+            inputs = self.get_dummy_inputs()
             image = pipe(**inputs, output="images")
             image_slices.append(image[0, -3:, -3:, -1].flatten())

-        assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3
+        assert torch.abs(image_slices[0] - image_slices[1]).max() < 1e-3
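To tie the pieces together: because the mixin no longer inherits from `unittest.TestCase`, pytest discovers the concrete classes by their `Test*` names and invokes `setup_method`/`teardown_method` directly. A minimal sketch of how a concrete fast-test class composes with the mixin; the pipeline classes and repo name here are illustrative placeholders, not real diffusers identifiers:

```python
# Assumes ModularPipelineTesterMixin from test_modular_pipelines_common.py is importable.
class TestMyModularPipelineFast(ModularPipelineTesterMixin):
    pipeline_class = MyModularPipeline            # illustrative placeholder
    pipeline_blocks_class = MyAutoBlocks          # illustrative placeholder
    repo = "hf-internal-testing/tiny-my-modular"  # illustrative placeholder
    params = frozenset(["prompt", "height", "width"])
    batch_params = frozenset(["prompt"])

    def get_dummy_inputs(self, seed=0):
        return {
            "prompt": "A painting of a squirrel eating a burger",
            "generator": self.get_generator(seed),
            "num_inference_steps": 2,
            "output_type": "pt",
        }
```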