"vscode:/vscode.git/clone" did not exist on "b5a10eb0ef68f45c7dbdef2917e02bebca780d1a"
Unverified commit 5bacc2f5, authored by Patrick von Platen and committed by GitHub
Browse files

[SAG] Support more schedulers, add better error message and make tests faster (#6465)



* finish

* finish

---------
Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
parent 6ae7e811
......@@ -681,6 +681,11 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, TextualInversionLoaderMixin,
self.scheduler.set_timesteps(num_inference_steps, device=device)
timesteps = self.scheduler.timesteps
if timesteps.dtype not in [torch.int16, torch.int32, torch.int64]:
raise ValueError(
f"{self.__class__.__name__} does not support using a scheduler of type {self.scheduler.__class__.__name__}. Please make sure to use one of 'DDIMScheduler, PNDMScheduler, DDPMScheduler, DEISMultistepScheduler, UniPCMultistepScheduler, DPMSolverMultistepScheduler, DPMSolverSinlgestepScheduler'."
)
# 5. Prepare latent variables
num_channels_latents = self.unet.config.in_channels
latents = self.prepare_latents(
......@@ -830,14 +835,14 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, TextualInversionLoaderMixin,
degraded_latents = degraded_latents * attn_mask + original_latents * (1 - attn_mask)
# Noise it again to match the noise level
degraded_latents = self.scheduler.add_noise(degraded_latents, noise=eps, timesteps=t)
degraded_latents = self.scheduler.add_noise(degraded_latents, noise=eps, timesteps=t[None])
return degraded_latents
# Modified from diffusers.schedulers.scheduling_ddim.DDIMScheduler.step
# Note: there are some schedulers that clip or do not return x_0 (PNDMScheduler, DDIMScheduler, etc.)
def pred_x0(self, sample, model_output, timestep):
alpha_prod_t = self.scheduler.alphas_cumprod[timestep]
alpha_prod_t = self.scheduler.alphas_cumprod[timestep].to(sample.device)
beta_prod_t = 1 - alpha_prod_t
if self.scheduler.config.prediction_type == "epsilon":
......
......@@ -23,6 +23,9 @@ from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer
from diffusers import (
AutoencoderKL,
DDIMScheduler,
DEISMultistepScheduler,
DPMSolverMultistepScheduler,
EulerDiscreteScheduler,
StableDiffusionSAGPipeline,
UNet2DConditionModel,
)
......@@ -45,14 +48,15 @@ class StableDiffusionSAGPipelineFastTests(PipelineLatentTesterMixin, PipelineTes
def get_dummy_components(self):
torch.manual_seed(0)
unet = UNet2DConditionModel(
block_out_channels=(32, 64),
block_out_channels=(4, 8),
layers_per_block=2,
sample_size=32,
sample_size=8,
norm_num_groups=1,
in_channels=4,
out_channels=4,
down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
cross_attention_dim=32,
cross_attention_dim=8,
)
scheduler = DDIMScheduler(
beta_start=0.00085,
......@@ -63,7 +67,8 @@ class StableDiffusionSAGPipelineFastTests(PipelineLatentTesterMixin, PipelineTes
)
torch.manual_seed(0)
vae = AutoencoderKL(
block_out_channels=[32, 64],
block_out_channels=[4, 8],
norm_num_groups=1,
in_channels=3,
out_channels=3,
down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
......@@ -74,11 +79,11 @@ class StableDiffusionSAGPipelineFastTests(PipelineLatentTesterMixin, PipelineTes
text_encoder_config = CLIPTextConfig(
bos_token_id=0,
eos_token_id=2,
hidden_size=32,
hidden_size=8,
num_hidden_layers=2,
intermediate_size=37,
layer_norm_eps=1e-05,
num_attention_heads=4,
num_hidden_layers=5,
pad_token_id=1,
vocab_size=1000,
)
......@@ -108,13 +113,35 @@ class StableDiffusionSAGPipelineFastTests(PipelineLatentTesterMixin, PipelineTes
"num_inference_steps": 2,
"guidance_scale": 1.0,
"sag_scale": 1.0,
"output_type": "numpy",
"output_type": "np",
}
return inputs
def test_inference_batch_single_identical(self):
    # Delegate to the shared mixin implementation, loosening the allowed
    # difference between batched and single inference to 3e-3 —
    # presumably to absorb numerical noise from SAG's extra
    # attention-guidance forward passes; TODO confirm against the mixin.
    super().test_inference_batch_single_identical(expected_max_diff=3e-3)
@unittest.skip("Not necessary to test here.")
def test_xformers_attention_forwardGenerator_pass(self):
    # Intentionally skipped (see the skip reason above); the body is a
    # deliberate no-op so the test is still collected and reported as skipped.
    pass
def test_pipeline_different_schedulers(self):
    """Each supported scheduler, swapped in via ``from_config``, still
    produces an image of the expected shape; an unsupported
    (Karras-style) scheduler makes the pipeline raise ``ValueError``."""
    pipe = self.pipeline_class(**self.get_dummy_components())
    pipe_inputs = self.get_dummy_inputs("cpu")
    target_shape = (16, 16, 3)

    supported_schedulers = (
        DDIMScheduler,
        DEISMultistepScheduler,
        DPMSolverMultistepScheduler,
    )
    for scheduler_type in supported_schedulers:
        pipe.scheduler = scheduler_type.from_config(pipe.scheduler.config)
        output_image = pipe(**pipe_inputs).images[0]
        assert output_image.shape == target_shape

    # Karras schedulers are not supported
    pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
    with self.assertRaises(ValueError):
        pipe(**pipe_inputs).images[0]
@nightly
@require_torch_gpu
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment