Unverified Commit 6313645b authored by YiYi Xu, committed by GitHub

add `StableDiffusionXLKDiffusionPipeline` (#6447)




---------
Co-authored-by: yiyixuxu <yixu310@gmail.com>
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
parent 2d1f2182
@@ -333,6 +333,8 @@
         title: Latent upscaler
       - local: api/pipelines/stable_diffusion/upscale
         title: Super-resolution
+      - local: api/pipelines/stable_diffusion/k_diffusion
+        title: K-Diffusion
       - local: api/pipelines/stable_diffusion/ldm3d_diffusion
         title: LDM3D Text-to-(RGB, Depth), Text-to-(RGB-pano, Depth-pano), LDM3D Upscaler
       - local: api/pipelines/stable_diffusion/adapter
......
<!--Copyright 2023 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->
# K-Diffusion
[k-diffusion](https://github.com/crowsonkb/k-diffusion) is a popular library created by [Katherine Crowson](https://github.com/crowsonkb/). We provide `StableDiffusionKDiffusionPipeline` and `StableDiffusionXLKDiffusionPipeline` that allow you to run Stable Diffusion with samplers from k-diffusion.
Note that most of the samplers from k-diffusion are implemented in Diffusers, and we recommend using the existing schedulers. You can find a mapping between k-diffusion samplers and schedulers in Diffusers [here](https://huggingface.co/docs/diffusers/api/schedulers/overview).
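Below is a minimal usage sketch (not part of the committed documentation), modeled on this PR's integration tests; the checkpoint id and sampler name are only examples:

```py
import torch
from diffusers import StableDiffusionXLKDiffusionPipeline

# Requires the `k-diffusion` package in addition to the usual dependencies.
pipe = StableDiffusionXLKDiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")

# Any sampler exposed by `k_diffusion.sampling` can be selected by name.
pipe.set_scheduler("sample_dpmpp_2m")

image = pipe("A painting of a squirrel eating a burger", num_inference_steps=25).images[0]
image.save("squirrel.png")
```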
## StableDiffusionKDiffusionPipeline
[[autodoc]] StableDiffusionKDiffusionPipeline
## StableDiffusionXLKDiffusionPipeline
[[autodoc]] StableDiffusionXLKDiffusionPipeline
\ No newline at end of file
@@ -316,7 +316,7 @@ except OptionalDependencyNotAvailable:
     ]
 else:
-    _import_structure["pipelines"].extend(["StableDiffusionKDiffusionPipeline"])
+    _import_structure["pipelines"].extend(["StableDiffusionKDiffusionPipeline", "StableDiffusionXLKDiffusionPipeline"])
 try:
     if not (is_torch_available() and is_transformers_available() and is_onnx_available()):
@@ -668,7 +668,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     except OptionalDependencyNotAvailable:
         from .utils.dummy_torch_and_transformers_and_k_diffusion_objects import *  # noqa F403
     else:
-        from .pipelines import StableDiffusionKDiffusionPipeline
+        from .pipelines import StableDiffusionKDiffusionPipeline, StableDiffusionXLKDiffusionPipeline
     try:
         if not (is_torch_available() and is_transformers_available() and is_onnx_available()):
......
@@ -265,7 +265,10 @@ except OptionalDependencyNotAvailable:
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_and_k_diffusion_objects))
 else:
-    _import_structure["stable_diffusion_k_diffusion"] = ["StableDiffusionKDiffusionPipeline"]
+    _import_structure["stable_diffusion_k_diffusion"] = [
+        "StableDiffusionKDiffusionPipeline",
+        "StableDiffusionXLKDiffusionPipeline",
+    ]
 try:
     if not is_flax_available():
         raise OptionalDependencyNotAvailable()
@@ -491,7 +494,10 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     except OptionalDependencyNotAvailable:
         from ..utils.dummy_torch_and_transformers_and_k_diffusion_objects import *
     else:
-        from .stable_diffusion_k_diffusion import StableDiffusionKDiffusionPipeline
+        from .stable_diffusion_k_diffusion import (
+            StableDiffusionKDiffusionPipeline,
+            StableDiffusionXLKDiffusionPipeline,
+        )
     try:
         if not is_flax_available():
......
@@ -30,6 +30,7 @@ except OptionalDependencyNotAvailable:
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_and_k_diffusion_objects))
 else:
     _import_structure["pipeline_stable_diffusion_k_diffusion"] = ["StableDiffusionKDiffusionPipeline"]
+    _import_structure["pipeline_stable_diffusion_xl_k_diffusion"] = ["StableDiffusionXLKDiffusionPipeline"]
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     try:
@@ -45,6 +46,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         from ...utils.dummy_torch_and_transformers_and_k_diffusion_objects import *
     else:
         from .pipeline_stable_diffusion_k_diffusion import StableDiffusionKDiffusionPipeline
+        from .pipeline_stable_diffusion_xl_k_diffusion import StableDiffusionXLKDiffusionPipeline
 else:
     import sys
......
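Taken together, the three `__init__.py` changes above register the new pipeline in the package's lazy-import machinery. A quick sketch of the intended effect, assuming `torch`, `transformers`, and `k-diffusion` are installed:

```py
# The top-level import now resolves to the real pipeline class rather than a
# dummy object; the defining submodule is only loaded on first access.
from diffusers import StableDiffusionXLKDiffusionPipeline

print(StableDiffusionXLKDiffusionPipeline.__module__)
# expected: diffusers.pipelines.stable_diffusion_k_diffusion.pipeline_stable_diffusion_xl_k_diffusion
```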
@@ -134,7 +134,15 @@ class StableDiffusionKDiffusionPipeline(DiffusionPipeline, TextualInversionLoade
     def set_scheduler(self, scheduler_type: str):
         library = importlib.import_module("k_diffusion")
         sampling = getattr(library, "sampling")
-        self.sampler = getattr(sampling, scheduler_type)
+        try:
+            self.sampler = getattr(sampling, scheduler_type)
+        except Exception:
+            valid_samplers = []
+            for s in dir(sampling):
+                if "sample_" in s:
+                    valid_samplers.append(s)
+
+            raise ValueError(f"Invalid scheduler type {scheduler_type}. Please choose one of {valid_samplers}.")
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
     def _encode_prompt(
......
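The `set_scheduler` change above turns an opaque `AttributeError` into a readable `ValueError` that lists the available samplers. A hypothetical snippet illustrating the new behavior, assuming `pipe` is a k-diffusion pipeline loaded elsewhere:

```py
# `pipe` is assumed to be a StableDiffusion(XL)KDiffusionPipeline instance.
pipe.set_scheduler("sample_euler")  # resolves k_diffusion.sampling.sample_euler

try:
    pipe.set_scheduler("sample_eulr")  # deliberate typo: no such sampler
except ValueError as err:
    # The message now lists every `sample_*` function that k-diffusion exposes.
    print(err)
```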
@@ -15,3 +15,18 @@ class StableDiffusionKDiffusionPipeline(metaclass=DummyObject):
     @classmethod
     def from_pretrained(cls, *args, **kwargs):
         requires_backends(cls, ["torch", "transformers", "k_diffusion"])
+
+
+class StableDiffusionXLKDiffusionPipeline(metaclass=DummyObject):
+    _backends = ["torch", "transformers", "k_diffusion"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch", "transformers", "k_diffusion"])
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers", "k_diffusion"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers", "k_diffusion"])
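The dummy class above is what `StableDiffusionXLKDiffusionPipeline` resolves to when one of the required backends is missing. A sketch of that fallback behavior, assuming `k-diffusion` is *not* installed:

```py
from diffusers import StableDiffusionXLKDiffusionPipeline  # resolves to the dummy class

try:
    StableDiffusionXLKDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
except ImportError as err:
    print(err)  # explains that torch, transformers and k_diffusion are required
```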
# coding=utf-8
# Copyright 2023 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import gc
import unittest
import numpy as np
import torch
from diffusers import StableDiffusionXLKDiffusionPipeline
from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, slow, torch_device
enable_full_determinism()
@slow
@require_torch_gpu
class StableDiffusionXLKPipelineIntegrationTests(unittest.TestCase):
dtype = torch.float16
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
def test_stable_diffusion_xl(self):
sd_pipe = StableDiffusionXLKDiffusionPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=self.dtype
)
sd_pipe = sd_pipe.to(torch_device)
sd_pipe.set_progress_bar_config(disable=None)
sd_pipe.set_scheduler("sample_euler")
prompt = "A painting of a squirrel eating a burger"
generator = torch.manual_seed(0)
output = sd_pipe(
[prompt],
generator=generator,
guidance_scale=9.0,
num_inference_steps=20,
height=512,
width=512,
output_type="np",
)
image = output.images
image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array(
[0.79804534, 0.7981539, 0.8019961, 0.7936565, 0.7892033, 0.7914713, 0.7792827, 0.77754563, 0.7836789]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_stable_diffusion_karras_sigmas(self):
sd_pipe = StableDiffusionXLKDiffusionPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=self.dtype
)
sd_pipe = sd_pipe.to(torch_device)
sd_pipe.set_progress_bar_config(disable=None)
sd_pipe.set_scheduler("sample_dpmpp_2m")
prompt = "A painting of a squirrel eating a burger"
generator = torch.manual_seed(0)
output = sd_pipe(
[prompt],
generator=generator,
guidance_scale=7.5,
num_inference_steps=15,
output_type="np",
use_karras_sigmas=True,
height=512,
width=512,
)
image = output.images
image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array(
[0.9704869, 0.9714559, 0.9693254, 0.96892524, 0.9685236, 0.9659081, 0.9666761, 0.9619067, 0.961759]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_stable_diffusion_noise_sampler_seed(self):
sd_pipe = StableDiffusionXLKDiffusionPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=self.dtype
)
sd_pipe = sd_pipe.to(torch_device)
sd_pipe.set_progress_bar_config(disable=None)
sd_pipe.set_scheduler("sample_dpmpp_sde")
prompt = "A painting of a squirrel eating a burger"
seed = 0
images1 = sd_pipe(
[prompt],
generator=torch.manual_seed(seed),
noise_sampler_seed=seed,
guidance_scale=9.0,
num_inference_steps=20,
output_type="np",
height=512,
width=512,
).images
images2 = sd_pipe(
[prompt],
generator=torch.manual_seed(seed),
noise_sampler_seed=seed,
guidance_scale=9.0,
num_inference_steps=20,
output_type="np",
height=512,
width=512,
).images
assert images1.shape == (1, 512, 512, 3)
assert images2.shape == (1, 512, 512, 3)
assert np.abs(images1.flatten() - images2.flatten()).max() < 1e-2
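Note that the integration tests above are decorated with `@slow` and `@require_torch_gpu`, so they are skipped by default and only run on a CUDA machine when slow tests are explicitly enabled (via the `RUN_SLOW` environment variable used by Diffusers' test setup).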