Unverified Commit 6313645b authored by YiYi Xu, committed by GitHub

add `StableDiffusionXLKDiffusionPipeline` (#6447)




---------
Co-authored-by: yiyixuxu <yixu310@gmail.com>
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
parent 2d1f2182
@@ -333,6 +333,8 @@
         title: Latent upscaler
       - local: api/pipelines/stable_diffusion/upscale
         title: Super-resolution
+      - local: api/pipelines/stable_diffusion/k_diffusion
+        title: K-Diffusion
       - local: api/pipelines/stable_diffusion/ldm3d_diffusion
         title: LDM3D Text-to-(RGB, Depth), Text-to-(RGB-pano, Depth-pano), LDM3D Upscaler
       - local: api/pipelines/stable_diffusion/adapter
......
<!--Copyright 2023 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->
# K-Diffusion
[k-diffusion](https://github.com/crowsonkb/k-diffusion) is a popular library created by [Katherine Crowson](https://github.com/crowsonkb/). We provide `StableDiffusionKDiffusionPipeline` and `StableDiffusionXLKDiffusionPipeline` that allow you to run Stable Diffusion with samplers from k-diffusion.
Note that most of the samplers from k-diffusion are implemented in Diffusers, and we recommend using the existing schedulers. You can find a mapping between k-diffusion samplers and schedulers in Diffusers [here](https://huggingface.co/docs/diffusers/api/schedulers/overview).
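Below is a minimal usage sketch (not part of the committed documentation), modeled on this PR's integration tests; the checkpoint id and sampler name are only examples:

```py
import torch
from diffusers import StableDiffusionXLKDiffusionPipeline

# Requires the `k-diffusion` package in addition to the usual dependencies.
pipe = StableDiffusionXLKDiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")

# Any sampler exposed by `k_diffusion.sampling` can be selected by name.
pipe.set_scheduler("sample_dpmpp_2m")

image = pipe("A painting of a squirrel eating a burger", num_inference_steps=25).images[0]
image.save("squirrel.png")
```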
## StableDiffusionKDiffusionPipeline
[[autodoc]] StableDiffusionKDiffusionPipeline
## StableDiffusionXLKDiffusionPipeline
[[autodoc]] StableDiffusionXLKDiffusionPipeline
\ No newline at end of file
@@ -316,7 +316,7 @@ except OptionalDependencyNotAvailable:
     ]
 else:
-    _import_structure["pipelines"].extend(["StableDiffusionKDiffusionPipeline"])
+    _import_structure["pipelines"].extend(["StableDiffusionKDiffusionPipeline", "StableDiffusionXLKDiffusionPipeline"])
 try:
     if not (is_torch_available() and is_transformers_available() and is_onnx_available()):
@@ -668,7 +668,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     except OptionalDependencyNotAvailable:
         from .utils.dummy_torch_and_transformers_and_k_diffusion_objects import *  # noqa F403
     else:
-        from .pipelines import StableDiffusionKDiffusionPipeline
+        from .pipelines import StableDiffusionKDiffusionPipeline, StableDiffusionXLKDiffusionPipeline
     try:
         if not (is_torch_available() and is_transformers_available() and is_onnx_available()):
......
@@ -265,7 +265,10 @@ except OptionalDependencyNotAvailable:
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_and_k_diffusion_objects))
 else:
-    _import_structure["stable_diffusion_k_diffusion"] = ["StableDiffusionKDiffusionPipeline"]
+    _import_structure["stable_diffusion_k_diffusion"] = [
+        "StableDiffusionKDiffusionPipeline",
+        "StableDiffusionXLKDiffusionPipeline",
+    ]
 try:
     if not is_flax_available():
         raise OptionalDependencyNotAvailable()
@@ -491,7 +494,10 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     except OptionalDependencyNotAvailable:
         from ..utils.dummy_torch_and_transformers_and_k_diffusion_objects import *
     else:
-        from .stable_diffusion_k_diffusion import StableDiffusionKDiffusionPipeline
+        from .stable_diffusion_k_diffusion import (
+            StableDiffusionKDiffusionPipeline,
+            StableDiffusionXLKDiffusionPipeline,
+        )
     try:
         if not is_flax_available():
......
@@ -30,6 +30,7 @@ except OptionalDependencyNotAvailable:
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_and_k_diffusion_objects))
 else:
     _import_structure["pipeline_stable_diffusion_k_diffusion"] = ["StableDiffusionKDiffusionPipeline"]
+    _import_structure["pipeline_stable_diffusion_xl_k_diffusion"] = ["StableDiffusionXLKDiffusionPipeline"]
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     try:
@@ -45,6 +46,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         from ...utils.dummy_torch_and_transformers_and_k_diffusion_objects import *
     else:
         from .pipeline_stable_diffusion_k_diffusion import StableDiffusionKDiffusionPipeline
+        from .pipeline_stable_diffusion_xl_k_diffusion import StableDiffusionXLKDiffusionPipeline
 else:
     import sys
......
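Taken together, the three `__init__.py` changes above register the new pipeline in the package's lazy-import machinery. A quick sketch of the intended effect, assuming `torch`, `transformers`, and `k-diffusion` are installed:

```py
# The top-level import now resolves to the real pipeline class rather than a
# dummy object; the defining submodule is only loaded on first access.
from diffusers import StableDiffusionXLKDiffusionPipeline

print(StableDiffusionXLKDiffusionPipeline.__module__)
# expected: diffusers.pipelines.stable_diffusion_k_diffusion.pipeline_stable_diffusion_xl_k_diffusion
```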
@@ -134,7 +134,15 @@ class StableDiffusionKDiffusionPipeline(DiffusionPipeline, TextualInversionLoade
     def set_scheduler(self, scheduler_type: str):
         library = importlib.import_module("k_diffusion")
         sampling = getattr(library, "sampling")
-        self.sampler = getattr(sampling, scheduler_type)
+        try:
+            self.sampler = getattr(sampling, scheduler_type)
+        except Exception:
+            valid_samplers = []
+            for s in dir(sampling):
+                if "sample_" in s:
+                    valid_samplers.append(s)
+
+            raise ValueError(f"Invalid scheduler type {scheduler_type}. Please choose one of {valid_samplers}.")
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
     def _encode_prompt(
......
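The `set_scheduler` change above turns an opaque `AttributeError` into a readable `ValueError` that lists the available samplers. A hypothetical snippet illustrating the new behavior, assuming `pipe` is a k-diffusion pipeline loaded elsewhere:

```py
# `pipe` is assumed to be a StableDiffusion(XL)KDiffusionPipeline instance.
pipe.set_scheduler("sample_euler")  # resolves k_diffusion.sampling.sample_euler

try:
    pipe.set_scheduler("sample_eulr")  # deliberate typo: no such sampler
except ValueError as err:
    # The message now lists every `sample_*` function that k-diffusion exposes.
    print(err)
```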
@@ -15,3 +15,18 @@ class StableDiffusionKDiffusionPipeline(metaclass=DummyObject):
     @classmethod
     def from_pretrained(cls, *args, **kwargs):
         requires_backends(cls, ["torch", "transformers", "k_diffusion"])
+
+
+class StableDiffusionXLKDiffusionPipeline(metaclass=DummyObject):
+    _backends = ["torch", "transformers", "k_diffusion"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch", "transformers", "k_diffusion"])
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers", "k_diffusion"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers", "k_diffusion"])
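The dummy class above is what `StableDiffusionXLKDiffusionPipeline` resolves to when one of the required backends is missing. A sketch of that fallback behavior, assuming `k-diffusion` is *not* installed:

```py
from diffusers import StableDiffusionXLKDiffusionPipeline  # resolves to the dummy class

try:
    StableDiffusionXLKDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
except ImportError as err:
    print(err)  # explains that torch, transformers and k_diffusion are required
```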
# coding=utf-8
# Copyright 2023 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import gc
import unittest
import numpy as np
import torch
from diffusers import StableDiffusionXLKDiffusionPipeline
from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, slow, torch_device
enable_full_determinism()
@slow
@require_torch_gpu
class StableDiffusionXLKPipelineIntegrationTests(unittest.TestCase):
dtype = torch.float16
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
def test_stable_diffusion_xl(self):
sd_pipe = StableDiffusionXLKDiffusionPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=self.dtype
)
sd_pipe = sd_pipe.to(torch_device)
sd_pipe.set_progress_bar_config(disable=None)
sd_pipe.set_scheduler("sample_euler")
prompt = "A painting of a squirrel eating a burger"
generator = torch.manual_seed(0)
output = sd_pipe(
[prompt],
generator=generator,
guidance_scale=9.0,
num_inference_steps=20,
height=512,
width=512,
output_type="np",
)
image = output.images
image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array(
[0.79804534, 0.7981539, 0.8019961, 0.7936565, 0.7892033, 0.7914713, 0.7792827, 0.77754563, 0.7836789]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_stable_diffusion_karras_sigmas(self):
sd_pipe = StableDiffusionXLKDiffusionPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=self.dtype
)
sd_pipe = sd_pipe.to(torch_device)
sd_pipe.set_progress_bar_config(disable=None)
sd_pipe.set_scheduler("sample_dpmpp_2m")
prompt = "A painting of a squirrel eating a burger"
generator = torch.manual_seed(0)
output = sd_pipe(
[prompt],
generator=generator,
guidance_scale=7.5,
num_inference_steps=15,
output_type="np",
use_karras_sigmas=True,
height=512,
width=512,
)
image = output.images
image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array(
[0.9704869, 0.9714559, 0.9693254, 0.96892524, 0.9685236, 0.9659081, 0.9666761, 0.9619067, 0.961759]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_stable_diffusion_noise_sampler_seed(self):
sd_pipe = StableDiffusionXLKDiffusionPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=self.dtype
)
sd_pipe = sd_pipe.to(torch_device)
sd_pipe.set_progress_bar_config(disable=None)
sd_pipe.set_scheduler("sample_dpmpp_sde")
prompt = "A painting of a squirrel eating a burger"
seed = 0
images1 = sd_pipe(
[prompt],
generator=torch.manual_seed(seed),
noise_sampler_seed=seed,
guidance_scale=9.0,
num_inference_steps=20,
output_type="np",
height=512,
width=512,
).images
images2 = sd_pipe(
[prompt],
generator=torch.manual_seed(seed),
noise_sampler_seed=seed,
guidance_scale=9.0,
num_inference_steps=20,
output_type="np",
height=512,
width=512,
).images
assert images1.shape == (1, 512, 512, 3)
assert images2.shape == (1, 512, 512, 3)
assert np.abs(images1.flatten() - images2.flatten()).max() < 1e-2
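Note that the integration tests above are decorated with `@slow` and `@require_torch_gpu`, so they are skipped by default and only run on a CUDA machine when slow tests are explicitly enabled (via the `RUN_SLOW` environment variable used by Diffusers' test setup).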