Unverified Commit 5383188c authored by Suraj Patil, committed by GitHub

StableDiffusionDepth2ImgPipeline (#1531)



* begin depth pipeline

* add depth estimation model

* fix prepare_depth_mask

* add a comment about autocast

* copied from, quality, cleanup

* begin tests

* handle tensors

* norm image tensor

* fix batch size

* fix tests

* fix enable_sequential_cpu_offload

* fix save load

* fix test_save_load_float16

* fix test_save_load_optional_components

* fix test_float16_inference

* fix test_cpu_offload_forward_pass

* fix test_dict_tuple_outputs_equivalent

* up

* fix fast tests

* fix test_stable_diffusion_img2img_multiple_init_images

* fix few more fast tests

* don't use device map for DPT

* fix test_stable_diffusion_pipeline_with_sequential_cpu_offloading

* accept external depth maps

* prepare_depth_mask -> prepare_depth_map

* fix file name

* fix file name

* quality

* check transformers version

* fix test names

* use skipif

* fix import

* add docs

* skip tests on mps

* correct version

* uP

* Update docs/source/api/pipelines/stable_diffusion_2.mdx

* fix fix-copies

* fix fix-copies
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
Co-authored-by: anton- <anton@huggingface.co>
parent dbe07192
@@ -97,6 +97,14 @@ If you want to use all possible use cases in a single `DiffusionPipeline` you ca
- enable_xformers_memory_efficient_attention
- disable_xformers_memory_efficient_attention
## StableDiffusionDepth2ImgPipeline
[[autodoc]] StableDiffusionDepth2ImgPipeline
- __call__
- enable_attention_slicing
- disable_attention_slicing
- enable_xformers_memory_efficient_attention
- disable_xformers_memory_efficient_attention
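The memory-saving helpers listed above behave the same on this pipeline as on the other Stable Diffusion pipelines. A minimal sketch of enabling them (the checkpoint name is illustrative):

```python
import torch
from diffusers import StableDiffusionDepth2ImgPipeline

pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-depth", torch_dtype=torch.float16
).to("cuda")

# trade a bit of speed for a lower peak memory footprint
pipe.enable_attention_slicing()

# requires the `xformers` package to be installed
pipe.enable_xformers_memory_efficient_attention()
```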
## StableDiffusionImageVariationPipeline
[[autodoc]] StableDiffusionImageVariationPipeline
- __call__
......
@@ -30,6 +30,7 @@ Note that the architecture is more or less identical to [Stable Diffusion 1](./a
- *Text-to-Image (768x768 resolution)*: [stabilityai/stable-diffusion-2](https://huggingface.co/stabilityai/stable-diffusion-2) with [`StableDiffusionPipeline`]
- *Image Inpainting (512x512 resolution)*: [stabilityai/stable-diffusion-2-inpainting](https://huggingface.co/stabilityai/stable-diffusion-2-inpainting) with [`StableDiffusionInpaintPipeline`]
- *Image Upscaling (x4 resolution)*: [stable-diffusion-x4-upscaler](https://huggingface.co/stabilityai/stable-diffusion-x4-upscaler) with [`StableDiffusionUpscalePipeline`]
- *Depth-to-Image (512x512 resolution)*: [stabilityai/stable-diffusion-2-depth](https://huggingface.co/stabilityai/stable-diffusion-2-depth) with [`StableDiffusionDepth2ImgPipeline`]
We recommend using the [`DPMSolverMultistepScheduler`] as it is currently the fastest scheduler available.
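For instance, a minimal sketch of swapping it in (the checkpoint name is only illustrative; the same pattern applies to the depth-to-image pipeline):

```python
import torch
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler

pipe = StableDiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2", torch_dtype=torch.float16
).to("cuda")

# re-create the scheduler from the existing scheduler's config so the
# remaining settings (beta schedule, number of train timesteps, ...) carry over
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)

image = pipe("a photo of an astronaut riding a horse on mars", num_inference_steps=25).images[0]
```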
@@ -125,6 +126,37 @@ upscaled_image = pipeline(prompt=prompt, image=low_res_img).images[0]
upscaled_image.save("upsampled_cat.png")
```
- *Depth-Guided Text-to-Image*: [stabilityai/stable-diffusion-2-depth](https://huggingface.co/stabilityai/stable-diffusion-2-depth) with [`StableDiffusionDepth2ImgPipeline`]
**Installation**
```bash
pip install -U git+https://github.com/huggingface/transformers.git
pip install diffusers[torch]
```
**Example**
```python
import torch
import requests
from PIL import Image
from diffusers import StableDiffusionDepth2ImgPipeline
# load the depth-conditioned checkpoint in half precision
pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-depth",
    torch_dtype=torch.float16,
).to("cuda")

# fetch an example image to condition on
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
init_image = Image.open(requests.get(url, stream=True).raw)

prompt = "two tigers"
n_prompt = "bad, deformed, ugly, bad anatomy"
image = pipe(prompt=prompt, image=init_image, negative_prompt=n_prompt, strength=0.7).images[0]
```
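The commit log above also mentions accepting external depth maps. Continuing from the example, a minimal sketch of passing a precomputed map via the `depth_map` argument instead of relying on the pipeline's built-in depth estimator (the DPT checkpoint chosen here is only illustrative):

```python
from transformers import DPTFeatureExtractor, DPTForDepthEstimation

feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-hybrid-midas")
depth_estimator = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas")

# estimate a per-image depth tensor of shape (batch, height, width)
inputs = feature_extractor(images=init_image, return_tensors="pt")
with torch.no_grad():
    depth_map = depth_estimator(**inputs).predicted_depth

image = pipe(
    prompt=prompt,
    image=init_image,
    depth_map=depth_map.to("cuda"),
    negative_prompt=n_prompt,
    strength=0.7,
).images[0]
```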
### How to load and use different schedulers.
The stable diffusion pipeline uses the [`DDIMScheduler`] by default, but `diffusers` provides many other schedulers that can be used with it, such as [`PNDMScheduler`], [`LMSDiscreteScheduler`], [`EulerDiscreteScheduler`], [`EulerAncestralDiscreteScheduler`], etc.
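For example, a minimal sketch of loading an alternative scheduler from the checkpoint's `scheduler` subfolder and handing it to the pipeline (checkpoint and scheduler choice are illustrative):

```python
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler

# load the scheduler with the configuration stored alongside the checkpoint,
# then pass it to the pipeline in place of the default scheduler
scheduler = EulerDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-2", subfolder="scheduler")
pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2", scheduler=scheduler)
```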
......
@@ -107,7 +107,7 @@ _deps = [
"tensorboard",
"torch>=1.4",
"torchvision",
"transformers>=4.21.0",
"transformers>=4.25.1",
]
# this is a lookup table with items like:
......
@@ -12,11 +12,24 @@ from .utils import (
is_scipy_available,
is_torch_available,
is_transformers_available,
is_transformers_version,
is_unidecode_available,
logging,
)
# Make sure `transformers` is up to date
if is_transformers_available():
import transformers
if is_transformers_version("<", "4.25.1"):
raise ImportError(
f"`diffusers` requires transformers >= 4.25.1 to function correctly, but {transformers.__version__} was"
" found in your environment. You can upgrade it with pip: `pip install transformers --upgrade`"
)
else:
pass
try:
if not is_torch_available():
raise OptionalDependencyNotAvailable()
@@ -87,6 +100,7 @@ else:
CycleDiffusionPipeline,
LDMTextToImagePipeline,
PaintByExamplePipeline,
StableDiffusionDepth2ImgPipeline,
StableDiffusionImageVariationPipeline,
StableDiffusionImg2ImgPipeline,
StableDiffusionInpaintPipeline,
......
@@ -31,5 +31,5 @@ deps = {
"tensorboard": "tensorboard",
"torch": "torch>=1.4",
"torchvision": "torchvision",
"transformers": "transformers>=4.21.0",
"transformers": "transformers>=4.25.1",
}
@@ -44,6 +44,7 @@ else:
from .paint_by_example import PaintByExamplePipeline
from .stable_diffusion import (
CycleDiffusionPipeline,
StableDiffusionDepth2ImgPipeline,
StableDiffusionImageVariationPipeline,
StableDiffusionImg2ImgPipeline,
StableDiffusionInpaintPipeline,
......
@@ -46,13 +46,23 @@ if is_transformers_available() and is_torch_available():
from .safety_checker import StableDiffusionSafetyChecker
try:
if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.25.0.dev0")):
if not (is_transformers_available() and is_torch_available()):
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
from ...utils.dummy_torch_and_transformers_objects import StableDiffusionImageVariationPipeline
else:
from .pipeline_stable_diffusion_image_variation import StableDiffusionImageVariationPipeline
try:
if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.26.0.dev0")):
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
from ...utils.dummy_torch_and_transformers_objects import StableDiffusionDepth2ImgPipeline
else:
from .pipeline_stable_diffusion_depth2img import StableDiffusionDepth2ImgPipeline
try:
if not (is_torch_available() and is_transformers_available() and is_k_diffusion_available()):
raise OptionalDependencyNotAvailable()
......
@@ -7,7 +7,7 @@ from ...utils import (
try:
if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.25.0.dev0")):
if not (is_transformers_available() and is_torch_available()):
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
from ...utils.dummy_torch_and_transformers_objects import (
......
@@ -79,6 +79,21 @@ class PaintByExamplePipeline(metaclass=DummyObject):
requires_backends(cls, ["torch", "transformers"])
class StableDiffusionDepth2ImgPipeline(metaclass=DummyObject):
_backends = ["torch", "transformers"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch", "transformers"])
@classmethod
def from_config(cls, *args, **kwargs):
requires_backends(cls, ["torch", "transformers"])
@classmethod
def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch", "transformers"])
class StableDiffusionImageVariationPipeline(metaclass=DummyObject):
_backends = ["torch", "transformers"]
......
@@ -310,12 +310,6 @@ LIBROSA_IMPORT_ERROR = """
installation page: https://librosa.org/doc/latest/install.html and follow the ones that match your environment.
"""
# docstyle-ignore
TENSORFLOW_IMPORT_ERROR = """
{0} requires the TensorFlow library but it was not found in your environment. Checkout the instructions on the
installation page: https://www.tensorflow.org/install and follow the ones that match your environment.
"""
# docstyle-ignore
TRANSFORMERS_IMPORT_ERROR = """
{0} requires the transformers library but it was not found in your environment. You can install it with pip: `pip
@@ -341,7 +335,6 @@ BACKENDS_MAPPING = OrderedDict(
("inflect", (is_inflect_available, INFLECT_IMPORT_ERROR)),
("onnx", (is_onnx_available, ONNX_IMPORT_ERROR)),
("scipy", (is_scipy_available, SCIPY_IMPORT_ERROR)),
("tf", (is_tf_available, TENSORFLOW_IMPORT_ERROR)),
("torch", (is_torch_available, PYTORCH_IMPORT_ERROR)),
("transformers", (is_transformers_available, TRANSFORMERS_IMPORT_ERROR)),
("unidecode", (is_unidecode_available, UNIDECODE_IMPORT_ERROR)),
@@ -361,12 +354,7 @@ def requires_backends(obj, backends):
if failed:
raise ImportError("".join(failed))
if name in [
"VersatileDiffusionTextToImagePipeline",
"VersatileDiffusionPipeline",
"VersatileDiffusionDualGuidedPipeline",
"StableDiffusionImageVariationPipeline",
] and is_transformers_version("<", "4.25.0.dev0"):
if name in ["StableDiffusionDepth2ImgPipeline"] and is_transformers_version("<", "4.26.0.dev0"):
raise ImportError(
f"You need to install `transformers` from 'main' in order to use {name}: \n```\n pip install"
" git+https://github.com/huggingface/transformers \n```"
......
@@ -26,7 +26,6 @@ import torch
import PIL
import safetensors.torch
import transformers
from diffusers import (
AutoencoderKL,
DDIMPipeline,
@@ -533,9 +532,8 @@ class PipelineFastTests(unittest.TestCase):
# Validate that the text encoder safetensors file exists and is of the correct format
text_encoder_path = os.path.join(tmpdirname, "text_encoder", "model.safetensors")
if transformers.__version__ >= "4.25.1":
assert os.path.exists(text_encoder_path), f"Could not find {text_encoder_path}"
_ = safetensors.torch.load_file(text_encoder_path)
assert os.path.exists(text_encoder_path), f"Could not find {text_encoder_path}"
_ = safetensors.torch.load_file(text_encoder_path)
pipeline = StableDiffusionPipeline.from_pretrained(tmpdirname)
assert pipeline.unet is not None
......
@@ -11,7 +11,13 @@ from typing import Callable, Union
import numpy as np
import torch
from diffusers import CycleDiffusionPipeline, DanceDiffusionPipeline, DiffusionPipeline, StableDiffusionImg2ImgPipeline
from diffusers import (
CycleDiffusionPipeline,
DanceDiffusionPipeline,
DiffusionPipeline,
StableDiffusionDepth2ImgPipeline,
StableDiffusionImg2ImgPipeline,
)
from diffusers.utils.import_utils import is_accelerate_available, is_xformers_available
from diffusers.utils.testing_utils import require_torch, torch_device
@@ -281,6 +287,7 @@ class PipelineTesterMixin:
DanceDiffusionPipeline,
CycleDiffusionPipeline,
StableDiffusionImg2ImgPipeline,
StableDiffusionDepth2ImgPipeline,
):
# FIXME: inconsistent outputs on MPS
return
......
@@ -91,7 +91,8 @@ def read_init():
objects.append(line[8:-2])
line_index += 1
backend_specific_objects[backend] = objects
if len(objects) > 0:
backend_specific_objects[backend] = objects
else:
line_index += 1
......