Unverified Commit b134f6a8 authored by takuoko, committed by GitHub

[Community] ControlNet Reference (#3508)



add controlnet reference and bugfix
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
parent edc65051
@@ -1324,7 +1324,7 @@ image.save('tensorrt_img2img_new_zealand_hills.png')
### Stable Diffusion Reference
-This pipeline uses the Reference only Control. Refer to the [sd-webui-controlnet discussion](https://github.com/Mikubill/sd-webui-controlnet/discussions/1236).
+This pipeline uses the Reference Control. Refer to the [sd-webui-controlnet discussion: Reference-only Control](https://github.com/Mikubill/sd-webui-controlnet/discussions/1236) and the [sd-webui-controlnet discussion: Reference-adain Control](https://github.com/Mikubill/sd-webui-controlnet/discussions/1280).
```py
@@ -1365,6 +1365,54 @@ Output Image of `reference_attn=True` and `reference_adain=True`
![output_image](https://github.com/huggingface/diffusers/assets/24734142/3c5255d6-867d-4d35-b202-8dfd30cc6827)
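The full usage example for this pipeline is elided between the hunks above. As a minimal sketch of how it is invoked, assuming the community pipeline is loaded by its file name (`stable_diffusion_reference`) via `custom_pipeline` and using the call arguments visible in the diff below (`ref_image`, `reference_attn`, `reference_adain`):

```py
import torch
from diffusers import DiffusionPipeline
from diffusers.utils import load_image

# Load the community pipeline by file name; the exact README snippet is
# collapsed in this diff, so treat this as an illustrative sketch.
pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    custom_pipeline="stable_diffusion_reference",
    torch_dtype=torch.float16,
).to("cuda")

ref_image = load_image(
    "https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png"
)
result = pipe(
    prompt="1girl",
    ref_image=ref_image,
    reference_attn=True,
    reference_adain=True,
).images[0]
```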
### Stable Diffusion ControlNet Reference
This pipeline uses the Reference Control with ControlNet. Refer to the [sd-webui-controlnet discussion: Reference-only Control](https://github.com/Mikubill/sd-webui-controlnet/discussions/1236) and the [sd-webui-controlnet discussion: Reference-adain Control](https://github.com/Mikubill/sd-webui-controlnet/discussions/1280).
```py
import cv2
import torch
import numpy as np
from PIL import Image
from diffusers import ControlNetModel, UniPCMultistepScheduler
from diffusers.utils import load_image

# StableDiffusionControlNetReferencePipeline is the community pipeline class
# added by this PR (stable_diffusion_controlnet_reference.py).

input_image = load_image("https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png")

# get canny image
image = cv2.Canny(np.array(input_image), 100, 200)
image = image[:, :, None]
image = np.concatenate([image, image, image], axis=2)
canny_image = Image.fromarray(image)

controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)
pipe = StableDiffusionControlNetReferencePipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    safety_checker=None,
    torch_dtype=torch.float16,
).to("cuda:0")

pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

result_img = pipe(
    ref_image=input_image,
    prompt="1girl",
    image=canny_image,
    num_inference_steps=20,
    reference_attn=True,
    reference_adain=True,
).images[0]
```
Reference Image
![reference_image](https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png)
Output Image
![output_image](https://github.com/huggingface/diffusers/assets/24734142/7b9a5830-f173-4b92-b0cf-73d0e9c01d60)
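The `reference_attn` and `reference_adain` flags can be toggled independently. For example, a hypothetical attention-only run with the same pipeline and inputs as above:

```py
# reference via self-attention injection only, skipping the AdaIN statistic matching
result_attn_only = pipe(
    ref_image=input_image,
    prompt="1girl",
    image=canny_image,
    num_inference_steps=20,
    reference_attn=True,
    reference_adain=False,
).images[0]
```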
### Stable Diffusion on IPEX
This diffusion pipeline aims to accelerate the inference of Stable Diffusion on Intel Xeon CPUs with BF16/FP32 precision, using [IPEX](https://github.com/intel/intel-extension-for-pytorch).
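The full IPEX example is elided between the hunks here. A minimal sketch, assuming the community pipeline is loaded by its file name (`stable_diffusion_ipex`) and exposes the `prepare_for_ipex` warm-up helper described in that file:

```py
import torch
from diffusers import DiffusionPipeline

prompt = "sailing ship in storm by Rembrandt"
pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", custom_pipeline="stable_diffusion_ipex"
)
# optimize the model for BF16 CPU inference; height/width passed here must
# match the values used at inference time (assumed helper, see the pipeline file)
pipe.prepare_for_ipex(prompt, dtype=torch.bfloat16, height=512, width=512)

with torch.cpu.amp.autocast(enabled=True, dtype=torch.bfloat16):
    image = pipe(prompt, num_inference_steps=20, height=512, width=512).images[0]
```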
@@ -1462,4 +1510,3 @@ latency = elapsed_time(pipe4)
print("Latency of StableDiffusionPipeline--fp32",latency)
```
(Diff collapsed: the new community pipeline file, stable_diffusion_controlnet_reference.py, is not expanded here.)
-# Inspired by: https://github.com/Mikubill/sd-webui-controlnet/discussions/1236
-from typing import Any, Callable, Dict, List, Optional, Union
+# Inspired by: https://github.com/Mikubill/sd-webui-controlnet/discussions/1236 and https://github.com/Mikubill/sd-webui-controlnet/discussions/1280
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import numpy as np
import PIL.Image
@@ -162,7 +162,7 @@ class StableDiffusionReferencePipeline(StableDiffusionPipeline):
     def __call__(
         self,
         prompt: Union[str, List[str]] = None,
-        ref_image: Union[torch.FloatTensor, PIL.Image.Image, List[torch.FloatTensor], List[PIL.Image.Image]] = None,
+        ref_image: Union[torch.FloatTensor, PIL.Image.Image] = None,
         height: Optional[int] = None,
         width: Optional[int] = None,
         num_inference_steps: int = 50,
@@ -356,12 +356,13 @@ class StableDiffusionReferencePipeline(StableDiffusionPipeline):
         def hacked_basic_transformer_inner_forward(
             self,
-            hidden_states,
-            encoder_hidden_states=None,
-            timestep=None,
-            attention_mask=None,
-            cross_attention_kwargs=None,
-            class_labels=None,
+            hidden_states: torch.FloatTensor,
+            attention_mask: Optional[torch.FloatTensor] = None,
+            encoder_hidden_states: Optional[torch.FloatTensor] = None,
+            encoder_attention_mask: Optional[torch.FloatTensor] = None,
+            timestep: Optional[torch.LongTensor] = None,
+            cross_attention_kwargs: Dict[str, Any] = None,
+            class_labels: Optional[torch.LongTensor] = None,
         ):
             if self.use_ada_layer_norm:
                 norm_hidden_states = self.norm1(hidden_states, timestep)
@@ -427,7 +428,7 @@ class StableDiffusionReferencePipeline(StableDiffusionPipeline):
                 attn_output = self.attn2(
                     norm_hidden_states,
                     encoder_hidden_states=encoder_hidden_states,
-                    attention_mask=attention_mask,
+                    attention_mask=encoder_attention_mask,
                     **cross_attention_kwargs,
                 )
                 hidden_states = attn_output + hidden_states
@@ -473,11 +474,12 @@ class StableDiffusionReferencePipeline(StableDiffusionPipeline):
         def hack_CrossAttnDownBlock2D_forward(
             self,
-            hidden_states,
-            temb=None,
-            encoder_hidden_states=None,
-            attention_mask=None,
-            cross_attention_kwargs=None,
+            hidden_states: torch.FloatTensor,
+            temb: Optional[torch.FloatTensor] = None,
+            encoder_hidden_states: Optional[torch.FloatTensor] = None,
+            attention_mask: Optional[torch.FloatTensor] = None,
+            cross_attention_kwargs: Optional[Dict[str, Any]] = None,
+            encoder_attention_mask: Optional[torch.FloatTensor] = None,
         ):
             eps = 1e-6
@@ -490,6 +492,8 @@ class StableDiffusionReferencePipeline(StableDiffusionPipeline):
                     hidden_states,
                     encoder_hidden_states=encoder_hidden_states,
                     cross_attention_kwargs=cross_attention_kwargs,
+                    attention_mask=attention_mask,
+                    encoder_attention_mask=encoder_attention_mask,
                     return_dict=False,
                 )[0]
                 if MODE == "write":
@@ -566,13 +570,14 @@ class StableDiffusionReferencePipeline(StableDiffusionPipeline):
         def hacked_CrossAttnUpBlock2D_forward(
             self,
-            hidden_states,
-            res_hidden_states_tuple,
-            temb=None,
-            encoder_hidden_states=None,
-            cross_attention_kwargs=None,
-            upsample_size=None,
-            attention_mask=None,
+            hidden_states: torch.FloatTensor,
+            res_hidden_states_tuple: Tuple[torch.FloatTensor, ...],
+            temb: Optional[torch.FloatTensor] = None,
+            encoder_hidden_states: Optional[torch.FloatTensor] = None,
+            cross_attention_kwargs: Optional[Dict[str, Any]] = None,
+            upsample_size: Optional[int] = None,
+            attention_mask: Optional[torch.FloatTensor] = None,
+            encoder_attention_mask: Optional[torch.FloatTensor] = None,
         ):
             eps = 1e-6
             # TODO(Patrick, William) - attention mask is not used
@@ -586,6 +591,8 @@ class StableDiffusionReferencePipeline(StableDiffusionPipeline):
                     hidden_states,
                     encoder_hidden_states=encoder_hidden_states,
                     cross_attention_kwargs=cross_attention_kwargs,
+                    attention_mask=attention_mask,
+                    encoder_attention_mask=encoder_attention_mask,
                     return_dict=False,
                 )[0]
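For orientation, the hacked forwards above implement the reference trick by running the UNet blocks in two modes per denoising step: a "write" pass on the noised reference latent that banks intermediate self-attention features (and, for `reference_adain`, their statistics), then a "read" pass in which generation attends over its own features plus the banked ones. A heavily simplified sketch with hypothetical names, not the pipeline's actual API:

```py
import torch

class FeatureBank:
    """Holds reference features between the write and read passes."""

    def __init__(self):
        self.feats = []
        self.mode = "write"  # the pipeline flips this each denoising step

def banked_self_attention(attn, norm_hidden_states, bank):
    # `attn` is assumed to be a self-attention module that accepts an optional
    # encoder_hidden_states context, as diffusers' Attention modules do.
    if bank.mode == "write":
        # reference pass: remember the reference's pre-attention features
        bank.feats.append(norm_hidden_states.detach().clone())
        return attn(norm_hidden_states)
    # read pass: attend over own features concatenated with the banked ones
    ref_feats = bank.feats.pop(0)
    context = torch.cat([norm_hidden_states, ref_feats], dim=1)
    return attn(norm_hidden_states, encoder_hidden_states=context)
```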