Unverified Commit 95ea538c authored by YiYi Xu's avatar YiYi Xu Committed by GitHub
Browse files

Add ddpm kandinsky (#3783)



* update doc

---------
Co-authored-by: default avataryiyixuxu <yixu310@gmail.com>
parent ef3844d3
...@@ -55,6 +55,20 @@ t2i_pipe = DiffusionPipeline.from_pretrained("kandinsky-community/kandinsky-2-1" ...@@ -55,6 +55,20 @@ t2i_pipe = DiffusionPipeline.from_pretrained("kandinsky-community/kandinsky-2-1"
t2i_pipe.to("cuda") t2i_pipe.to("cuda")
``` ```
<Tip warning={true}>
By default, the text-to-image pipeline uses [`DDIMScheduler`]; you can change the scheduler to [`DDPMScheduler`].
```py
scheduler = DDPMScheduler.from_pretrained("kandinsky-community/kandinsky-2-1", subfolder="ddpm_scheduler")
t2i_pipe = DiffusionPipeline.from_pretrained(
"kandinsky-community/kandinsky-2-1", scheduler=scheduler, torch_dtype=torch.float16
)
t2i_pipe.to("cuda")
```
</Tip>
Now we pass the prompt through the prior to generate image embeddings. The prior Now we pass the prompt through the prior to generate image embeddings. The prior
returns both the image embeddings corresponding to the prompt and negative/unconditional image returns both the image embeddings corresponding to the prompt and negative/unconditional image
embeddings corresponding to an empty string. embeddings corresponding to an empty string.
......
...@@ -22,7 +22,7 @@ from transformers import ( ...@@ -22,7 +22,7 @@ from transformers import (
from ...models import UNet2DConditionModel, VQModel from ...models import UNet2DConditionModel, VQModel
from ...pipelines import DiffusionPipeline from ...pipelines import DiffusionPipeline
from ...pipelines.pipeline_utils import ImagePipelineOutput from ...pipelines.pipeline_utils import ImagePipelineOutput
from ...schedulers import DDIMScheduler from ...schedulers import DDIMScheduler, DDPMScheduler
from ...utils import ( from ...utils import (
is_accelerate_available, is_accelerate_available,
is_accelerate_version, is_accelerate_version,
...@@ -88,7 +88,7 @@ class KandinskyPipeline(DiffusionPipeline): ...@@ -88,7 +88,7 @@ class KandinskyPipeline(DiffusionPipeline):
Frozen text-encoder. Frozen text-encoder.
tokenizer ([`XLMRobertaTokenizer`]): tokenizer ([`XLMRobertaTokenizer`]):
Tokenizer of class Tokenizer of class
scheduler ([`DDIMScheduler`]): scheduler (Union[`DDIMScheduler`,`DDPMScheduler`]):
A scheduler to be used in combination with `unet` to generate image latents. A scheduler to be used in combination with `unet` to generate image latents.
unet ([`UNet2DConditionModel`]): unet ([`UNet2DConditionModel`]):
Conditional U-Net architecture to denoise the image embedding. Conditional U-Net architecture to denoise the image embedding.
...@@ -101,7 +101,7 @@ class KandinskyPipeline(DiffusionPipeline): ...@@ -101,7 +101,7 @@ class KandinskyPipeline(DiffusionPipeline):
text_encoder: MultilingualCLIP, text_encoder: MultilingualCLIP,
tokenizer: XLMRobertaTokenizer, tokenizer: XLMRobertaTokenizer,
unet: UNet2DConditionModel, unet: UNet2DConditionModel,
scheduler: DDIMScheduler, scheduler: Union[DDIMScheduler, DDPMScheduler],
movq: VQModel, movq: VQModel,
): ):
super().__init__() super().__init__()
...@@ -439,9 +439,6 @@ class KandinskyPipeline(DiffusionPipeline): ...@@ -439,9 +439,6 @@ class KandinskyPipeline(DiffusionPipeline):
noise_pred, noise_pred,
t, t,
latents, latents,
# YiYi notes: only reason this pipeline can't work with unclip scheduler is that can't pass down this argument
# need to use DDPM scheduler instead
# prev_timestep=prev_timestep,
generator=generator, generator=generator,
).prev_sample ).prev_sample
# post-processing # post-processing
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment