renzhc / diffusers_dcu · Commits · 9b704f76

Commit 9b704f76 (unverified), authored Sep 03, 2022 by Patrick von Platen, committed via GitHub Sep 03, 2022

[Img2Img2] Re-add K LMS scheduler (#340)

parent e49dd03d
Showing 2 changed files with 102 additions and 13 deletions:

- src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py (+27, -9)
- tests/test_pipelines.py (+75, -4)
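This commit re-enables K-LMS sampling (LMSDiscreteScheduler) in StableDiffusionImg2ImgPipeline: the scheduler is accepted in the constructor signature, timestep selection and UNet input scaling are branched on the scheduler class, and a fast CPU test plus a slow GPU test exercise the new path. For orientation, here is a minimal usage sketch of the re-added path; it mirrors the slow test added below and assumes the diffusers API of this commit's era (`init_image` and `use_auth_token` arguments, dict-style `"sample"` outputs), which later releases renamed. The input filename is hypothetical.

```python
# Minimal usage sketch of the re-added K-LMS img2img path, modeled on the
# slow test in this commit. Assumes the September 2022 diffusers API.
import torch
from diffusers import LMSDiscreteScheduler, StableDiffusionImg2ImgPipeline
from PIL import Image

# K-LMS scheduler with the Stable Diffusion beta schedule, as in the tests below
lms = LMSDiscreteScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear")

pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4", scheduler=lms, use_auth_token=True
)
pipe.to("cuda")

init_image = Image.open("sketch.png").resize((768, 512))  # hypothetical input file
generator = torch.Generator(device="cuda").manual_seed(0)
image = pipe(
    prompt="A fantasy landscape, trending on artstation",
    init_image=init_image,
    strength=0.75,
    guidance_scale=7.5,
    generator=generator,
)["sample"][0]
image.save("fantasy_landscape.png")
```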
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py (view file @ 9b704f76)
```diff
@@ -5,12 +5,11 @@ import numpy as np
 import torch
 
 import PIL
-from tqdm.auto import tqdm
 from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
 
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...pipeline_utils import DiffusionPipeline
-from ...schedulers import DDIMScheduler, PNDMScheduler
+from ...schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
 from .safety_checker import StableDiffusionSafetyChecker
```
```diff
@@ -31,7 +30,7 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline):
         text_encoder: CLIPTextModel,
         tokenizer: CLIPTokenizer,
         unet: UNet2DConditionModel,
-        scheduler: Union[DDIMScheduler, PNDMScheduler],
+        scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler],
         safety_checker: StableDiffusionSafetyChecker,
         feature_extractor: CLIPFeatureExtractor,
     ):
```
```diff
@@ -93,12 +92,17 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline):
         # get the original timestep using init_timestep
         init_timestep = int(num_inference_steps * strength) + offset
         init_timestep = min(init_timestep, num_inference_steps)
-        timesteps = self.scheduler.timesteps[-init_timestep]
-        timesteps = torch.tensor([timesteps] * batch_size, dtype=torch.long, device=self.device)
+        if isinstance(self.scheduler, LMSDiscreteScheduler):
+            timesteps = torch.tensor(
+                [num_inference_steps - init_timestep] * batch_size, dtype=torch.long, device=self.device
+            )
+        else:
+            timesteps = self.scheduler.timesteps[-init_timestep]
+            timesteps = torch.tensor([timesteps] * batch_size, dtype=torch.long, device=self.device)
 
         # add noise to latents using the timesteps
         noise = torch.randn(init_latents.shape, generator=generator, device=self.device)
-        init_latents = self.scheduler.add_noise(init_latents, noise, timesteps)
+        init_latents = self.scheduler.add_noise(init_latents, noise, timesteps).to(self.device)
 
         # get prompt text embeddings
         text_input = self.tokenizer(
```
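The branch above is needed because LMSDiscreteScheduler indexes its sigma schedule by position rather than by diffusion timestep value, so the pipeline stores the step index `num_inference_steps - init_timestep` instead of looking up a timestep value in `scheduler.timesteps`. A standalone sketch of the arithmetic, with illustrative numbers that are not from the commit:

```python
# Illustrative numbers only (not from the commit): how img2img picks where
# to enter the denoising schedule.
num_inference_steps = 50
strength = 0.75  # fraction of the schedule img2img actually runs
offset = 1       # schedule offset the pipeline passes to set_timesteps

init_timestep = int(num_inference_steps * strength) + offset  # 38
init_timestep = min(init_timestep, num_inference_steps)       # 38

# K-LMS branch: a positional index into the scheduler's sigma/timestep tables
lms_start_index = num_inference_steps - init_timestep         # 12

# DDIM/PNDM branch: the timestep *value* init_timestep positions from the
# end, i.e. scheduler.timesteps[-init_timestep], broadcast over the batch.

# The denoising loop later starts at the matching positional index:
t_start = max(num_inference_steps - init_timestep + offset, 0)  # 13
print(init_timestep, lms_start_index, t_start)
```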
```diff
@@ -137,11 +141,22 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline):
             extra_step_kwargs["eta"] = eta
 
         latents = init_latents
+
         t_start = max(num_inference_steps - init_timestep + offset, 0)
-        for i, t in tqdm(enumerate(self.scheduler.timesteps[t_start:])):
+        for i, t in enumerate(self.progress_bar(self.scheduler.timesteps[t_start:])):
+            t_index = t_start + i
+
             # expand the latents if we are doing classifier free guidance
             latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
 
+            # if we use LMSDiscreteScheduler, let's make sure latents are multiplied by sigmas
+            if isinstance(self.scheduler, LMSDiscreteScheduler):
+                sigma = self.scheduler.sigmas[t_index]
+                # the model input needs to be scaled to match the continuous ODE formulation in K-LMS
+                latent_model_input = latent_model_input / ((sigma**2 + 1) ** 0.5)
+                latent_model_input = latent_model_input.to(self.unet.dtype)
+                t = t.to(self.unet.dtype)
+
             # predict the noise residual
             noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings)["sample"]
```
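The division in the LMS branch converts between noise parameterizations: K-LMS works with sigma-scaled noisy samples, so for a sample of the form x = x0 + sigma * eps the UNet input is divided by sqrt(sigma^2 + 1) to bring it back to roughly unit variance. A self-contained numeric sketch, with hypothetical sigma values rather than the scheduler's real table:

```python
# Self-contained sketch of the K-LMS input scaling added above. The sigma
# values are made up for illustration; the real ones come from
# LMSDiscreteScheduler.sigmas.
import torch

sigmas = torch.tensor([14.6, 9.1, 5.4, 2.9, 1.2, 0.5])  # hypothetical schedule
latents = torch.randn(1, 4, 64, 64)

t_index = 2
sigma = sigmas[t_index]

# For x = x0 + sigma * eps with x0 and eps near unit variance, the variance
# of x is about 1 + sigma^2, so this rescaling keeps the UNet input in the
# range it was trained on.
latent_model_input = latents / ((sigma**2 + 1) ** 0.5)

print(latent_model_input.std())  # ~ latents.std() / sqrt(sigma^2 + 1)
```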
```diff
@@ -151,11 +166,14 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline):
                 noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
 
             # compute the previous noisy sample x_t -> x_t-1
-            latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs)["prev_sample"]
+            if isinstance(self.scheduler, LMSDiscreteScheduler):
+                latents = self.scheduler.step(noise_pred, t_index, latents, **extra_step_kwargs)["prev_sample"]
+            else:
+                latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs)["prev_sample"]
 
         # scale and decode the image latents with vae
         latents = 1 / 0.18215 * latents
-        image = self.vae.decode(latents)
+        image = self.vae.decode(latents.to(self.vae.dtype))
 
         image = (image / 2 + 0.5).clamp(0, 1)
         image = image.cpu().permute(0, 2, 3, 1).numpy()
```
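The dispatch exists because LMSDiscreteScheduler.step expects the positional index `t_index` into its sigma table, while DDIM and PNDM expect the timestep value `t`. The `1 / 0.18215` factor below it is the standard Stable Diffusion VAE latent scaling constant and is unchanged by this commit. A small sketch of the same dispatch in isolation; the names are stand-ins, and the dict-style `"prev_sample"` return matches the diffusers API of this era:

```python
# Isolated sketch of the step dispatch added above; `scheduler`, `noise_pred`,
# `latents`, and the kwargs are stand-ins mirroring the pipeline's usage.
from diffusers import LMSDiscreteScheduler

def scheduler_step(scheduler, noise_pred, t, t_index, latents, **extra_step_kwargs):
    # K-LMS is indexed by position in the sigma table; DDIM/PNDM by timestep value.
    if isinstance(scheduler, LMSDiscreteScheduler):
        return scheduler.step(noise_pred, t_index, latents, **extra_step_kwargs)["prev_sample"]
    return scheduler.step(noise_pred, t, latents, **extra_step_kwargs)["prev_sample"]
```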
tests/test_pipelines.py (view file @ 9b704f76)
```diff
@@ -445,6 +445,49 @@ class PipelineFastTests(unittest.TestCase):
         expected_slice = np.array([0.4492, 0.3865, 0.4222, 0.5854, 0.5139, 0.4379, 0.4193, 0.48, 0.4218])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
 
+    def test_stable_diffusion_img2img_k_lms(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
+        unet = self.dummy_cond_unet
+        scheduler = LMSDiscreteScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear")
+        vae = self.dummy_vae
+        bert = self.dummy_text_encoder
+        tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
+
+        init_image = self.dummy_image.to(device)
+
+        # make sure here that pndm scheduler skips prk
+        sd_pipe = StableDiffusionImg2ImgPipeline(
+            unet=unet,
+            scheduler=scheduler,
+            vae=vae,
+            text_encoder=bert,
+            tokenizer=tokenizer,
+            safety_checker=self.dummy_safety_checker,
+            feature_extractor=self.dummy_extractor,
+        )
+        sd_pipe = sd_pipe.to(device)
+        sd_pipe.set_progress_bar_config(disable=None)
+
+        prompt = "A painting of a squirrel eating a burger"
+        generator = torch.Generator(device=device).manual_seed(0)
+        output = sd_pipe(
+            [prompt],
+            generator=generator,
+            guidance_scale=6.0,
+            num_inference_steps=2,
+            output_type="np",
+            init_image=init_image,
+        )
+        image = output["sample"]
+
+        image_slice = image[0, -3:, -3:, -1]
+
+        assert image.shape == (1, 32, 32, 3)
+        expected_slice = np.array([0.4367, 0.4986, 0.4372, 0.6706, 0.5665, 0.444, 0.5864, 0.6019, 0.5203])
+        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
+
     def test_stable_diffusion_inpaint(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         unet = self.dummy_cond_unet
```
```diff
@@ -892,7 +935,7 @@ class PipelineTesterMixin(unittest.TestCase):
     def test_stable_diffusion_img2img_pipeline(self):
         ds = load_dataset("hf-internal-testing/diffusers-images", split="train")
 
-        init_image = ds[1]["image"].resize((768, 512))
+        init_image = ds[2]["image"].resize((768, 512))
         output_image = ds[0]["image"].resize((768, 512))
 
         model_id = "CompVis/stable-diffusion-v1-4"
```
```diff
@@ -915,12 +958,40 @@ class PipelineTesterMixin(unittest.TestCase):
     @slow
     @unittest.skipIf(torch_device == "cpu", "Stable diffusion is supposed to run on GPU")
-    def test_stable_diffusion_in_paint_pipeline(self):
+    def test_stable_diffusion_img2img_pipeline_k_lms(self):
         ds = load_dataset("hf-internal-testing/diffusers-images", split="train")
 
         init_image = ds[2]["image"].resize((768, 512))
-        mask_image = ds[3]["image"].resize((768, 512))
-        output_image = ds[4]["image"].resize((768, 512))
+        output_image = ds[1]["image"].resize((768, 512))
+
+        lms = LMSDiscreteScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear")
 
         model_id = "CompVis/stable-diffusion-v1-4"
-        pipe = StableDiffusionInpaintPipeline.from_pretrained(model_id, use_auth_token=True)
+        pipe = StableDiffusionImg2ImgPipeline.from_pretrained(model_id, scheduler=lms, use_auth_token=True)
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
 
         prompt = "A fantasy landscape, trending on artstation"
 
         generator = torch.Generator(device=torch_device).manual_seed(0)
         image = pipe(
             prompt=prompt,
             init_image=init_image,
             strength=0.75,
             guidance_scale=7.5,
             generator=generator,
         )["sample"][0]
 
         expected_array = np.array(output_image)
         sampled_array = np.array(image)
 
         assert sampled_array.shape == (512, 768, 3)
         assert np.max(np.abs(sampled_array - expected_array)) < 1e-4
 
+    @slow
+    @unittest.skipIf(torch_device == "cpu", "Stable diffusion is supposed to run on GPU")
+    def test_stable_diffusion_in_paint_pipeline(self):
+        ds = load_dataset("hf-internal-testing/diffusers-images", split="train")
+
+        init_image = ds[3]["image"].resize((768, 512))
+        mask_image = ds[4]["image"].resize((768, 512))
+        output_image = ds[5]["image"].resize((768, 512))
 
         model_id = "CompVis/stable-diffusion-v1-4"
         pipe = StableDiffusionInpaintPipeline.from_pretrained(model_id, use_auth_token=True)
```