Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
diffusers
Commits
05a36d5c
Unverified
Commit
05a36d5c
authored
Nov 24, 2022
by
Patrick von Platen
Committed by
GitHub
Nov 24, 2022
Browse files
Upscaling fixed (#1402)
* Upscaling fixed * up * more fixes * fix * more fixes * finish again * up
parent
cbfed0c2
Changes
26
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
124 additions
and
86 deletions
+124
-86
src/diffusers/pipeline_utils.py
src/diffusers/pipeline_utils.py
+10
-10
src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py
...ffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py
+6
-5
src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py
...pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py
+1
-0
src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py
...s/pipelines/latent_diffusion/pipeline_latent_diffusion.py
+5
-4
src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py
...elines/stable_diffusion/pipeline_flax_stable_diffusion.py
+13
-7
src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py
...elines/stable_diffusion/pipeline_onnx_stable_diffusion.py
+9
-3
src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py
...table_diffusion/pipeline_onnx_stable_diffusion_inpaint.py
+11
-5
src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint_legacy.py
...iffusion/pipeline_onnx_stable_diffusion_inpaint_legacy.py
+4
-3
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
...s/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+6
-5
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py
...le_diffusion/pipeline_stable_diffusion_image_variation.py
+6
-5
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
...nes/stable_diffusion/pipeline_stable_diffusion_img2img.py
+1
-0
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
...nes/stable_diffusion/pipeline_stable_diffusion_inpaint.py
+9
-6
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py
...ble_diffusion/pipeline_stable_diffusion_inpaint_legacy.py
+4
-3
src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py
...s/stable_diffusion_safe/pipeline_stable_diffusion_safe.py
+6
-5
src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion.py
...lines/versatile_diffusion/pipeline_versatile_diffusion.py
+7
-6
src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py
...ile_diffusion/pipeline_versatile_diffusion_dual_guided.py
+6
-5
src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py
...diffusion/pipeline_versatile_diffusion_image_variation.py
+6
-5
src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py
...e_diffusion/pipeline_versatile_diffusion_text_to_image.py
+6
-5
tests/pipelines/altdiffusion/test_alt_diffusion.py
tests/pipelines/altdiffusion/test_alt_diffusion.py
+6
-2
tests/pipelines/latent_diffusion/test_latent_diffusion.py
tests/pipelines/latent_diffusion/test_latent_diffusion.py
+2
-2
No files found.
src/diffusers/pipeline_utils.py
View file @
05a36d5c
...
...
@@ -544,7 +544,14 @@ class DiffusionPipeline(ConfigMixin):
init_kwargs
=
{
**
init_kwargs
,
**
passed_pipe_kwargs
}
# remove `null` components
init_dict
=
{
k
:
v
for
k
,
v
in
init_dict
.
items
()
if
v
[
0
]
is
not
None
}
def
load_module
(
name
,
value
):
if
value
[
0
]
is
None
:
return
False
if
name
in
passed_class_obj
and
passed_class_obj
[
name
]
is
None
:
return
False
return
True
init_dict
=
{
k
:
v
for
k
,
v
in
init_dict
.
items
()
if
load_module
(
k
,
v
)}
if
len
(
unused_kwargs
)
>
0
:
logger
.
warning
(
f
"Keyword arguments
{
unused_kwargs
}
not recognized."
)
...
...
@@ -560,12 +567,11 @@ class DiffusionPipeline(ConfigMixin):
is_pipeline_module
=
hasattr
(
pipelines
,
library_name
)
loaded_sub_model
=
None
sub_model_should_be_defined
=
True
# if the model is in a pipeline module, then we load it from the pipeline
if
name
in
passed_class_obj
:
# 1. check that passed_class_obj has correct parent class
if
not
is_pipeline_module
and
passed_class_obj
[
name
]
is
not
None
:
if
not
is_pipeline_module
:
library
=
importlib
.
import_module
(
library_name
)
class_obj
=
getattr
(
library
,
class_name
)
importable_classes
=
LOADABLE_CLASSES
[
library_name
]
...
...
@@ -581,12 +587,6 @@ class DiffusionPipeline(ConfigMixin):
f
"
{
passed_class_obj
[
name
]
}
is of type:
{
type
(
passed_class_obj
[
name
])
}
, but should be"
f
"
{
expected_class_obj
}
"
)
elif
passed_class_obj
[
name
]
is
None
and
name
not
in
pipeline_class
.
_optional_components
:
logger
.
warning
(
f
"You have passed `None` for
{
name
}
to disable its functionality in
{
pipeline_class
}
. Note"
f
" that this might lead to problems when using
{
pipeline_class
}
and is not recommended."
)
sub_model_should_be_defined
=
False
else
:
logger
.
warning
(
f
"You have passed a non-standard module
{
passed_class_obj
[
name
]
}
. We cannot verify whether it"
...
...
@@ -608,7 +608,7 @@ class DiffusionPipeline(ConfigMixin):
importable_classes
=
LOADABLE_CLASSES
[
library_name
]
class_candidates
=
{
c
:
getattr
(
library
,
c
,
None
)
for
c
in
importable_classes
.
keys
()}
if
loaded_sub_model
is
None
and
sub_model_should_be_defined
:
if
loaded_sub_model
is
None
:
load_method_name
=
None
for
class_name
,
class_candidate
in
class_candidates
.
items
():
if
class_candidate
is
not
None
and
issubclass
(
class_obj
,
class_candidate
):
...
...
src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py
View file @
05a36d5c
...
...
@@ -141,6 +141,7 @@ class AltDiffusionPipeline(DiffusionPipeline):
safety_checker
=
safety_checker
,
feature_extractor
=
feature_extractor
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
self
.
register_to_config
(
requires_safety_checker
=
requires_safety_checker
)
def
enable_xformers_memory_efficient_attention
(
self
):
...
...
@@ -379,7 +380,7 @@ class AltDiffusionPipeline(DiffusionPipeline):
)
def
prepare_latents
(
self
,
batch_size
,
num_channels_latents
,
height
,
width
,
dtype
,
device
,
generator
,
latents
=
None
):
shape
=
(
batch_size
,
num_channels_latents
,
height
//
8
,
width
//
8
)
shape
=
(
batch_size
,
num_channels_latents
,
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
)
if
latents
is
None
:
if
device
.
type
==
"mps"
:
# randn does not work reproducibly on mps
...
...
@@ -420,9 +421,9 @@ class AltDiffusionPipeline(DiffusionPipeline):
Args:
prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation.
height (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
height (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
width (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -469,8 +470,8 @@ class AltDiffusionPipeline(DiffusionPipeline):
(nsfw) content, according to the `safety_checker`.
"""
# 0. Default height and width to unet
height
=
height
or
self
.
unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
# 1. Check inputs. Raise error if not correct
self
.
check_inputs
(
prompt
,
height
,
width
,
callback_steps
)
...
...
src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py
View file @
05a36d5c
...
...
@@ -154,6 +154,7 @@ class AltDiffusionImg2ImgPipeline(DiffusionPipeline):
safety_checker
=
safety_checker
,
feature_extractor
=
feature_extractor
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
self
.
register_to_config
(
requires_safety_checker
=
requires_safety_checker
)
def
enable_attention_slicing
(
self
,
slice_size
:
Optional
[
Union
[
str
,
int
]]
=
"auto"
):
...
...
src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py
View file @
05a36d5c
...
...
@@ -60,6 +60,7 @@ class LDMTextToImagePipeline(DiffusionPipeline):
):
super
().
__init__
()
self
.
register_modules
(
vqvae
=
vqvae
,
bert
=
bert
,
tokenizer
=
tokenizer
,
unet
=
unet
,
scheduler
=
scheduler
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vqvae
.
config
.
block_out_channels
)
-
1
)
@
torch
.
no_grad
()
def
__call__
(
...
...
@@ -79,9 +80,9 @@ class LDMTextToImagePipeline(DiffusionPipeline):
Args:
prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation.
height (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
height (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
width (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -107,8 +108,8 @@ class LDMTextToImagePipeline(DiffusionPipeline):
generated images.
"""
# 0. Default height and width to unet
height
=
height
or
self
.
unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
if
isinstance
(
prompt
,
str
):
batch_size
=
1
...
...
src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py
View file @
05a36d5c
...
...
@@ -106,6 +106,7 @@ class FlaxStableDiffusionPipeline(FlaxDiffusionPipeline):
safety_checker
=
safety_checker
,
feature_extractor
=
feature_extractor
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
def
prepare_inputs
(
self
,
prompt
:
Union
[
str
,
List
[
str
]]):
if
not
isinstance
(
prompt
,
(
str
,
list
)):
...
...
@@ -168,8 +169,8 @@ class FlaxStableDiffusionPipeline(FlaxDiffusionPipeline):
neg_prompt_ids
:
jnp
.
array
=
None
,
):
# 0. Default height and width to unet
height
=
height
or
self
.
unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
if
height
%
8
!=
0
or
width
%
8
!=
0
:
raise
ValueError
(
f
"`height` and `width` have to be divisible by 8 but are
{
height
}
and
{
width
}
."
)
...
...
@@ -192,7 +193,12 @@ class FlaxStableDiffusionPipeline(FlaxDiffusionPipeline):
uncond_embeddings
=
self
.
text_encoder
(
uncond_input
,
params
=
params
[
"text_encoder"
])[
0
]
context
=
jnp
.
concatenate
([
uncond_embeddings
,
text_embeddings
])
latents_shape
=
(
batch_size
,
self
.
unet
.
in_channels
,
height
//
8
,
width
//
8
)
latents_shape
=
(
batch_size
,
self
.
unet
.
in_channels
,
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
,
)
if
latents
is
None
:
latents
=
jax
.
random
.
normal
(
prng_seed
,
shape
=
latents_shape
,
dtype
=
jnp
.
float32
)
else
:
...
...
@@ -269,9 +275,9 @@ class FlaxStableDiffusionPipeline(FlaxDiffusionPipeline):
Args:
prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation.
height (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
height (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
width (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -307,8 +313,8 @@ class FlaxStableDiffusionPipeline(FlaxDiffusionPipeline):
"not-safe-for-work" (nsfw) content, according to the `safety_checker`.
"""
# 0. Default height and width to unet
height
=
height
or
self
.
unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
if
jit
:
images
=
_p_generate
(
...
...
src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py
View file @
05a36d5c
...
...
@@ -108,6 +108,7 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline):
safety_checker
=
safety_checker
,
feature_extractor
=
feature_extractor
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
self
.
register_to_config
(
requires_safety_checker
=
requires_safety_checker
)
def
_encode_prompt
(
self
,
prompt
,
num_images_per_prompt
,
do_classifier_free_guidance
,
negative_prompt
):
...
...
@@ -206,8 +207,8 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline):
**
kwargs
,
):
# 0. Default height and width to unet
height
=
height
or
self
.
unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
if
isinstance
(
prompt
,
str
):
batch_size
=
1
...
...
@@ -241,7 +242,12 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline):
# get the initial random noise unless the user supplied it
latents_dtype
=
text_embeddings
.
dtype
latents_shape
=
(
batch_size
*
num_images_per_prompt
,
4
,
height
//
8
,
width
//
8
)
latents_shape
=
(
batch_size
*
num_images_per_prompt
,
4
,
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
,
)
if
latents
is
None
:
latents
=
generator
.
randn
(
*
latents_shape
).
astype
(
latents_dtype
)
elif
latents
.
shape
!=
latents_shape
:
...
...
src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py
View file @
05a36d5c
...
...
@@ -158,6 +158,7 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
safety_checker
=
safety_checker
,
feature_extractor
=
feature_extractor
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
self
.
register_to_config
(
requires_safety_checker
=
requires_safety_checker
)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_onnx_stable_diffusion.OnnxStableDiffusionPipeline._encode_prompt
...
...
@@ -273,9 +274,9 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
repainted, while black pixels will be preserved. If `mask_image` is a PIL image, it will be converted
to a single channel (luminance) before use. If it's a tensor, it should contain one color channel (L)
instead of 3, so the expected shape would be `(B, H, W, 1)`.
height (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
height (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
width (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -321,8 +322,8 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
(nsfw) content, according to the `safety_checker`.
"""
# 0. Default height and width to unet
height
=
height
or
self
.
unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
if
isinstance
(
prompt
,
str
):
batch_size
=
1
...
...
@@ -358,7 +359,12 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
)
num_channels_latents
=
NUM_LATENT_CHANNELS
latents_shape
=
(
batch_size
*
num_images_per_prompt
,
num_channels_latents
,
height
//
8
,
width
//
8
)
latents_shape
=
(
batch_size
*
num_images_per_prompt
,
num_channels_latents
,
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
,
)
latents_dtype
=
text_embeddings
.
dtype
if
latents
is
None
:
latents
=
generator
.
randn
(
*
latents_shape
).
astype
(
latents_dtype
)
...
...
src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint_legacy.py
View file @
05a36d5c
...
...
@@ -27,11 +27,11 @@ def preprocess(image):
return
2.0
*
image
-
1.0
def
preprocess_mask
(
mask
):
def
preprocess_mask
(
mask
,
scale_factor
=
8
):
mask
=
mask
.
convert
(
"L"
)
w
,
h
=
mask
.
size
w
,
h
=
map
(
lambda
x
:
x
-
x
%
32
,
(
w
,
h
))
# resize to integer multiple of 32
mask
=
mask
.
resize
((
w
//
8
,
h
//
8
),
resample
=
PIL
.
Image
.
NEAREST
)
mask
=
mask
.
resize
((
w
//
scale_factor
,
h
//
scale_factor
),
resample
=
PIL
.
Image
.
NEAREST
)
mask
=
np
.
array
(
mask
).
astype
(
np
.
float32
)
/
255.0
mask
=
np
.
tile
(
mask
,
(
4
,
1
,
1
))
mask
=
mask
[
None
].
transpose
(
0
,
1
,
2
,
3
)
# what does this step do?
...
...
@@ -143,6 +143,7 @@ class OnnxStableDiffusionInpaintPipelineLegacy(DiffusionPipeline):
safety_checker
=
safety_checker
,
feature_extractor
=
feature_extractor
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
self
.
register_to_config
(
requires_safety_checker
=
requires_safety_checker
)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_onnx_stable_diffusion.OnnxStableDiffusionPipeline._encode_prompt
...
...
@@ -349,7 +350,7 @@ class OnnxStableDiffusionInpaintPipelineLegacy(DiffusionPipeline):
# preprocess mask
if
not
isinstance
(
mask_image
,
np
.
ndarray
):
mask_image
=
preprocess_mask
(
mask_image
)
mask_image
=
preprocess_mask
(
mask_image
,
self
.
vae_scale_factor
)
mask_image
=
mask_image
.
astype
(
latents_dtype
)
mask
=
np
.
concatenate
([
mask_image
]
*
num_images_per_prompt
,
axis
=
0
)
...
...
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
View file @
05a36d5c
...
...
@@ -140,6 +140,7 @@ class StableDiffusionPipeline(DiffusionPipeline):
safety_checker
=
safety_checker
,
feature_extractor
=
feature_extractor
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
self
.
register_to_config
(
requires_safety_checker
=
requires_safety_checker
)
def
enable_xformers_memory_efficient_attention
(
self
):
...
...
@@ -378,7 +379,7 @@ class StableDiffusionPipeline(DiffusionPipeline):
)
def
prepare_latents
(
self
,
batch_size
,
num_channels_latents
,
height
,
width
,
dtype
,
device
,
generator
,
latents
=
None
):
shape
=
(
batch_size
,
num_channels_latents
,
height
//
8
,
width
//
8
)
shape
=
(
batch_size
,
num_channels_latents
,
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
)
if
latents
is
None
:
if
device
.
type
==
"mps"
:
# randn does not work reproducibly on mps
...
...
@@ -419,9 +420,9 @@ class StableDiffusionPipeline(DiffusionPipeline):
Args:
prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation.
height (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
height (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
width (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -468,8 +469,8 @@ class StableDiffusionPipeline(DiffusionPipeline):
(nsfw) content, according to the `safety_checker`.
"""
# 0. Default height and width to unet
height
=
height
or
self
.
unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
# 1. Check inputs. Raise error if not correct
self
.
check_inputs
(
prompt
,
height
,
width
,
callback_steps
)
...
...
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py
View file @
05a36d5c
...
...
@@ -108,6 +108,7 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline):
safety_checker
=
safety_checker
,
feature_extractor
=
feature_extractor
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
self
.
register_to_config
(
requires_safety_checker
=
requires_safety_checker
)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_xformers_memory_efficient_attention
...
...
@@ -281,7 +282,7 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline):
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
def
prepare_latents
(
self
,
batch_size
,
num_channels_latents
,
height
,
width
,
dtype
,
device
,
generator
,
latents
=
None
):
shape
=
(
batch_size
,
num_channels_latents
,
height
//
8
,
width
//
8
)
shape
=
(
batch_size
,
num_channels_latents
,
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
)
if
latents
is
None
:
if
device
.
type
==
"mps"
:
# randn does not work reproducibly on mps
...
...
@@ -324,9 +325,9 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline):
configuration of
[this](https://huggingface.co/lambdalabs/sd-image-variations-diffusers/blob/main/feature_extractor/preprocessor_config.json)
`CLIPFeatureExtractor`
height (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
height (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
width (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -370,8 +371,8 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline):
(nsfw) content, according to the `safety_checker`.
"""
# 0. Default height and width to unet
height
=
height
or
self
.
unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
# 1. Check inputs. Raise error if not correct
self
.
check_inputs
(
image
,
height
,
width
,
callback_steps
)
...
...
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
View file @
05a36d5c
...
...
@@ -153,6 +153,7 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline):
safety_checker
=
safety_checker
,
feature_extractor
=
feature_extractor
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
self
.
register_to_config
(
requires_safety_checker
=
requires_safety_checker
)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_attention_slicing
...
...
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
View file @
05a36d5c
...
...
@@ -218,6 +218,7 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline):
safety_checker
=
safety_checker
,
feature_extractor
=
feature_extractor
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
self
.
register_to_config
(
requires_safety_checker
=
requires_safety_checker
)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_attention_slicing
...
...
@@ -468,7 +469,7 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline):
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
def
prepare_latents
(
self
,
batch_size
,
num_channels_latents
,
height
,
width
,
dtype
,
device
,
generator
,
latents
=
None
):
shape
=
(
batch_size
,
num_channels_latents
,
height
//
8
,
width
//
8
)
shape
=
(
batch_size
,
num_channels_latents
,
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
)
if
latents
is
None
:
if
device
.
type
==
"mps"
:
# randn does not work reproducibly on mps
...
...
@@ -490,7 +491,9 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline):
# resize the mask to latents shape as we concatenate the mask to the latents
# we do that before converting to dtype to avoid breaking in case we're using cpu_offload
# and half precision
mask
=
torch
.
nn
.
functional
.
interpolate
(
mask
,
size
=
(
height
//
8
,
width
//
8
))
mask
=
torch
.
nn
.
functional
.
interpolate
(
mask
,
size
=
(
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
)
)
mask
=
mask
.
to
(
device
=
device
,
dtype
=
dtype
)
masked_image
=
masked_image
.
to
(
device
=
device
,
dtype
=
dtype
)
...
...
@@ -547,9 +550,9 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline):
repainted, while black pixels will be preserved. If `mask_image` is a PIL image, it will be converted
to a single channel (luminance) before use. If it's a tensor, it should contain one color channel (L)
instead of 3, so the expected shape would be `(B, H, W, 1)`.
height (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
height (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
width (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -596,8 +599,8 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline):
(nsfw) content, according to the `safety_checker`.
"""
# 0. Default height and width to unet
height
=
height
or
self
.
unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
# 1. Check inputs
self
.
check_inputs
(
prompt
,
height
,
width
,
callback_steps
)
...
...
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py
View file @
05a36d5c
...
...
@@ -51,11 +51,11 @@ def preprocess_image(image):
return
2.0
*
image
-
1.0
def
preprocess_mask
(
mask
):
def
preprocess_mask
(
mask
,
scale_factor
=
8
):
mask
=
mask
.
convert
(
"L"
)
w
,
h
=
mask
.
size
w
,
h
=
map
(
lambda
x
:
x
-
x
%
32
,
(
w
,
h
))
# resize to integer multiple of 32
mask
=
mask
.
resize
((
w
//
8
,
h
//
8
),
resample
=
PIL_INTERPOLATION
[
"nearest"
])
mask
=
mask
.
resize
((
w
//
scale_factor
,
h
//
scale_factor
),
resample
=
PIL_INTERPOLATION
[
"nearest"
])
mask
=
np
.
array
(
mask
).
astype
(
np
.
float32
)
/
255.0
mask
=
np
.
tile
(
mask
,
(
4
,
1
,
1
))
mask
=
mask
[
None
].
transpose
(
0
,
1
,
2
,
3
)
# what does this step do?
...
...
@@ -166,6 +166,7 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline):
safety_checker
=
safety_checker
,
feature_extractor
=
feature_extractor
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
self
.
register_to_config
(
requires_safety_checker
=
requires_safety_checker
)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_attention_slicing
...
...
@@ -541,7 +542,7 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline):
init_image
=
preprocess_image
(
init_image
)
if
not
isinstance
(
mask_image
,
torch
.
FloatTensor
):
mask_image
=
preprocess_mask
(
mask_image
)
mask_image
=
preprocess_mask
(
mask_image
,
self
.
vae_scale_factor
)
# 5. set timesteps
self
.
scheduler
.
set_timesteps
(
num_inference_steps
,
device
=
device
)
...
...
src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py
View file @
05a36d5c
...
...
@@ -136,6 +136,7 @@ class StableDiffusionPipelineSafe(DiffusionPipeline):
feature_extractor
=
feature_extractor
,
)
self
.
_safety_text_concept
=
safety_concept
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
self
.
register_to_config
(
requires_safety_checker
=
requires_safety_checker
)
@
property
...
...
@@ -443,7 +444,7 @@ class StableDiffusionPipelineSafe(DiffusionPipeline):
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
def
prepare_latents
(
self
,
batch_size
,
num_channels_latents
,
height
,
width
,
dtype
,
device
,
generator
,
latents
=
None
):
shape
=
(
batch_size
,
num_channels_latents
,
height
//
8
,
width
//
8
)
shape
=
(
batch_size
,
num_channels_latents
,
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
)
if
latents
is
None
:
if
device
.
type
==
"mps"
:
# randn does not work reproducibly on mps
...
...
@@ -531,9 +532,9 @@ class StableDiffusionPipelineSafe(DiffusionPipeline):
Args:
prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation.
height (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
height (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
width (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -600,8 +601,8 @@ class StableDiffusionPipelineSafe(DiffusionPipeline):
(nsfw) content, according to the `safety_checker`.
"""
# 0. Default height and width to unet
height
=
height
or
self
.
unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
# 1. Check inputs. Raise error if not correct
self
.
check_inputs
(
prompt
,
height
,
width
,
callback_steps
)
...
...
src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion.py
View file @
05a36d5c
...
...
@@ -78,6 +78,7 @@ class VersatileDiffusionPipeline(DiffusionPipeline):
vae
=
vae
,
scheduler
=
scheduler
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
def
enable_attention_slicing
(
self
,
slice_size
:
Optional
[
Union
[
str
,
int
]]
=
"auto"
):
r
"""
...
...
@@ -131,9 +132,9 @@ class VersatileDiffusionPipeline(DiffusionPipeline):
Args:
image (`PIL.Image.Image`, `List[PIL.Image.Image]` or `torch.Tensor`):
The image prompt or prompts to guide the image generation.
height (`int`, *optional*, defaults to self.image_unet.config.sample_size * 8):
height (`int`, *optional*, defaults to self.image_unet.config.sample_size * self.vae_scale_factor):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.image_unet.config.sample_size * 8):
width (`int`, *optional*, defaults to self.image_unet.config.sample_size * self.vae_scale_factor):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -247,9 +248,9 @@ class VersatileDiffusionPipeline(DiffusionPipeline):
Args:
prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation.
height (`int`, *optional*, defaults to self.image_unet.config.sample_size * 8):
height (`int`, *optional*, defaults to self.image_unet.config.sample_size * self.vae_scale_factor):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.image_unet.config.sample_size * 8):
width (`int`, *optional*, defaults to self.image_unet.config.sample_size * self.vae_scale_factor):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -360,9 +361,9 @@ class VersatileDiffusionPipeline(DiffusionPipeline):
Args:
prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation.
height (`int`, *optional*, defaults to self.image_unet.config.sample_size * 8):
height (`int`, *optional*, defaults to self.image_unet.config.sample_size * self.vae_scale_factor):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.image_unet.config.sample_size * 8):
width (`int`, *optional*, defaults to self.image_unet.config.sample_size * self.vae_scale_factor):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py
View file @
05a36d5c
...
...
@@ -87,6 +87,7 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline):
vae
=
vae
,
scheduler
=
scheduler
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
if
self
.
text_unet
is
not
None
and
(
"dual_cross_attention"
not
in
self
.
image_unet
.
config
or
not
self
.
image_unet
.
config
.
dual_cross_attention
...
...
@@ -419,7 +420,7 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline):
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
def
prepare_latents
(
self
,
batch_size
,
num_channels_latents
,
height
,
width
,
dtype
,
device
,
generator
,
latents
=
None
):
shape
=
(
batch_size
,
num_channels_latents
,
height
//
8
,
width
//
8
)
shape
=
(
batch_size
,
num_channels_latents
,
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
)
if
latents
is
None
:
if
device
.
type
==
"mps"
:
# randn does not work reproducibly on mps
...
...
@@ -474,9 +475,9 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline):
Args:
prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation.
height (`int`, *optional*, defaults to self.image_unet.config.sample_size * 8):
height (`int`, *optional*, defaults to self.image_unet.config.sample_size * self.vae_scale_factor):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.image_unet.config.sample_size * 8):
width (`int`, *optional*, defaults to self.image_unet.config.sample_size * self.vae_scale_factor):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -552,8 +553,8 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline):
returning a tuple, the first element is a list with the generated images.
"""
# 0. Default height and width to unet
height
=
height
or
self
.
image_unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
image_unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
image_unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
image_unet
.
config
.
sample_size
*
self
.
vae_scale_factor
# 1. Check inputs. Raise error if not correct
self
.
check_inputs
(
prompt
,
image
,
height
,
width
,
callback_steps
)
...
...
src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py
View file @
05a36d5c
...
...
@@ -71,6 +71,7 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
vae
=
vae
,
scheduler
=
scheduler
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_xformers_memory_efficient_attention with unet->image_unet
def
enable_xformers_memory_efficient_attention
(
self
):
...
...
@@ -277,7 +278,7 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
def
prepare_latents
(
self
,
batch_size
,
num_channels_latents
,
height
,
width
,
dtype
,
device
,
generator
,
latents
=
None
):
shape
=
(
batch_size
,
num_channels_latents
,
height
//
8
,
width
//
8
)
shape
=
(
batch_size
,
num_channels_latents
,
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
)
if
latents
is
None
:
if
device
.
type
==
"mps"
:
# randn does not work reproducibly on mps
...
...
@@ -318,9 +319,9 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
Args:
image (`PIL.Image.Image`, `List[PIL.Image.Image]` or `torch.Tensor`):
The image prompt or prompts to guide the image generation.
height (`int`, *optional*, defaults to self.image_unet.config.sample_size * 8):
height (`int`, *optional*, defaults to self.image_unet.config.sample_size * self.vae_scale_factor):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.image_unet.config.sample_size * 8):
width (`int`, *optional*, defaults to self.image_unet.config.sample_size * self.vae_scale_factor):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -392,8 +393,8 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
(nsfw) content, according to the `safety_checker`.
"""
# 0. Default height and width to unet
height
=
height
or
self
.
image_unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
image_unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
image_unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
image_unet
.
config
.
sample_size
*
self
.
vae_scale_factor
# 1. Check inputs. Raise error if not correct
self
.
check_inputs
(
image
,
height
,
width
,
callback_steps
)
...
...
src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py
View file @
05a36d5c
...
...
@@ -75,6 +75,7 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
vae
=
vae
,
scheduler
=
scheduler
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
if
self
.
text_unet
is
not
None
:
self
.
_swap_unet_attention_blocks
()
...
...
@@ -337,7 +338,7 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
def
prepare_latents
(
self
,
batch_size
,
num_channels_latents
,
height
,
width
,
dtype
,
device
,
generator
,
latents
=
None
):
shape
=
(
batch_size
,
num_channels_latents
,
height
//
8
,
width
//
8
)
shape
=
(
batch_size
,
num_channels_latents
,
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
)
if
latents
is
None
:
if
device
.
type
==
"mps"
:
# randn does not work reproducibly on mps
...
...
@@ -378,9 +379,9 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
Args:
prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation.
height (`int`, *optional*, defaults to self.image_unet.config.sample_size * 8):
height (`int`, *optional*, defaults to self.image_unet.config.sample_size * self.vae_scale_factor):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.image_unet.config.sample_size * 8):
width (`int`, *optional*, defaults to self.image_unet.config.sample_size * self.vae_scale_factor):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -444,8 +445,8 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
(nsfw) content, according to the `safety_checker`.
"""
# 0. Default height and width to unet
height
=
height
or
self
.
image_unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
image_unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
image_unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
image_unet
.
config
.
sample_size
*
self
.
vae_scale_factor
# 1. Check inputs. Raise error if not correct
self
.
check_inputs
(
prompt
,
height
,
width
,
callback_steps
)
...
...
tests/pipelines/altdiffusion/test_alt_diffusion.py
View file @
05a36d5c
...
...
@@ -172,7 +172,9 @@ class AltDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
image_from_tuple_slice
=
image_from_tuple
[
0
,
-
3
:,
-
3
:,
-
1
]
assert
image
.
shape
==
(
1
,
64
,
64
,
3
)
expected_slice
=
np
.
array
([
0.5748162
,
0.60447145
,
0.48821217
,
0.50100636
,
0.5431185
,
0.45763683
,
0.49657696
,
0.48132733
,
0.47573093
])
expected_slice
=
np
.
array
(
[
0.5748162
,
0.60447145
,
0.48821217
,
0.50100636
,
0.5431185
,
0.45763683
,
0.49657696
,
0.48132733
,
0.47573093
]
)
assert
np
.
abs
(
image_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
assert
np
.
abs
(
image_from_tuple_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
...
...
@@ -219,7 +221,9 @@ class AltDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
image_from_tuple_slice
=
image_from_tuple
[
0
,
-
3
:,
-
3
:,
-
1
]
assert
image
.
shape
==
(
1
,
64
,
64
,
3
)
expected_slice
=
np
.
array
([
0.51605093
,
0.5707241
,
0.47365507
,
0.50578886
,
0.5633877
,
0.4642503
,
0.5182081
,
0.48763484
,
0.49084237
])
expected_slice
=
np
.
array
(
[
0.51605093
,
0.5707241
,
0.47365507
,
0.50578886
,
0.5633877
,
0.4642503
,
0.5182081
,
0.48763484
,
0.49084237
]
)
assert
np
.
abs
(
image_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
assert
np
.
abs
(
image_from_tuple_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
...
...
tests/pipelines/latent_diffusion/test_latent_diffusion.py
View file @
05a36d5c
...
...
@@ -111,8 +111,8 @@ class LDMTextToImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
image_slice
=
image
[
0
,
-
3
:,
-
3
:,
-
1
]
image_from_tuple_slice
=
image_from_tuple
[
0
,
-
3
:,
-
3
:,
-
1
]
assert
image
.
shape
==
(
1
,
64
,
64
,
3
)
expected_slice = np.array([0.5074, 0.5026, 0.4998, 0.4056, 0.3523, 0.4649, 0.5289, 0.5299, 0.4897])
assert
image
.
shape
==
(
1
,
16
,
16
,
3
)
expected_slice = np.array([0.6806, 0.5454, 0.5638, 0.4893, 0.4656, 0.4257, 0.6248, 0.5217, 0.5498])
assert
np
.
abs
(
image_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
assert
np
.
abs
(
image_from_tuple_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment