Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
renzhc
diffusers_dcu
Commits
05a36d5c
Unverified
Commit
05a36d5c
authored
Nov 24, 2022
by
Patrick von Platen
Committed by
GitHub
Nov 24, 2022
Browse files
Upscaling fixed (#1402)
* Upscaling fixed * up * more fixes * fix * more fixes * finish again * up
parent
cbfed0c2
Changes
26
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
124 additions
and
86 deletions
+124
-86
src/diffusers/pipeline_utils.py
src/diffusers/pipeline_utils.py
+10
-10
src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py
...ffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py
+6
-5
src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py
...pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py
+1
-0
src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py
...s/pipelines/latent_diffusion/pipeline_latent_diffusion.py
+5
-4
src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py
...elines/stable_diffusion/pipeline_flax_stable_diffusion.py
+13
-7
src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py
...elines/stable_diffusion/pipeline_onnx_stable_diffusion.py
+9
-3
src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py
...table_diffusion/pipeline_onnx_stable_diffusion_inpaint.py
+11
-5
src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint_legacy.py
...iffusion/pipeline_onnx_stable_diffusion_inpaint_legacy.py
+4
-3
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
...s/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+6
-5
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py
...le_diffusion/pipeline_stable_diffusion_image_variation.py
+6
-5
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
...nes/stable_diffusion/pipeline_stable_diffusion_img2img.py
+1
-0
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
...nes/stable_diffusion/pipeline_stable_diffusion_inpaint.py
+9
-6
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py
...ble_diffusion/pipeline_stable_diffusion_inpaint_legacy.py
+4
-3
src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py
...s/stable_diffusion_safe/pipeline_stable_diffusion_safe.py
+6
-5
src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion.py
...lines/versatile_diffusion/pipeline_versatile_diffusion.py
+7
-6
src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py
...ile_diffusion/pipeline_versatile_diffusion_dual_guided.py
+6
-5
src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py
...diffusion/pipeline_versatile_diffusion_image_variation.py
+6
-5
src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py
...e_diffusion/pipeline_versatile_diffusion_text_to_image.py
+6
-5
tests/pipelines/altdiffusion/test_alt_diffusion.py
tests/pipelines/altdiffusion/test_alt_diffusion.py
+6
-2
tests/pipelines/latent_diffusion/test_latent_diffusion.py
tests/pipelines/latent_diffusion/test_latent_diffusion.py
+2
-2
No files found.
src/diffusers/pipeline_utils.py
View file @
05a36d5c
...
...
@@ -544,7 +544,14 @@ class DiffusionPipeline(ConfigMixin):
init_kwargs
=
{
**
init_kwargs
,
**
passed_pipe_kwargs
}
# remove `null` components
init_dict
=
{
k
:
v
for
k
,
v
in
init_dict
.
items
()
if
v
[
0
]
is
not
None
}
def
load_module
(
name
,
value
):
if
value
[
0
]
is
None
:
return
False
if
name
in
passed_class_obj
and
passed_class_obj
[
name
]
is
None
:
return
False
return
True
init_dict
=
{
k
:
v
for
k
,
v
in
init_dict
.
items
()
if
load_module
(
k
,
v
)}
if
len
(
unused_kwargs
)
>
0
:
logger
.
warning
(
f
"Keyword arguments
{
unused_kwargs
}
not recognized."
)
...
...
@@ -560,12 +567,11 @@ class DiffusionPipeline(ConfigMixin):
is_pipeline_module
=
hasattr
(
pipelines
,
library_name
)
loaded_sub_model
=
None
sub_model_should_be_defined
=
True
# if the model is in a pipeline module, then we load it from the pipeline
if
name
in
passed_class_obj
:
# 1. check that passed_class_obj has correct parent class
if
not
is_pipeline_module
and
passed_class_obj
[
name
]
is
not
None
:
if
not
is_pipeline_module
:
library
=
importlib
.
import_module
(
library_name
)
class_obj
=
getattr
(
library
,
class_name
)
importable_classes
=
LOADABLE_CLASSES
[
library_name
]
...
...
@@ -581,12 +587,6 @@ class DiffusionPipeline(ConfigMixin):
f
"
{
passed_class_obj
[
name
]
}
is of type:
{
type
(
passed_class_obj
[
name
])
}
, but should be"
f
"
{
expected_class_obj
}
"
)
elif
passed_class_obj
[
name
]
is
None
and
name
not
in
pipeline_class
.
_optional_components
:
logger
.
warning
(
f
"You have passed `None` for
{
name
}
to disable its functionality in
{
pipeline_class
}
. Note"
f
" that this might lead to problems when using
{
pipeline_class
}
and is not recommended."
)
sub_model_should_be_defined
=
False
else
:
logger
.
warning
(
f
"You have passed a non-standard module
{
passed_class_obj
[
name
]
}
. We cannot verify whether it"
...
...
@@ -608,7 +608,7 @@ class DiffusionPipeline(ConfigMixin):
importable_classes
=
LOADABLE_CLASSES
[
library_name
]
class_candidates
=
{
c
:
getattr
(
library
,
c
,
None
)
for
c
in
importable_classes
.
keys
()}
if
loaded_sub_model
is
None
and
sub_model_should_be_defined
:
if
loaded_sub_model
is
None
:
load_method_name
=
None
for
class_name
,
class_candidate
in
class_candidates
.
items
():
if
class_candidate
is
not
None
and
issubclass
(
class_obj
,
class_candidate
):
...
...
src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py
View file @
05a36d5c
...
...
@@ -141,6 +141,7 @@ class AltDiffusionPipeline(DiffusionPipeline):
safety_checker
=
safety_checker
,
feature_extractor
=
feature_extractor
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
self
.
register_to_config
(
requires_safety_checker
=
requires_safety_checker
)
def
enable_xformers_memory_efficient_attention
(
self
):
...
...
@@ -379,7 +380,7 @@ class AltDiffusionPipeline(DiffusionPipeline):
)
def
prepare_latents
(
self
,
batch_size
,
num_channels_latents
,
height
,
width
,
dtype
,
device
,
generator
,
latents
=
None
):
shape
=
(
batch_size
,
num_channels_latents
,
height
//
8
,
width
//
8
)
shape
=
(
batch_size
,
num_channels_latents
,
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
)
if
latents
is
None
:
if
device
.
type
==
"mps"
:
# randn does not work reproducibly on mps
...
...
@@ -420,9 +421,9 @@ class AltDiffusionPipeline(DiffusionPipeline):
Args:
prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation.
height (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
height (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
width (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -469,8 +470,8 @@ class AltDiffusionPipeline(DiffusionPipeline):
(nsfw) content, according to the `safety_checker`.
"""
# 0. Default height and width to unet
height
=
height
or
self
.
unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
# 1. Check inputs. Raise error if not correct
self
.
check_inputs
(
prompt
,
height
,
width
,
callback_steps
)
...
...
src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py
View file @
05a36d5c
...
...
@@ -154,6 +154,7 @@ class AltDiffusionImg2ImgPipeline(DiffusionPipeline):
safety_checker
=
safety_checker
,
feature_extractor
=
feature_extractor
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
self
.
register_to_config
(
requires_safety_checker
=
requires_safety_checker
)
def
enable_attention_slicing
(
self
,
slice_size
:
Optional
[
Union
[
str
,
int
]]
=
"auto"
):
...
...
src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py
View file @
05a36d5c
...
...
@@ -60,6 +60,7 @@ class LDMTextToImagePipeline(DiffusionPipeline):
):
super
().
__init__
()
self
.
register_modules
(
vqvae
=
vqvae
,
bert
=
bert
,
tokenizer
=
tokenizer
,
unet
=
unet
,
scheduler
=
scheduler
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vqvae
.
config
.
block_out_channels
)
-
1
)
@
torch
.
no_grad
()
def
__call__
(
...
...
@@ -79,9 +80,9 @@ class LDMTextToImagePipeline(DiffusionPipeline):
Args:
prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation.
height (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
height (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
width (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -107,8 +108,8 @@ class LDMTextToImagePipeline(DiffusionPipeline):
generated images.
"""
# 0. Default height and width to unet
height
=
height
or
self
.
unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
if
isinstance
(
prompt
,
str
):
batch_size
=
1
...
...
src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py
View file @
05a36d5c
...
...
@@ -106,6 +106,7 @@ class FlaxStableDiffusionPipeline(FlaxDiffusionPipeline):
safety_checker
=
safety_checker
,
feature_extractor
=
feature_extractor
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
def
prepare_inputs
(
self
,
prompt
:
Union
[
str
,
List
[
str
]]):
if
not
isinstance
(
prompt
,
(
str
,
list
)):
...
...
@@ -168,8 +169,8 @@ class FlaxStableDiffusionPipeline(FlaxDiffusionPipeline):
neg_prompt_ids
:
jnp
.
array
=
None
,
):
# 0. Default height and width to unet
height
=
height
or
self
.
unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
if
height
%
8
!=
0
or
width
%
8
!=
0
:
raise
ValueError
(
f
"`height` and `width` have to be divisible by 8 but are
{
height
}
and
{
width
}
."
)
...
...
@@ -192,7 +193,12 @@ class FlaxStableDiffusionPipeline(FlaxDiffusionPipeline):
uncond_embeddings
=
self
.
text_encoder
(
uncond_input
,
params
=
params
[
"text_encoder"
])[
0
]
context
=
jnp
.
concatenate
([
uncond_embeddings
,
text_embeddings
])
latents_shape
=
(
batch_size
,
self
.
unet
.
in_channels
,
height
//
8
,
width
//
8
)
latents_shape
=
(
batch_size
,
self
.
unet
.
in_channels
,
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
,
)
if
latents
is
None
:
latents
=
jax
.
random
.
normal
(
prng_seed
,
shape
=
latents_shape
,
dtype
=
jnp
.
float32
)
else
:
...
...
@@ -269,9 +275,9 @@ class FlaxStableDiffusionPipeline(FlaxDiffusionPipeline):
Args:
prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation.
height (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
height (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
width (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -307,8 +313,8 @@ class FlaxStableDiffusionPipeline(FlaxDiffusionPipeline):
"not-safe-for-work" (nsfw) content, according to the `safety_checker`.
"""
# 0. Default height and width to unet
height
=
height
or
self
.
unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
if
jit
:
images
=
_p_generate
(
...
...
src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py
View file @
05a36d5c
...
...
@@ -108,6 +108,7 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline):
safety_checker
=
safety_checker
,
feature_extractor
=
feature_extractor
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
self
.
register_to_config
(
requires_safety_checker
=
requires_safety_checker
)
def
_encode_prompt
(
self
,
prompt
,
num_images_per_prompt
,
do_classifier_free_guidance
,
negative_prompt
):
...
...
@@ -206,8 +207,8 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline):
**
kwargs
,
):
# 0. Default height and width to unet
height
=
height
or
self
.
unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
if
isinstance
(
prompt
,
str
):
batch_size
=
1
...
...
@@ -241,7 +242,12 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline):
# get the initial random noise unless the user supplied it
latents_dtype
=
text_embeddings
.
dtype
latents_shape
=
(
batch_size
*
num_images_per_prompt
,
4
,
height
//
8
,
width
//
8
)
latents_shape
=
(
batch_size
*
num_images_per_prompt
,
4
,
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
,
)
if
latents
is
None
:
latents
=
generator
.
randn
(
*
latents_shape
).
astype
(
latents_dtype
)
elif
latents
.
shape
!=
latents_shape
:
...
...
src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py
View file @
05a36d5c
...
...
@@ -158,6 +158,7 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
safety_checker
=
safety_checker
,
feature_extractor
=
feature_extractor
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
self
.
register_to_config
(
requires_safety_checker
=
requires_safety_checker
)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_onnx_stable_diffusion.OnnxStableDiffusionPipeline._encode_prompt
...
...
@@ -273,9 +274,9 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
repainted, while black pixels will be preserved. If `mask_image` is a PIL image, it will be converted
to a single channel (luminance) before use. If it's a tensor, it should contain one color channel (L)
instead of 3, so the expected shape would be `(B, H, W, 1)`.
height (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
height (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
width (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -321,8 +322,8 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
(nsfw) content, according to the `safety_checker`.
"""
# 0. Default height and width to unet
height
=
height
or
self
.
unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
if
isinstance
(
prompt
,
str
):
batch_size
=
1
...
...
@@ -358,7 +359,12 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
)
num_channels_latents
=
NUM_LATENT_CHANNELS
latents_shape
=
(
batch_size
*
num_images_per_prompt
,
num_channels_latents
,
height
//
8
,
width
//
8
)
latents_shape
=
(
batch_size
*
num_images_per_prompt
,
num_channels_latents
,
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
,
)
latents_dtype
=
text_embeddings
.
dtype
if
latents
is
None
:
latents
=
generator
.
randn
(
*
latents_shape
).
astype
(
latents_dtype
)
...
...
src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint_legacy.py
View file @
05a36d5c
...
...
@@ -27,11 +27,11 @@ def preprocess(image):
return
2.0
*
image
-
1.0
def
preprocess_mask
(
mask
):
def
preprocess_mask
(
mask
,
scale_factor
=
8
):
mask
=
mask
.
convert
(
"L"
)
w
,
h
=
mask
.
size
w
,
h
=
map
(
lambda
x
:
x
-
x
%
32
,
(
w
,
h
))
# resize to integer multiple of 32
mask
=
mask
.
resize
((
w
//
8
,
h
//
8
),
resample
=
PIL
.
Image
.
NEAREST
)
mask
=
mask
.
resize
((
w
//
scale_factor
,
h
//
scale_factor
),
resample
=
PIL
.
Image
.
NEAREST
)
mask
=
np
.
array
(
mask
).
astype
(
np
.
float32
)
/
255.0
mask
=
np
.
tile
(
mask
,
(
4
,
1
,
1
))
mask
=
mask
[
None
].
transpose
(
0
,
1
,
2
,
3
)
# what does this step do?
...
...
@@ -143,6 +143,7 @@ class OnnxStableDiffusionInpaintPipelineLegacy(DiffusionPipeline):
safety_checker
=
safety_checker
,
feature_extractor
=
feature_extractor
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
self
.
register_to_config
(
requires_safety_checker
=
requires_safety_checker
)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_onnx_stable_diffusion.OnnxStableDiffusionPipeline._encode_prompt
...
...
@@ -349,7 +350,7 @@ class OnnxStableDiffusionInpaintPipelineLegacy(DiffusionPipeline):
# preprocess mask
if
not
isinstance
(
mask_image
,
np
.
ndarray
):
mask_image
=
preprocess_mask
(
mask_image
)
mask_image
=
preprocess_mask
(
mask_image
,
self
.
vae_scale_factor
)
mask_image
=
mask_image
.
astype
(
latents_dtype
)
mask
=
np
.
concatenate
([
mask_image
]
*
num_images_per_prompt
,
axis
=
0
)
...
...
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
View file @
05a36d5c
...
...
@@ -140,6 +140,7 @@ class StableDiffusionPipeline(DiffusionPipeline):
safety_checker
=
safety_checker
,
feature_extractor
=
feature_extractor
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
self
.
register_to_config
(
requires_safety_checker
=
requires_safety_checker
)
def
enable_xformers_memory_efficient_attention
(
self
):
...
...
@@ -378,7 +379,7 @@ class StableDiffusionPipeline(DiffusionPipeline):
)
def
prepare_latents
(
self
,
batch_size
,
num_channels_latents
,
height
,
width
,
dtype
,
device
,
generator
,
latents
=
None
):
shape
=
(
batch_size
,
num_channels_latents
,
height
//
8
,
width
//
8
)
shape
=
(
batch_size
,
num_channels_latents
,
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
)
if
latents
is
None
:
if
device
.
type
==
"mps"
:
# randn does not work reproducibly on mps
...
...
@@ -419,9 +420,9 @@ class StableDiffusionPipeline(DiffusionPipeline):
Args:
prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation.
height (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
height (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
width (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -468,8 +469,8 @@ class StableDiffusionPipeline(DiffusionPipeline):
(nsfw) content, according to the `safety_checker`.
"""
# 0. Default height and width to unet
height
=
height
or
self
.
unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
# 1. Check inputs. Raise error if not correct
self
.
check_inputs
(
prompt
,
height
,
width
,
callback_steps
)
...
...
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py
View file @
05a36d5c
...
...
@@ -108,6 +108,7 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline):
safety_checker
=
safety_checker
,
feature_extractor
=
feature_extractor
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
self
.
register_to_config
(
requires_safety_checker
=
requires_safety_checker
)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_xformers_memory_efficient_attention
...
...
@@ -281,7 +282,7 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline):
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
def
prepare_latents
(
self
,
batch_size
,
num_channels_latents
,
height
,
width
,
dtype
,
device
,
generator
,
latents
=
None
):
shape
=
(
batch_size
,
num_channels_latents
,
height
//
8
,
width
//
8
)
shape
=
(
batch_size
,
num_channels_latents
,
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
)
if
latents
is
None
:
if
device
.
type
==
"mps"
:
# randn does not work reproducibly on mps
...
...
@@ -324,9 +325,9 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline):
configuration of
[this](https://huggingface.co/lambdalabs/sd-image-variations-diffusers/blob/main/feature_extractor/preprocessor_config.json)
`CLIPFeatureExtractor`
height (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
height (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
width (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -370,8 +371,8 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline):
(nsfw) content, according to the `safety_checker`.
"""
# 0. Default height and width to unet
height
=
height
or
self
.
unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
# 1. Check inputs. Raise error if not correct
self
.
check_inputs
(
image
,
height
,
width
,
callback_steps
)
...
...
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
View file @
05a36d5c
...
...
@@ -153,6 +153,7 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline):
safety_checker
=
safety_checker
,
feature_extractor
=
feature_extractor
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
self
.
register_to_config
(
requires_safety_checker
=
requires_safety_checker
)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_attention_slicing
...
...
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
View file @
05a36d5c
...
...
@@ -218,6 +218,7 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline):
safety_checker
=
safety_checker
,
feature_extractor
=
feature_extractor
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
self
.
register_to_config
(
requires_safety_checker
=
requires_safety_checker
)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_attention_slicing
...
...
@@ -468,7 +469,7 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline):
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
def
prepare_latents
(
self
,
batch_size
,
num_channels_latents
,
height
,
width
,
dtype
,
device
,
generator
,
latents
=
None
):
shape
=
(
batch_size
,
num_channels_latents
,
height
//
8
,
width
//
8
)
shape
=
(
batch_size
,
num_channels_latents
,
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
)
if
latents
is
None
:
if
device
.
type
==
"mps"
:
# randn does not work reproducibly on mps
...
...
@@ -490,7 +491,9 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline):
# resize the mask to latents shape as we concatenate the mask to the latents
# we do that before converting to dtype to avoid breaking in case we're using cpu_offload
# and half precision
mask
=
torch
.
nn
.
functional
.
interpolate
(
mask
,
size
=
(
height
//
8
,
width
//
8
))
mask
=
torch
.
nn
.
functional
.
interpolate
(
mask
,
size
=
(
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
)
)
mask
=
mask
.
to
(
device
=
device
,
dtype
=
dtype
)
masked_image
=
masked_image
.
to
(
device
=
device
,
dtype
=
dtype
)
...
...
@@ -547,9 +550,9 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline):
repainted, while black pixels will be preserved. If `mask_image` is a PIL image, it will be converted
to a single channel (luminance) before use. If it's a tensor, it should contain one color channel (L)
instead of 3, so the expected shape would be `(B, H, W, 1)`.
height (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
height (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
width (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -596,8 +599,8 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline):
(nsfw) content, according to the `safety_checker`.
"""
# 0. Default height and width to unet
height
=
height
or
self
.
unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
# 1. Check inputs
self
.
check_inputs
(
prompt
,
height
,
width
,
callback_steps
)
...
...
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py
View file @
05a36d5c
...
...
@@ -51,11 +51,11 @@ def preprocess_image(image):
return
2.0
*
image
-
1.0
def
preprocess_mask
(
mask
):
def
preprocess_mask
(
mask
,
scale_factor
=
8
):
mask
=
mask
.
convert
(
"L"
)
w
,
h
=
mask
.
size
w
,
h
=
map
(
lambda
x
:
x
-
x
%
32
,
(
w
,
h
))
# resize to integer multiple of 32
mask
=
mask
.
resize
((
w
//
8
,
h
//
8
),
resample
=
PIL_INTERPOLATION
[
"nearest"
])
mask
=
mask
.
resize
((
w
//
scale_factor
,
h
//
scale_factor
),
resample
=
PIL_INTERPOLATION
[
"nearest"
])
mask
=
np
.
array
(
mask
).
astype
(
np
.
float32
)
/
255.0
mask
=
np
.
tile
(
mask
,
(
4
,
1
,
1
))
mask
=
mask
[
None
].
transpose
(
0
,
1
,
2
,
3
)
# what does this step do?
...
...
@@ -166,6 +166,7 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline):
safety_checker
=
safety_checker
,
feature_extractor
=
feature_extractor
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
self
.
register_to_config
(
requires_safety_checker
=
requires_safety_checker
)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_attention_slicing
...
...
@@ -541,7 +542,7 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline):
init_image
=
preprocess_image
(
init_image
)
if
not
isinstance
(
mask_image
,
torch
.
FloatTensor
):
mask_image
=
preprocess_mask
(
mask_image
)
mask_image
=
preprocess_mask
(
mask_image
,
self
.
vae_scale_factor
)
# 5. set timesteps
self
.
scheduler
.
set_timesteps
(
num_inference_steps
,
device
=
device
)
...
...
src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py
View file @
05a36d5c
...
...
@@ -136,6 +136,7 @@ class StableDiffusionPipelineSafe(DiffusionPipeline):
feature_extractor
=
feature_extractor
,
)
self
.
_safety_text_concept
=
safety_concept
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
self
.
register_to_config
(
requires_safety_checker
=
requires_safety_checker
)
@
property
...
...
@@ -443,7 +444,7 @@ class StableDiffusionPipelineSafe(DiffusionPipeline):
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
def
prepare_latents
(
self
,
batch_size
,
num_channels_latents
,
height
,
width
,
dtype
,
device
,
generator
,
latents
=
None
):
shape
=
(
batch_size
,
num_channels_latents
,
height
//
8
,
width
//
8
)
shape
=
(
batch_size
,
num_channels_latents
,
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
)
if
latents
is
None
:
if
device
.
type
==
"mps"
:
# randn does not work reproducibly on mps
...
...
@@ -531,9 +532,9 @@ class StableDiffusionPipelineSafe(DiffusionPipeline):
Args:
prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation.
height (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
height (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.unet.config.sample_size *
8
):
width (`int`, *optional*, defaults to self.unet.config.sample_size *
self.vae_scale_factor
):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -600,8 +601,8 @@ class StableDiffusionPipelineSafe(DiffusionPipeline):
(nsfw) content, according to the `safety_checker`.
"""
# 0. Default height and width to unet
height
=
height
or
self
.
unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
unet
.
config
.
sample_size
*
self
.
vae_scale_factor
# 1. Check inputs. Raise error if not correct
self
.
check_inputs
(
prompt
,
height
,
width
,
callback_steps
)
...
...
src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion.py
View file @
05a36d5c
...
...
@@ -78,6 +78,7 @@ class VersatileDiffusionPipeline(DiffusionPipeline):
vae
=
vae
,
scheduler
=
scheduler
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
def
enable_attention_slicing
(
self
,
slice_size
:
Optional
[
Union
[
str
,
int
]]
=
"auto"
):
r
"""
...
...
@@ -131,9 +132,9 @@ class VersatileDiffusionPipeline(DiffusionPipeline):
Args:
image (`PIL.Image.Image`, `List[PIL.Image.Image]` or `torch.Tensor`):
The image prompt or prompts to guide the image generation.
height (`int`, *optional*, defaults to self.image_unet.config.sample_size *
8
):
height (`int`, *optional*, defaults to self.image_unet.config.sample_size *
self.vae_scale_factor
):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.image_unet.config.sample_size *
8
):
width (`int`, *optional*, defaults to self.image_unet.config.sample_size *
self.vae_scale_factor
):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -247,9 +248,9 @@ class VersatileDiffusionPipeline(DiffusionPipeline):
Args:
prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation.
height (`int`, *optional*, defaults to self.image_unet.config.sample_size *
8
):
height (`int`, *optional*, defaults to self.image_unet.config.sample_size *
self.vae_scale_factor
):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.image_unet.config.sample_size *
8
):
width (`int`, *optional*, defaults to self.image_unet.config.sample_size *
self.vae_scale_factor
):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -360,9 +361,9 @@ class VersatileDiffusionPipeline(DiffusionPipeline):
Args:
prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation.
height (`int`, *optional*, defaults to self.image_unet.config.sample_size *
8
):
height (`int`, *optional*, defaults to self.image_unet.config.sample_size *
self.vae_scale_factor
):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.image_unet.config.sample_size *
8
):
width (`int`, *optional*, defaults to self.image_unet.config.sample_size *
self.vae_scale_factor
):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py
View file @
05a36d5c
...
...
@@ -87,6 +87,7 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline):
vae
=
vae
,
scheduler
=
scheduler
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
if
self
.
text_unet
is
not
None
and
(
"dual_cross_attention"
not
in
self
.
image_unet
.
config
or
not
self
.
image_unet
.
config
.
dual_cross_attention
...
...
@@ -419,7 +420,7 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline):
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
def
prepare_latents
(
self
,
batch_size
,
num_channels_latents
,
height
,
width
,
dtype
,
device
,
generator
,
latents
=
None
):
shape
=
(
batch_size
,
num_channels_latents
,
height
//
8
,
width
//
8
)
shape
=
(
batch_size
,
num_channels_latents
,
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
)
if
latents
is
None
:
if
device
.
type
==
"mps"
:
# randn does not work reproducibly on mps
...
...
@@ -474,9 +475,9 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline):
Args:
prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation.
height (`int`, *optional*, defaults to self.image_unet.config.sample_size *
8
):
height (`int`, *optional*, defaults to self.image_unet.config.sample_size *
self.vae_scale_factor
):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.image_unet.config.sample_size *
8
):
width (`int`, *optional*, defaults to self.image_unet.config.sample_size *
self.vae_scale_factor
):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -552,8 +553,8 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline):
returning a tuple, the first element is a list with the generated images.
"""
# 0. Default height and width to unet
height
=
height
or
self
.
image_unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
image_unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
image_unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
image_unet
.
config
.
sample_size
*
self
.
vae_scale_factor
# 1. Check inputs. Raise error if not correct
self
.
check_inputs
(
prompt
,
image
,
height
,
width
,
callback_steps
)
...
...
src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py
View file @
05a36d5c
...
...
@@ -71,6 +71,7 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
vae
=
vae
,
scheduler
=
scheduler
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_xformers_memory_efficient_attention with unet->image_unet
def
enable_xformers_memory_efficient_attention
(
self
):
...
...
@@ -277,7 +278,7 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
def
prepare_latents
(
self
,
batch_size
,
num_channels_latents
,
height
,
width
,
dtype
,
device
,
generator
,
latents
=
None
):
shape
=
(
batch_size
,
num_channels_latents
,
height
//
8
,
width
//
8
)
shape
=
(
batch_size
,
num_channels_latents
,
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
)
if
latents
is
None
:
if
device
.
type
==
"mps"
:
# randn does not work reproducibly on mps
...
...
@@ -318,9 +319,9 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
Args:
image (`PIL.Image.Image`, `List[PIL.Image.Image]` or `torch.Tensor`):
The image prompt or prompts to guide the image generation.
height (`int`, *optional*, defaults to self.image_unet.config.sample_size *
8
):
height (`int`, *optional*, defaults to self.image_unet.config.sample_size *
self.vae_scale_factor
):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.image_unet.config.sample_size *
8
):
width (`int`, *optional*, defaults to self.image_unet.config.sample_size *
self.vae_scale_factor
):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -392,8 +393,8 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
(nsfw) content, according to the `safety_checker`.
"""
# 0. Default height and width to unet
height
=
height
or
self
.
image_unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
image_unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
image_unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
image_unet
.
config
.
sample_size
*
self
.
vae_scale_factor
# 1. Check inputs. Raise error if not correct
self
.
check_inputs
(
image
,
height
,
width
,
callback_steps
)
...
...
src/diffusers/pipelines/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py
View file @
05a36d5c
...
...
@@ -75,6 +75,7 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
vae
=
vae
,
scheduler
=
scheduler
,
)
self
.
vae_scale_factor
=
2
**
(
len
(
self
.
vae
.
config
.
block_out_channels
)
-
1
)
if
self
.
text_unet
is
not
None
:
self
.
_swap_unet_attention_blocks
()
...
...
@@ -337,7 +338,7 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
def
prepare_latents
(
self
,
batch_size
,
num_channels_latents
,
height
,
width
,
dtype
,
device
,
generator
,
latents
=
None
):
shape
=
(
batch_size
,
num_channels_latents
,
height
//
8
,
width
//
8
)
shape
=
(
batch_size
,
num_channels_latents
,
height
//
self
.
vae_scale_factor
,
width
//
self
.
vae_scale_factor
)
if
latents
is
None
:
if
device
.
type
==
"mps"
:
# randn does not work reproducibly on mps
...
...
@@ -378,9 +379,9 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
Args:
prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation.
height (`int`, *optional*, defaults to self.image_unet.config.sample_size *
8
):
height (`int`, *optional*, defaults to self.image_unet.config.sample_size *
self.vae_scale_factor
):
The height in pixels of the generated image.
width (`int`, *optional*, defaults to self.image_unet.config.sample_size *
8
):
width (`int`, *optional*, defaults to self.image_unet.config.sample_size *
self.vae_scale_factor
):
The width in pixels of the generated image.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...
...
@@ -444,8 +445,8 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
(nsfw) content, according to the `safety_checker`.
"""
# 0. Default height and width to unet
height
=
height
or
self
.
image_unet
.
config
.
sample_size
*
8
width
=
width
or
self
.
image_unet
.
config
.
sample_size
*
8
height
=
height
or
self
.
image_unet
.
config
.
sample_size
*
self
.
vae_scale_factor
width
=
width
or
self
.
image_unet
.
config
.
sample_size
*
self
.
vae_scale_factor
# 1. Check inputs. Raise error if not correct
self
.
check_inputs
(
prompt
,
height
,
width
,
callback_steps
)
...
...
tests/pipelines/altdiffusion/test_alt_diffusion.py
View file @
05a36d5c
...
...
@@ -172,7 +172,9 @@ class AltDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
image_from_tuple_slice
=
image_from_tuple
[
0
,
-
3
:,
-
3
:,
-
1
]
assert
image
.
shape
==
(
1
,
64
,
64
,
3
)
expected_slice
=
np
.
array
([
0.5748162
,
0.60447145
,
0.48821217
,
0.50100636
,
0.5431185
,
0.45763683
,
0.49657696
,
0.48132733
,
0.47573093
])
expected_slice
=
np
.
array
(
[
0.5748162
,
0.60447145
,
0.48821217
,
0.50100636
,
0.5431185
,
0.45763683
,
0.49657696
,
0.48132733
,
0.47573093
]
)
assert
np
.
abs
(
image_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
assert
np
.
abs
(
image_from_tuple_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
...
...
@@ -219,7 +221,9 @@ class AltDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
image_from_tuple_slice
=
image_from_tuple
[
0
,
-
3
:,
-
3
:,
-
1
]
assert
image
.
shape
==
(
1
,
64
,
64
,
3
)
expected_slice
=
np
.
array
([
0.51605093
,
0.5707241
,
0.47365507
,
0.50578886
,
0.5633877
,
0.4642503
,
0.5182081
,
0.48763484
,
0.49084237
])
expected_slice
=
np
.
array
(
[
0.51605093
,
0.5707241
,
0.47365507
,
0.50578886
,
0.5633877
,
0.4642503
,
0.5182081
,
0.48763484
,
0.49084237
]
)
assert
np
.
abs
(
image_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
assert
np
.
abs
(
image_from_tuple_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
...
...
tests/pipelines/latent_diffusion/test_latent_diffusion.py
View file @
05a36d5c
...
...
@@ -111,8 +111,8 @@ class LDMTextToImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
image_slice
=
image
[
0
,
-
3
:,
-
3
:,
-
1
]
image_from_tuple_slice
=
image_from_tuple
[
0
,
-
3
:,
-
3
:,
-
1
]
assert
image
.
shape
==
(
1
,
64
,
64
,
3
)
expected_slice
=
np
.
array
([
0.
507
4
,
0.5
026
,
0.4
998
,
0.4
0
56
,
0.
3523
,
0.4649
,
0.52
89
,
0.5
299
,
0.4897
])
assert
image
.
shape
==
(
1
,
16
,
16
,
3
)
expected_slice
=
np
.
array
([
0.
6806
,
0.545
4
,
0.5
638
,
0.4
893
,
0.4
6
56
,
0.
4257
,
0.6248
,
0.52
17
,
0.5
498
])
assert
np
.
abs
(
image_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
assert
np
.
abs
(
image_from_tuple_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment