renzhc / diffusers_dcu

Commit a326d611 (unverified)
Authored Feb 13, 2024 by Dhruv Nair; committed by GitHub on Feb 12, 2024
Parent: e7696e20

Fix configuring VAE from single file mixin (#6950)

* update
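In practice, this change lets callers of `from_single_file` point at a local YAML via the new `config_file` argument, or override the VAE's `scaling_factor` directly. A minimal sketch of the intended call pattern, assuming `AutoencoderKL` as the loading class; both file paths are placeholders, not part of the commit:

    from diffusers import AutoencoderKL

    # Load a VAE from a single-file checkpoint (placeholder paths).
    vae = AutoencoderKL.from_single_file(
        "path/to/vae.safetensors",          # single-file checkpoint
        config_file="path/to/config.yaml",  # new in this commit: local YAML config
        scaling_factor=0.18215,             # new in this commit: explicit override
    )

Passing both `config_file` and `original_config_file` is rejected with a `ValueError`, as enforced in `autoencoder.py` below.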
Showing 2 changed files, with 28 additions and 3 deletions (+28 −3):

    src/diffusers/loaders/autoencoder.py        +22 −1
    src/diffusers/loaders/single_file_utils.py   +6 −2
src/diffusers/loaders/autoencoder.py
@@ -38,6 +38,9 @@ class FromOriginalVAEMixin:
                 - A link to the `.ckpt` file (for example
                   `"https://huggingface.co/<repo_id>/blob/main/<path_to_file>.ckpt"`) on the Hub.
                 - A path to a *file* containing all pipeline weights.
+            config_file (`str`, *optional*):
+                Filepath to the configuration YAML file associated with the model. If not provided it will default to:
+                https://raw.githubusercontent.com/CompVis/stable-diffusion/main/configs/stable-diffusion/v1-inference.yaml
             torch_dtype (`str` or `torch.dtype`, *optional*):
                 Override the default `torch.dtype` and load the model with another dtype. If `"auto"` is passed, the
                 dtype is automatically derived from the model's weights.
@@ -65,6 +68,13 @@ class FromOriginalVAEMixin:
             image_size (`int`, *optional*, defaults to 512):
                 The image size the model was trained on. Use 512 for all Stable Diffusion v1 models and the Stable
                 Diffusion v2 base model. Use 768 for Stable Diffusion v2.
+            scaling_factor (`float`, *optional*, defaults to 0.18215):
+                The component-wise standard deviation of the trained latent space computed using the first batch of the
+                training set. This is used to scale the latent space to have unit variance when training the diffusion
+                model. The latents are scaled with the formula `z = z * scaling_factor` before being passed to the
+                diffusion model. When decoding, the latents are scaled back to the original scale with the formula: `z
+                = 1 / scaling_factor * z`. For more details, refer to sections 4.3.2 and D.1 of the [High-Resolution
+                Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) paper.
             use_safetensors (`bool`, *optional*, defaults to `None`):
                 If set to `None`, the safetensors weights are downloaded if they're available **and** if the
                 safetensors library is installed. If set to `True`, the model is forcibly loaded from safetensors
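The two formulas in the new `scaling_factor` docstring amount to a simple multiplicative round trip. A toy sketch in plain PyTorch, with shapes and values chosen only for illustration:

    import torch

    scaling_factor = 0.18215
    z = torch.randn(1, 4, 64, 64)               # latents from the VAE encoder

    z_scaled = z * scaling_factor                # scaled before the diffusion model
    z_restored = 1 / scaling_factor * z_scaled   # unscaled again before decoding

    assert torch.allclose(z, z_restored)         # round trip recovers the latents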
@@ -92,6 +102,7 @@ class FromOriginalVAEMixin:
         """
         original_config_file = kwargs.pop("original_config_file", None)
+        config_file = kwargs.pop("config_file", None)
         resume_download = kwargs.pop("resume_download", False)
         force_download = kwargs.pop("force_download", False)
         proxies = kwargs.pop("proxies", None)
@@ -103,6 +114,13 @@ class FromOriginalVAEMixin:
         use_safetensors = kwargs.pop("use_safetensors", True)

         class_name = cls.__name__
+
+        if (config_file is not None) and (original_config_file is not None):
+            raise ValueError(
+                "You cannot pass both `config_file` and `original_config_file` to `from_single_file`. Please use only one of these arguments."
+            )
+
+        original_config_file = original_config_file or config_file
         original_config, checkpoint = fetch_ldm_config_and_checkpoint(
             pretrained_model_link_or_path=pretrained_model_link_or_path,
             class_name=class_name,
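Because this guard runs before `fetch_ldm_config_and_checkpoint`, conflicting arguments fail fast, before any download is attempted. A sketch of the failure mode (placeholder paths again):

    from diffusers import AutoencoderKL

    try:
        AutoencoderKL.from_single_file(
            "path/to/vae.safetensors",
            config_file="config.yaml",
            original_config_file="v1-inference.yaml",  # conflicts with config_file
        )
    except ValueError as err:
        print(err)  # "You cannot pass both `config_file` and `original_config_file` ..."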
@@ -118,7 +136,10 @@ class FromOriginalVAEMixin:
         )

         image_size = kwargs.pop("image_size", None)
-        component = create_diffusers_vae_model_from_ldm(class_name, original_config, checkpoint, image_size=image_size)
+        scaling_factor = kwargs.pop("scaling_factor", None)
+        component = create_diffusers_vae_model_from_ldm(
+            class_name, original_config, checkpoint, image_size=image_size, scaling_factor=scaling_factor
+        )
         vae = component["vae"]
         if torch_dtype is not None:
             vae = vae.to(torch_dtype)
src/diffusers/loaders/single_file_utils.py
@@ -175,6 +175,7 @@ DIFFUSERS_TO_LDM_MAPPING = {
 }

 LDM_VAE_KEY = "first_stage_model."
+LDM_VAE_DEFAULT_SCALING_FACTOR = 0.18215
 LDM_UNET_KEY = "model.diffusion_model."
 LDM_CONTROLNET_KEY = "control_model."
 LDM_CLIP_PREFIX_TO_REMOVE = ["cond_stage_model.transformer.", "conditioner.embedders.0.transformer."]
@@ -518,7 +519,10 @@ def create_vae_diffusers_config(original_config, image_size, scaling_factor=None):
     Creates a config for the diffusers based on the config of the LDM model.
     """
     vae_params = original_config["model"]["params"]["first_stage_config"]["params"]["ddconfig"]
-    scaling_factor = scaling_factor or original_config["model"]["params"]["scale_factor"]
+    if scaling_factor is None and "scale_factor" in original_config["model"]["params"]:
+        scaling_factor = original_config["model"]["params"]["scale_factor"]
+    elif scaling_factor is None:
+        scaling_factor = LDM_VAE_DEFAULT_SCALING_FACTOR

     block_out_channels = [vae_params["ch"] * mult for mult in vae_params["ch_mult"]]
     down_block_types = ["DownEncoderBlock2D"] * len(block_out_channels)
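The replacement logic resolves the scaling factor with a clear precedence: an explicit argument wins, then the YAML's `model.params.scale_factor`, then the new `LDM_VAE_DEFAULT_SCALING_FACTOR`. A standalone sketch of that precedence; the helper name is hypothetical, not part of the diff:

    LDM_VAE_DEFAULT_SCALING_FACTOR = 0.18215

    def resolve_scaling_factor(original_config, scaling_factor=None):
        # Mirrors the fallback order introduced in create_vae_diffusers_config.
        if scaling_factor is None and "scale_factor" in original_config["model"]["params"]:
            return original_config["model"]["params"]["scale_factor"]
        if scaling_factor is None:
            return LDM_VAE_DEFAULT_SCALING_FACTOR
        return scaling_factor

    print(resolve_scaling_factor({"model": {"params": {"scale_factor": 0.13025}}}))  # 0.13025 (from YAML)
    print(resolve_scaling_factor({"model": {"params": {}}}))                         # 0.18215 (default)
    print(resolve_scaling_factor({"model": {"params": {}}}, scaling_factor=0.5))     # 0.5 (explicit)

Previously, a config without `scale_factor` would raise a `KeyError`, which is the bug this hunk fixes.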
@@ -1173,7 +1177,7 @@ def create_diffusers_unet_model_from_ldm(


 def create_diffusers_vae_model_from_ldm(
-    pipeline_class_name, original_config, checkpoint, image_size=None, scaling_factor=0.18125
+    pipeline_class_name, original_config, checkpoint, image_size=None, scaling_factor=None
 ):
     # import here to avoid circular imports
     from ..models import AutoencoderKL