Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
ComfyUI
Commits
107e78b1
Commit
107e78b1
authored
Nov 16, 2023
by
comfyanonymous
Browse files
Add support for loading SSD1B diffusers unet version.
Improve diffusers model detection.
parent
7e3fe3ad
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
44 additions
and
32 deletions
+44
-32
comfy/model_detection.py
comfy/model_detection.py
+44
-32
No files found.
comfy/model_detection.py
View file @
107e78b1
...
@@ -186,17 +186,24 @@ def convert_config(unet_config):
...
@@ -186,17 +186,24 @@ def convert_config(unet_config):
def
unet_config_from_diffusers_unet
(
state_dict
,
dtype
):
def
unet_config_from_diffusers_unet
(
state_dict
,
dtype
):
match
=
{}
match
=
{}
attention_resolutions
=
[]
transformer_depth
=
[]
attn_res
=
1
attn_res
=
1
for
i
in
range
(
5
):
down_blocks
=
count_blocks
(
state_dict
,
"down_blocks.{}"
)
k
=
"down_blocks.{}.attentions.1.transformer_blocks.0.attn2.to_k.weight"
.
format
(
i
)
for
i
in
range
(
down_blocks
):
if
k
in
state_dict
:
attn_blocks
=
count_blocks
(
state_dict
,
"down_blocks.{}.attentions."
.
format
(
i
)
+
'{}'
)
match
[
"context_dim"
]
=
state_dict
[
k
].
shape
[
1
]
for
ab
in
range
(
attn_blocks
):
attention_resolutions
.
append
(
attn_res
)
transformer_count
=
count_blocks
(
state_dict
,
"down_blocks.{}.attentions.{}.transformer_blocks."
.
format
(
i
,
ab
)
+
'{}'
)
transformer_depth
.
append
(
transformer_count
)
if
transformer_count
>
0
:
match
[
"context_dim"
]
=
state_dict
[
"down_blocks.{}.attentions.{}.transformer_blocks.0.attn2.to_k.weight"
.
format
(
i
,
ab
)].
shape
[
1
]
attn_res
*=
2
attn_res
*=
2
if
attn_blocks
==
0
:
transformer_depth
.
append
(
0
)
transformer_depth
.
append
(
0
)
match
[
"
attention_resolutions"
]
=
attention_resolutions
match
[
"
transformer_depth"
]
=
transformer_depth
match
[
"model_channels"
]
=
state_dict
[
"conv_in.weight"
].
shape
[
0
]
match
[
"model_channels"
]
=
state_dict
[
"conv_in.weight"
].
shape
[
0
]
match
[
"in_channels"
]
=
state_dict
[
"conv_in.weight"
].
shape
[
1
]
match
[
"in_channels"
]
=
state_dict
[
"conv_in.weight"
].
shape
[
1
]
...
@@ -208,50 +215,55 @@ def unet_config_from_diffusers_unet(state_dict, dtype):
...
@@ -208,50 +215,55 @@ def unet_config_from_diffusers_unet(state_dict, dtype):
SDXL
=
{
'use_checkpoint'
:
False
,
'image_size'
:
32
,
'out_channels'
:
4
,
'use_spatial_transformer'
:
True
,
'legacy'
:
False
,
SDXL
=
{
'use_checkpoint'
:
False
,
'image_size'
:
32
,
'out_channels'
:
4
,
'use_spatial_transformer'
:
True
,
'legacy'
:
False
,
'num_classes'
:
'sequential'
,
'adm_in_channels'
:
2816
,
'dtype'
:
dtype
,
'in_channels'
:
4
,
'model_channels'
:
320
,
'num_classes'
:
'sequential'
,
'adm_in_channels'
:
2816
,
'dtype'
:
dtype
,
'in_channels'
:
4
,
'model_channels'
:
320
,
'num_res_blocks'
:
2
,
'attention_resolutions'
:
[
2
,
4
],
'transformer_depth'
:
[
0
,
2
,
10
],
'channel_mult'
:
[
1
,
2
,
4
],
'num_res_blocks'
:
[
2
,
2
,
2
],
'transformer_depth'
:
[
0
,
0
,
2
,
2
,
10
,
10
],
'channel_mult'
:
[
1
,
2
,
4
],
'transformer_depth_middle'
:
10
,
'transformer_depth_middle'
:
10
,
'use_linear_in_transformer'
:
True
,
'context_dim'
:
2048
,
"
num_head_channels
"
:
64
}
'use_linear_in_transformer'
:
True
,
'context_dim'
:
2048
,
'
num_head_channels
'
:
64
,
'transformer_depth_output'
:
[
0
,
0
,
0
,
2
,
2
,
2
,
10
,
10
,
10
]
}
SDXL_refiner
=
{
'use_checkpoint'
:
False
,
'image_size'
:
32
,
'out_channels'
:
4
,
'use_spatial_transformer'
:
True
,
'legacy'
:
False
,
SDXL_refiner
=
{
'use_checkpoint'
:
False
,
'image_size'
:
32
,
'out_channels'
:
4
,
'use_spatial_transformer'
:
True
,
'legacy'
:
False
,
'num_classes'
:
'sequential'
,
'adm_in_channels'
:
2560
,
'dtype'
:
dtype
,
'in_channels'
:
4
,
'model_channels'
:
384
,
'num_classes'
:
'sequential'
,
'adm_in_channels'
:
2560
,
'dtype'
:
dtype
,
'in_channels'
:
4
,
'model_channels'
:
384
,
'num_res_blocks'
:
2
,
'attention_resolutions'
:
[
2
,
4
],
'transformer_depth'
:
[
0
,
4
,
4
,
0
],
'channel_mult'
:
[
1
,
2
,
4
,
4
],
'num_res_blocks'
:
[
2
,
2
,
2
,
2
],
'transformer_depth'
:
[
0
,
0
,
4
,
4
,
4
,
4
,
0
,
0
],
'channel_mult'
:
[
1
,
2
,
4
,
4
],
'transformer_depth_middle'
:
4
,
'transformer_depth_middle'
:
4
,
'use_linear_in_transformer'
:
True
,
'context_dim'
:
1280
,
"
num_head_channels
"
:
64
}
'use_linear_in_transformer'
:
True
,
'context_dim'
:
1280
,
'
num_head_channels
'
:
64
,
'transformer_depth_output'
:
[
0
,
0
,
0
,
4
,
4
,
4
,
4
,
4
,
4
,
0
,
0
,
0
]
}
SD21
=
{
'use_checkpoint'
:
False
,
'image_size'
:
32
,
'out_channels'
:
4
,
'use_spatial_transformer'
:
True
,
'legacy'
:
False
,
SD21
=
{
'use_checkpoint'
:
False
,
'image_size'
:
32
,
'out_channels'
:
4
,
'use_spatial_transformer'
:
True
,
'legacy'
:
False
,
'adm_in_channels'
:
None
,
'dtype'
:
dtype
,
'in_channels'
:
4
,
'model_channels'
:
320
,
'num_res_blocks'
:
2
,
'adm_in_channels'
:
None
,
'dtype'
:
dtype
,
'in_channels'
:
4
,
'model_channels'
:
320
,
'num_res_blocks'
:
[
2
,
2
,
2
,
2
],
'attention_resolutions'
:
[
1
,
2
,
4
],
'transformer_depth'
:
[
1
,
1
,
1
,
0
],
'channel_mult'
:
[
1
,
2
,
4
,
4
],
'transformer_depth'
:
[
1
,
1
,
1
,
1
,
1
,
1
,
0
,
0
],
'channel_mult'
:
[
1
,
2
,
4
,
4
],
'transformer_depth_middle'
:
1
,
'use_linear_in_transformer'
:
True
,
'transformer_depth_middle'
:
1
,
'use_linear_in_transformer'
:
True
,
'context_dim'
:
1024
,
"
num_head_channels
"
:
64
}
'context_dim'
:
1024
,
'
num_head_channels
'
:
64
,
'transformer_depth_output'
:
[
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
0
,
0
,
0
]
}
SD21_uncliph
=
{
'use_checkpoint'
:
False
,
'image_size'
:
32
,
'out_channels'
:
4
,
'use_spatial_transformer'
:
True
,
'legacy'
:
False
,
SD21_uncliph
=
{
'use_checkpoint'
:
False
,
'image_size'
:
32
,
'out_channels'
:
4
,
'use_spatial_transformer'
:
True
,
'legacy'
:
False
,
'num_classes'
:
'sequential'
,
'adm_in_channels'
:
2048
,
'dtype'
:
dtype
,
'in_channels'
:
4
,
'model_channels'
:
320
,
'num_classes'
:
'sequential'
,
'adm_in_channels'
:
2048
,
'dtype'
:
dtype
,
'in_channels'
:
4
,
'model_channels'
:
320
,
'num_res_blocks'
:
2
,
'attention_resolutions'
:
[
1
,
2
,
4
],
'transformer_depth'
:
[
1
,
1
,
1
,
0
],
'channel_mult'
:
[
1
,
2
,
4
,
4
],
'num_res_blocks'
:
[
2
,
2
,
2
,
2
],
'transformer_depth'
:
[
1
,
1
,
1
,
1
,
1
,
1
,
0
,
0
],
'channel_mult'
:
[
1
,
2
,
4
,
4
],
'transformer_depth_middle'
:
1
,
'transformer_depth_middle'
:
1
,
'use_linear_in_transformer'
:
True
,
'context_dim'
:
1024
,
"
num_head_channels
"
:
64
}
'use_linear_in_transformer'
:
True
,
'context_dim'
:
1024
,
'
num_head_channels
'
:
64
,
'transformer_depth_output'
:
[
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
0
,
0
,
0
]
}
SD21_unclipl
=
{
'use_checkpoint'
:
False
,
'image_size'
:
32
,
'out_channels'
:
4
,
'use_spatial_transformer'
:
True
,
'legacy'
:
False
,
SD21_unclipl
=
{
'use_checkpoint'
:
False
,
'image_size'
:
32
,
'out_channels'
:
4
,
'use_spatial_transformer'
:
True
,
'legacy'
:
False
,
'num_classes'
:
'sequential'
,
'adm_in_channels'
:
1536
,
'dtype'
:
dtype
,
'in_channels'
:
4
,
'model_channels'
:
320
,
'num_classes'
:
'sequential'
,
'adm_in_channels'
:
1536
,
'dtype'
:
dtype
,
'in_channels'
:
4
,
'model_channels'
:
320
,
'num_res_blocks'
:
2
,
'attention_resolutions'
:
[
1
,
2
,
4
],
'transformer_depth'
:
[
1
,
1
,
1
,
0
],
'channel_mult'
:
[
1
,
2
,
4
,
4
],
'num_res_blocks'
:
[
2
,
2
,
2
,
2
],
'transformer_depth'
:
[
1
,
1
,
1
,
1
,
1
,
1
,
0
,
0
],
'channel_mult'
:
[
1
,
2
,
4
,
4
],
'transformer_depth_middle'
:
1
,
'transformer_depth_middle'
:
1
,
'use_linear_in_transformer'
:
True
,
'context_dim'
:
1024
}
'use_linear_in_transformer'
:
True
,
'context_dim'
:
1024
,
'num_head_channels'
:
64
,
'transformer_depth_output'
:
[
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
0
,
0
,
0
]
}
SD15
=
{
'use_checkpoint'
:
False
,
'image_size'
:
32
,
'out_channels'
:
4
,
'use_spatial_transformer'
:
True
,
'legacy'
:
False
,
SD15
=
{
'use_checkpoint'
:
False
,
'image_size'
:
32
,
'out_channels'
:
4
,
'use_spatial_transformer'
:
True
,
'legacy'
:
False
,
'adm_in_channels'
:
None
,
'adm_in_channels'
:
None
,
'dtype'
:
dtype
,
'in_channels'
:
4
,
'model_channels'
:
320
,
'num_res_blocks'
:
2
,
'dtype'
:
dtype
,
'in_channels'
:
4
,
'model_channels'
:
320
,
'num_res_blocks'
:
[
2
,
2
,
2
,
2
],
'transformer_depth'
:
[
1
,
1
,
1
,
1
,
1
,
1
,
0
,
0
],
'
attention_resolutions
'
:
[
1
,
2
,
4
],
'transformer_depth'
:
[
1
,
1
,
1
,
0
],
'channel_mult'
:
[
1
,
2
,
4
,
4
]
,
'
channel_mult
'
:
[
1
,
2
,
4
,
4
],
'transformer_depth
_middle
'
:
1
,
'use_linear_in_transformer'
:
False
,
'context_dim'
:
768
,
'num_heads'
:
8
,
'transformer_depth_
middle
'
:
1
,
'use_linear_in_transformer'
:
False
,
'context_dim'
:
768
,
"num_heads"
:
8
}
'transformer_depth_
output
'
:
[
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
0
,
0
,
0
]
}
SDXL_mid_cnet
=
{
'use_checkpoint'
:
False
,
'image_size'
:
32
,
'out_channels'
:
4
,
'use_spatial_transformer'
:
True
,
'legacy'
:
False
,
SDXL_mid_cnet
=
{
'use_checkpoint'
:
False
,
'image_size'
:
32
,
'out_channels'
:
4
,
'use_spatial_transformer'
:
True
,
'legacy'
:
False
,
'num_classes'
:
'sequential'
,
'adm_in_channels'
:
2816
,
'dtype'
:
dtype
,
'in_channels'
:
4
,
'model_channels'
:
320
,
'num_classes'
:
'sequential'
,
'adm_in_channels'
:
2816
,
'dtype'
:
dtype
,
'in_channels'
:
4
,
'model_channels'
:
320
,
'num_res_blocks'
:
2
,
'attention_resolutions'
:
[
4
],
'transformer_depth'
:
[
0
,
0
,
1
],
'channel_mult'
:
[
1
,
2
,
4
],
'num_res_blocks'
:
[
2
,
2
,
2
],
'transformer_depth'
:
[
0
,
0
,
0
,
0
,
1
,
1
],
'channel_mult'
:
[
1
,
2
,
4
],
'transformer_depth_middle'
:
1
,
'transformer_depth_middle'
:
1
,
'use_linear_in_transformer'
:
True
,
'context_dim'
:
2048
,
"
num_head_channels
"
:
64
}
'use_linear_in_transformer'
:
True
,
'context_dim'
:
2048
,
'
num_head_channels
'
:
64
,
'transformer_depth_output'
:
[
0
,
0
,
0
,
0
,
0
,
0
,
1
,
1
,
1
]
}
SDXL_small_cnet
=
{
'use_checkpoint'
:
False
,
'image_size'
:
32
,
'out_channels'
:
4
,
'use_spatial_transformer'
:
True
,
'legacy'
:
False
,
SDXL_small_cnet
=
{
'use_checkpoint'
:
False
,
'image_size'
:
32
,
'out_channels'
:
4
,
'use_spatial_transformer'
:
True
,
'legacy'
:
False
,
'num_classes'
:
'sequential'
,
'adm_in_channels'
:
2816
,
'dtype'
:
dtype
,
'in_channels'
:
4
,
'model_channels'
:
320
,
'num_classes'
:
'sequential'
,
'adm_in_channels'
:
2816
,
'dtype'
:
dtype
,
'in_channels'
:
4
,
'model_channels'
:
320
,
'num_res_blocks'
:
2
,
'attention_resolutions'
:
[
],
'transformer_depth'
:
[
0
,
0
,
0
],
'channel_mult'
:
[
1
,
2
,
4
],
'num_res_blocks'
:
[
2
,
2
,
2
],
'transformer_depth'
:
[
0
,
0
,
0
,
0
,
0
,
0
],
'channel_mult'
:
[
1
,
2
,
4
],
'transformer_depth_middle'
:
0
,
'transformer_depth_middle'
:
0
,
'use_linear_in_transformer'
:
True
,
"
num_head_channels
"
:
64
,
'context_dim'
:
1
}
'use_linear_in_transformer'
:
True
,
'
num_head_channels
'
:
64
,
'context_dim'
:
1
,
'transformer_depth_output'
:
[
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
]
}
SDXL_diffusers_inpaint
=
{
'use_checkpoint'
:
False
,
'image_size'
:
32
,
'out_channels'
:
4
,
'use_spatial_transformer'
:
True
,
'legacy'
:
False
,
SDXL_diffusers_inpaint
=
{
'use_checkpoint'
:
False
,
'image_size'
:
32
,
'out_channels'
:
4
,
'use_spatial_transformer'
:
True
,
'legacy'
:
False
,
'num_classes'
:
'sequential'
,
'adm_in_channels'
:
2816
,
'dtype'
:
dtype
,
'in_channels'
:
9
,
'model_channels'
:
320
,
'num_classes'
:
'sequential'
,
'adm_in_channels'
:
2816
,
'dtype'
:
dtype
,
'in_channels'
:
9
,
'model_channels'
:
320
,
'num_res_blocks'
:
2
,
'attention_resolutions'
:
[
2
,
4
],
'transformer_depth'
:
[
0
,
2
,
10
],
'channel_mult'
:
[
1
,
2
,
4
],
'num_res_blocks'
:
[
2
,
2
,
2
],
'transformer_depth'
:
[
0
,
0
,
2
,
2
,
10
,
10
],
'channel_mult'
:
[
1
,
2
,
4
],
'transformer_depth_middle'
:
10
,
'transformer_depth_middle'
:
10
,
'use_linear_in_transformer'
:
True
,
'context_dim'
:
2048
,
"num_head_channels"
:
64
}
'use_linear_in_transformer'
:
True
,
'context_dim'
:
2048
,
'num_head_channels'
:
64
,
'transformer_depth_output'
:
[
0
,
0
,
0
,
2
,
2
,
2
,
10
,
10
,
10
]}
SSD_1B
=
{
'use_checkpoint'
:
False
,
'image_size'
:
32
,
'out_channels'
:
4
,
'use_spatial_transformer'
:
True
,
'legacy'
:
False
,
'num_classes'
:
'sequential'
,
'adm_in_channels'
:
2816
,
'dtype'
:
dtype
,
'in_channels'
:
4
,
'model_channels'
:
320
,
'num_res_blocks'
:
[
2
,
2
,
2
],
'transformer_depth'
:
[
0
,
0
,
2
,
2
,
4
,
4
],
'transformer_depth_output'
:
[
0
,
0
,
0
,
1
,
1
,
2
,
10
,
4
,
4
],
'channel_mult'
:
[
1
,
2
,
4
],
'transformer_depth_middle'
:
-
1
,
'use_linear_in_transformer'
:
True
,
'context_dim'
:
2048
,
'num_head_channels'
:
64
}
supported_models
=
[
SDXL
,
SDXL_refiner
,
SD21
,
SD15
,
SD21_uncliph
,
SD21_unclipl
,
SDXL_mid_cnet
,
SDXL_small_cnet
,
SDXL_diffusers_inpaint
]
supported_models
=
[
SDXL
,
SDXL_refiner
,
SD21
,
SD15
,
SD21_uncliph
,
SD21_unclipl
,
SDXL_mid_cnet
,
SDXL_small_cnet
,
SDXL_diffusers_inpaint
,
SSD_1B
]
for
unet_config
in
supported_models
:
for
unet_config
in
supported_models
:
matches
=
True
matches
=
True
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment