chenpangpang / ComfyUI · Commits

Commit 8ceb5a02, authored Jun 27, 2024 by comfyanonymous
Parent: 5ff3d4eb

Support saving stable audio checkpoint that can be loaded back.
Showing 3 changed files with 14 additions and 2 deletions:

  comfy/model_base.py         +9  -0
  comfy/sd.py                 +1  -1
  comfy/supported_models.py   +4  -1
comfy/model_base.py

@@ -627,3 +627,12 @@ class StableAudio1(BaseModel):
         cross_attn = torch.cat([cross_attn.to(device), seconds_start_embed.repeat((cross_attn.shape[0], 1, 1)), seconds_total_embed.repeat((cross_attn.shape[0], 1, 1))], dim=1)
         out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
         return out
+
+    def state_dict_for_saving(self, clip_state_dict=None, vae_state_dict=None, clip_vision_state_dict=None):
+        sd = super().state_dict_for_saving(clip_state_dict=clip_state_dict, vae_state_dict=vae_state_dict, clip_vision_state_dict=clip_vision_state_dict)
+        d = {"conditioner.conditioners.seconds_start.": self.seconds_start_embedder.state_dict(), "conditioner.conditioners.seconds_total.": self.seconds_total_embedder.state_dict()}
+        for k in d:
+            s = d[k]
+            for l in s:
+                sd["{}{}".format(k, l)] = s[l]
+        return sd
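The new state_dict_for_saving() folds each embedder's own state_dict back into the main checkpoint under its original dotted prefix, so the keys that the loader strips out (see comfy/supported_models.py below) reappear verbatim when the checkpoint is written back out. Below is a minimal standalone sketch of that flattening pattern; it uses a tiny stand-in module, not ComfyUI's real embedder class.

import torch.nn as nn

# Hypothetical stand-in for the seconds_start/seconds_total embedders;
# only the flattening pattern matters here, not the module itself.
class TinyEmbedder(nn.Module):
    def __init__(self, dim=4):
        super().__init__()
        self.proj = nn.Linear(1, dim)

checkpoint = {}  # pretend this already holds the diffusion model / VAE / text encoder weights

# Flatten each embedder's state_dict into the checkpoint under a dotted prefix,
# mirroring state_dict_for_saving() above.
embedders = {
    "conditioner.conditioners.seconds_start.": TinyEmbedder(),
    "conditioner.conditioners.seconds_total.": TinyEmbedder(),
}
for prefix, module in embedders.items():
    for name, tensor in module.state_dict().items():
        checkpoint["{}{}".format(prefix, name)] = tensor

print(sorted(checkpoint.keys()))
# e.g. ['conditioner.conditioners.seconds_start.proj.bias',
#       'conditioner.conditioners.seconds_start.proj.weight', ...]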
comfy/sd.py

@@ -236,7 +236,7 @@ class VAE:
             self.first_stage_model = AutoencodingEngine(regularizer_config={'target': "comfy.ldm.models.autoencoder.DiagonalGaussianRegularizer"},
                                                         encoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Encoder", 'params': ddconfig},
                                                         decoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Decoder", 'params': ddconfig})
-        elif "decoder.layers.0.weight_v" in sd:
+        elif "decoder.layers.1.layers.0.beta" in sd:
             self.first_stage_model = AudioOobleckVAE()
             self.memory_used_encode = lambda shape, dtype: (1000 * shape[2]) * model_management.dtype_size(dtype)
             self.memory_used_decode = lambda shape, dtype: (1000 * shape[2] * 2048) * model_management.dtype_size(dtype)
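The VAE loader dispatches on a characteristic key in the incoming state dict; this change probes for "decoder.layers.1.layers.0.beta" instead of "decoder.layers.0.weight_v", presumably so that checkpoints written back out by the new saving path are still recognized as the audio (Oobleck) VAE. A sketch of that key-probing pattern follows; the state dict is fabricated for illustration and only the probed key name is taken from the diff.

import torch

def pick_vae_kind(sd):
    # Dispatch on a key that is unique to the Oobleck audio VAE checkpoint layout.
    if "decoder.layers.1.layers.0.beta" in sd:
        return "oobleck-audio"
    return "image-autoencoder"

fake_sd = {"decoder.layers.1.layers.0.beta": torch.zeros(1)}
print(pick_vae_kind(fake_sd))  # oobleck-audio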
comfy/supported_models.py

@@ -543,13 +543,16 @@ class StableAudio(supported_models_base.BASE):
         seconds_total_sd = utils.state_dict_prefix_replace(state_dict, {"conditioner.conditioners.seconds_total.": ""}, filter_keys=True)
         return model_base.StableAudio1(self, seconds_start_embedder_weights=seconds_start_sd, seconds_total_embedder_weights=seconds_total_sd, device=device)

     def process_unet_state_dict(self, state_dict):
         for k in list(state_dict.keys()):
             if k.endswith(".cross_attend_norm.beta") or k.endswith(".ff_norm.beta") or k.endswith(".pre_norm.beta"): #These weights are all zero
                 state_dict.pop(k)
         return state_dict

+    def process_unet_state_dict_for_saving(self, state_dict):
+        replace_prefix = {"": "model.model."}
+        return utils.state_dict_prefix_replace(state_dict, replace_prefix)
+
     def clip_target(self, state_dict={}):
         return supported_models_base.ClipTarget(sa_t5.SAT5Tokenizer, sa_t5.SAT5Model)
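Loading strips the "conditioner.conditioners.seconds_total." prefix out of the checkpoint (filter_keys=True), and the new process_unet_state_dict_for_saving() puts the "model.model." prefix back onto the bare UNet keys when saving, which is what makes the saved file loadable again. The helper below is a simplified re-implementation whose semantics are inferred from these call sites, not a copy of ComfyUI's utils.state_dict_prefix_replace.

# Assumed semantics: rewrite key prefixes; with filter_keys=True, drop any key
# that does not match one of the given prefixes.
def prefix_replace(state_dict, replace_prefix, filter_keys=False):
    out = {}
    for k, v in state_dict.items():
        for old, new in replace_prefix.items():
            if k.startswith(old):
                out[new + k[len(old):]] = v
                break
        else:
            if not filter_keys:
                out[k] = v  # keep unmatched keys unless filtering
    return out

sd = {
    "conditioner.conditioners.seconds_total.proj.weight": 1,
    "model.model.transformer.block.0.weight": 2,
}

# Loading: keep only the seconds_total embedder weights, stripped of their prefix.
print(prefix_replace(sd, {"conditioner.conditioners.seconds_total.": ""}, filter_keys=True))
# {'proj.weight': 1}

# Saving: put the "model.model." prefix back onto bare UNet keys.
print(prefix_replace({"transformer.block.0.weight": 2}, {"": "model.model."}))
# {'model.model.transformer.block.0.weight': 2}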