ModelZoo / AnimateDiff_pytorch / Commits / 214c357b

Commit 214c357b authored Jan 02, 2024 by mashun1

    animatediff

Pipeline #690 canceled with stages
Changes: 147    Pipelines: 1

Showing 20 changed files with 2382 additions and 0 deletions (+2382, -0)
animatediff/utils/convert_from_ckpt.py                       +959  -0
animatediff/utils/convert_lora_safetensor_to_diffusers.py    +152  -0
animatediff/utils/util.py                                    +172  -0
app.py                                                       +328  -0
configs/inference/inference-v1.yaml                           +23  -0
configs/inference/inference-v2.yaml                           +24  -0
configs/inference/inference-v3.yaml                           +22  -0
configs/inference/sparsectrl/image_condition.yaml             +17  -0
configs/inference/sparsectrl/latent_condition.yaml            +17  -0
configs/prompts/v1/v1-1-ToonYou.yaml                          +40  -0
configs/prompts/v1/v1-2-Lyriel.yaml                           +46  -0
configs/prompts/v1/v1-3-RcnzCartoon.yaml                      +46  -0
configs/prompts/v1/v1-4-MajicMix.yaml                         +46  -0
configs/prompts/v1/v1-5-RealisticVision.yaml                  +46  -0
configs/prompts/v1/v1-6-Tusun.yaml                            +42  -0
configs/prompts/v1/v1-7-FilmVelvia.yaml                       +48  -0
configs/prompts/v1/v1-8-GhibliBackground.yaml                 +42  -0
configs/prompts/v2/v2-1-RealisticVision.yaml                  +21  -0
configs/prompts/v2/v2-2-RealisticVision-MotionLoRA.yaml      +174  -0
configs/prompts/v3/v3-1-T2V.yaml                             +117  -0
animatediff/utils/convert_from_ckpt.py    0 → 100644
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Conversion script for the Stable Diffusion checkpoints."""
import re
from io import BytesIO
from typing import Optional

import requests
import torch
from transformers import (
    AutoFeatureExtractor,
    BertTokenizerFast,
    CLIPImageProcessor,
    CLIPTextModel,
    CLIPTextModelWithProjection,
    CLIPTokenizer,
    CLIPVisionConfig,
    CLIPVisionModelWithProjection,
)

from diffusers.models import (
    AutoencoderKL,
    PriorTransformer,
    UNet2DConditionModel,
)
from diffusers.schedulers import (
    DDIMScheduler,
    DDPMScheduler,
    DPMSolverMultistepScheduler,
    EulerAncestralDiscreteScheduler,
    EulerDiscreteScheduler,
    HeunDiscreteScheduler,
    LMSDiscreteScheduler,
    PNDMScheduler,
    UnCLIPScheduler,
)
from diffusers.utils.import_utils import BACKENDS_MAPPING
def shave_segments(path, n_shave_prefix_segments=1):
    """
    Removes segments. Positive values shave the first segments, negative shave the last segments.
    """
    if n_shave_prefix_segments >= 0:
        return ".".join(path.split(".")[n_shave_prefix_segments:])
    else:
        return ".".join(path.split(".")[:n_shave_prefix_segments])


def renew_resnet_paths(old_list, n_shave_prefix_segments=0):
    """
    Updates paths inside resnets to the new naming scheme (local renaming)
    """
    mapping = []
    for old_item in old_list:
        new_item = old_item.replace("in_layers.0", "norm1")
        new_item = new_item.replace("in_layers.2", "conv1")

        new_item = new_item.replace("out_layers.0", "norm2")
        new_item = new_item.replace("out_layers.3", "conv2")

        new_item = new_item.replace("emb_layers.1", "time_emb_proj")
        new_item = new_item.replace("skip_connection", "conv_shortcut")

        new_item = shave_segments(new_item, n_shave_prefix_segments=n_shave_prefix_segments)

        mapping.append({"old": old_item, "new": new_item})

    return mapping


def renew_vae_resnet_paths(old_list, n_shave_prefix_segments=0):
    """
    Updates paths inside resnets to the new naming scheme (local renaming)
    """
    mapping = []
    for old_item in old_list:
        new_item = old_item

        new_item = new_item.replace("nin_shortcut", "conv_shortcut")
        new_item = shave_segments(new_item, n_shave_prefix_segments=n_shave_prefix_segments)

        mapping.append({"old": old_item, "new": new_item})

    return mapping


def renew_attention_paths(old_list, n_shave_prefix_segments=0):
    """
    Updates paths inside attentions to the new naming scheme (local renaming)
    """
    mapping = []
    for old_item in old_list:
        new_item = old_item

        # new_item = new_item.replace('norm.weight', 'group_norm.weight')
        # new_item = new_item.replace('norm.bias', 'group_norm.bias')

        # new_item = new_item.replace('proj_out.weight', 'proj_attn.weight')
        # new_item = new_item.replace('proj_out.bias', 'proj_attn.bias')

        # new_item = shave_segments(new_item, n_shave_prefix_segments=n_shave_prefix_segments)

        mapping.append({"old": old_item, "new": new_item})

    return mapping


def renew_vae_attention_paths(old_list, n_shave_prefix_segments=0):
    """
    Updates paths inside attentions to the new naming scheme (local renaming)
    """
    mapping = []
    for old_item in old_list:
        new_item = old_item

        new_item = new_item.replace("norm.weight", "group_norm.weight")
        new_item = new_item.replace("norm.bias", "group_norm.bias")

        new_item = new_item.replace("q.weight", "query.weight")
        new_item = new_item.replace("q.bias", "query.bias")

        new_item = new_item.replace("k.weight", "key.weight")
        new_item = new_item.replace("k.bias", "key.bias")

        new_item = new_item.replace("v.weight", "value.weight")
        new_item = new_item.replace("v.bias", "value.bias")

        new_item = new_item.replace("proj_out.weight", "proj_attn.weight")
        new_item = new_item.replace("proj_out.bias", "proj_attn.bias")

        new_item = shave_segments(new_item, n_shave_prefix_segments=n_shave_prefix_segments)

        mapping.append({"old": old_item, "new": new_item})

    return mapping
def assign_to_checkpoint(
    paths, checkpoint, old_checkpoint, attention_paths_to_split=None, additional_replacements=None, config=None
):
    """
    This does the final conversion step: take locally converted weights and apply a global renaming to them. It splits
    attention layers, and takes into account additional replacements that may arise.

    Assigns the weights to the new checkpoint.
    """
    assert isinstance(paths, list), "Paths should be a list of dicts containing 'old' and 'new' keys."

    # Splits the attention layers into three variables.
    if attention_paths_to_split is not None:
        for path, path_map in attention_paths_to_split.items():
            old_tensor = old_checkpoint[path]
            channels = old_tensor.shape[0] // 3

            target_shape = (-1, channels) if len(old_tensor.shape) == 3 else (-1)

            num_heads = old_tensor.shape[0] // config["num_head_channels"] // 3

            old_tensor = old_tensor.reshape((num_heads, 3 * channels // num_heads) + old_tensor.shape[1:])
            query, key, value = old_tensor.split(channels // num_heads, dim=1)

            checkpoint[path_map["query"]] = query.reshape(target_shape)
            checkpoint[path_map["key"]] = key.reshape(target_shape)
            checkpoint[path_map["value"]] = value.reshape(target_shape)

    for path in paths:
        new_path = path["new"]

        # These have already been assigned
        if attention_paths_to_split is not None and new_path in attention_paths_to_split:
            continue

        # Global renaming happens here
        new_path = new_path.replace("middle_block.0", "mid_block.resnets.0")
        new_path = new_path.replace("middle_block.1", "mid_block.attentions.0")
        new_path = new_path.replace("middle_block.2", "mid_block.resnets.1")

        if additional_replacements is not None:
            for replacement in additional_replacements:
                new_path = new_path.replace(replacement["old"], replacement["new"])

        # proj_attn.weight has to be converted from conv 1D to linear
        if "proj_attn.weight" in new_path:
            checkpoint[new_path] = old_checkpoint[path["old"]][:, :, 0]
        else:
            checkpoint[new_path] = old_checkpoint[path["old"]]


def conv_attn_to_linear(checkpoint):
    keys = list(checkpoint.keys())
    attn_keys = ["query.weight", "key.weight", "value.weight"]
    for key in keys:
        if ".".join(key.split(".")[-2:]) in attn_keys:
            if checkpoint[key].ndim > 2:
                checkpoint[key] = checkpoint[key][:, :, 0, 0]
        elif "proj_attn.weight" in key:
            if checkpoint[key].ndim > 2:
                checkpoint[key] = checkpoint[key][:, :, 0]
def create_unet_diffusers_config(original_config, image_size: int, controlnet=False):
    """
    Creates a config for the diffusers based on the config of the LDM model.
    """
    if controlnet:
        unet_params = original_config.model.params.control_stage_config.params
    else:
        unet_params = original_config.model.params.unet_config.params

    vae_params = original_config.model.params.first_stage_config.params.ddconfig

    block_out_channels = [unet_params.model_channels * mult for mult in unet_params.channel_mult]

    down_block_types = []
    resolution = 1
    for i in range(len(block_out_channels)):
        block_type = "CrossAttnDownBlock2D" if resolution in unet_params.attention_resolutions else "DownBlock2D"
        down_block_types.append(block_type)
        if i != len(block_out_channels) - 1:
            resolution *= 2

    up_block_types = []
    for i in range(len(block_out_channels)):
        block_type = "CrossAttnUpBlock2D" if resolution in unet_params.attention_resolutions else "UpBlock2D"
        up_block_types.append(block_type)
        resolution //= 2

    vae_scale_factor = 2 ** (len(vae_params.ch_mult) - 1)

    head_dim = unet_params.num_heads if "num_heads" in unet_params else None
    use_linear_projection = (
        unet_params.use_linear_in_transformer if "use_linear_in_transformer" in unet_params else False
    )
    if use_linear_projection:
        # stable diffusion 2-base-512 and 2-768
        if head_dim is None:
            head_dim = [5, 10, 20, 20]

    class_embed_type = None
    projection_class_embeddings_input_dim = None

    if "num_classes" in unet_params:
        if unet_params.num_classes == "sequential":
            class_embed_type = "projection"
            assert "adm_in_channels" in unet_params
            projection_class_embeddings_input_dim = unet_params.adm_in_channels
        else:
            raise NotImplementedError(f"Unknown conditional unet num_classes config: {unet_params.num_classes}")

    config = {
        "sample_size": image_size // vae_scale_factor,
        "in_channels": unet_params.in_channels,
        "down_block_types": tuple(down_block_types),
        "block_out_channels": tuple(block_out_channels),
        "layers_per_block": unet_params.num_res_blocks,
        "cross_attention_dim": unet_params.context_dim,
        "attention_head_dim": head_dim,
        "use_linear_projection": use_linear_projection,
        "class_embed_type": class_embed_type,
        "projection_class_embeddings_input_dim": projection_class_embeddings_input_dim,
    }

    if not controlnet:
        config["out_channels"] = unet_params.out_channels
        config["up_block_types"] = tuple(up_block_types)

    return config


def create_vae_diffusers_config(original_config, image_size: int):
    """
    Creates a config for the diffusers based on the config of the LDM model.
    """
    vae_params = original_config.model.params.first_stage_config.params.ddconfig
    _ = original_config.model.params.first_stage_config.params.embed_dim

    block_out_channels = [vae_params.ch * mult for mult in vae_params.ch_mult]
    down_block_types = ["DownEncoderBlock2D"] * len(block_out_channels)
    up_block_types = ["UpDecoderBlock2D"] * len(block_out_channels)

    config = {
        "sample_size": image_size,
        "in_channels": vae_params.in_channels,
        "out_channels": vae_params.out_ch,
        "down_block_types": tuple(down_block_types),
        "up_block_types": tuple(up_block_types),
        "block_out_channels": tuple(block_out_channels),
        "latent_channels": vae_params.z_channels,
        "layers_per_block": vae_params.num_res_blocks,
    }
    return config


def create_diffusers_schedular(original_config):
    schedular = DDIMScheduler(
        num_train_timesteps=original_config.model.params.timesteps,
        beta_start=original_config.model.params.linear_start,
        beta_end=original_config.model.params.linear_end,
        beta_schedule="scaled_linear",
    )
    return schedular


def create_ldm_bert_config(original_config):
    bert_params = original_config.model.parms.cond_stage_config.params

    config = LDMBertConfig(
        d_model=bert_params.n_embed,
        encoder_layers=bert_params.n_layer,
        encoder_ffn_dim=bert_params.n_embed * 4,
    )
    return config
def convert_ldm_unet_checkpoint(checkpoint, config, path=None, extract_ema=False, controlnet=False):
    """
    Takes a state dict and a config, and returns a converted checkpoint.
    """

    # extract state_dict for UNet
    unet_state_dict = {}
    keys = list(checkpoint.keys())

    if controlnet:
        unet_key = "control_model."
    else:
        unet_key = "model.diffusion_model."

    # at least a 100 parameters have to start with `model_ema` in order for the checkpoint to be EMA
    if sum(k.startswith("model_ema") for k in keys) > 100 and extract_ema:
        print(f"Checkpoint {path} has both EMA and non-EMA weights.")
        print(
            "In this conversion only the EMA weights are extracted. If you want to instead extract the non-EMA"
            " weights (useful to continue fine-tuning), please make sure to remove the `--extract_ema` flag."
        )
        for key in keys:
            if key.startswith("model.diffusion_model"):
                flat_ema_key = "model_ema." + "".join(key.split(".")[1:])
                unet_state_dict[key.replace(unet_key, "")] = checkpoint.pop(flat_ema_key)
    else:
        if sum(k.startswith("model_ema") for k in keys) > 100:
            print(
                "In this conversion only the non-EMA weights are extracted. If you want to instead extract the EMA"
                " weights (usually better for inference), please make sure to add the `--extract_ema` flag."
            )

        for key in keys:
            if key.startswith(unet_key):
                unet_state_dict[key.replace(unet_key, "")] = checkpoint.pop(key)

    new_checkpoint = {}

    new_checkpoint["time_embedding.linear_1.weight"] = unet_state_dict["time_embed.0.weight"]
    new_checkpoint["time_embedding.linear_1.bias"] = unet_state_dict["time_embed.0.bias"]
    new_checkpoint["time_embedding.linear_2.weight"] = unet_state_dict["time_embed.2.weight"]
    new_checkpoint["time_embedding.linear_2.bias"] = unet_state_dict["time_embed.2.bias"]

    if config["class_embed_type"] is None:
        # No parameters to port
        ...
    elif config["class_embed_type"] == "timestep" or config["class_embed_type"] == "projection":
        new_checkpoint["class_embedding.linear_1.weight"] = unet_state_dict["label_emb.0.0.weight"]
        new_checkpoint["class_embedding.linear_1.bias"] = unet_state_dict["label_emb.0.0.bias"]
        new_checkpoint["class_embedding.linear_2.weight"] = unet_state_dict["label_emb.0.2.weight"]
        new_checkpoint["class_embedding.linear_2.bias"] = unet_state_dict["label_emb.0.2.bias"]
    else:
        raise NotImplementedError(f"Not implemented `class_embed_type`: {config['class_embed_type']}")

    new_checkpoint["conv_in.weight"] = unet_state_dict["input_blocks.0.0.weight"]
    new_checkpoint["conv_in.bias"] = unet_state_dict["input_blocks.0.0.bias"]

    if not controlnet:
        new_checkpoint["conv_norm_out.weight"] = unet_state_dict["out.0.weight"]
        new_checkpoint["conv_norm_out.bias"] = unet_state_dict["out.0.bias"]
        new_checkpoint["conv_out.weight"] = unet_state_dict["out.2.weight"]
        new_checkpoint["conv_out.bias"] = unet_state_dict["out.2.bias"]

    # Retrieves the keys for the input blocks only
    num_input_blocks = len({".".join(layer.split(".")[:2]) for layer in unet_state_dict if "input_blocks" in layer})
    input_blocks = {
        layer_id: [key for key in unet_state_dict if f"input_blocks.{layer_id}" in key]
        for layer_id in range(num_input_blocks)
    }

    # Retrieves the keys for the middle blocks only
    num_middle_blocks = len({".".join(layer.split(".")[:2]) for layer in unet_state_dict if "middle_block" in layer})
    middle_blocks = {
        layer_id: [key for key in unet_state_dict if f"middle_block.{layer_id}" in key]
        for layer_id in range(num_middle_blocks)
    }

    # Retrieves the keys for the output blocks only
    num_output_blocks = len({".".join(layer.split(".")[:2]) for layer in unet_state_dict if "output_blocks" in layer})
    output_blocks = {
        layer_id: [key for key in unet_state_dict if f"output_blocks.{layer_id}" in key]
        for layer_id in range(num_output_blocks)
    }

    for i in range(1, num_input_blocks):
        block_id = (i - 1) // (config["layers_per_block"] + 1)
        layer_in_block_id = (i - 1) % (config["layers_per_block"] + 1)

        resnets = [
            key for key in input_blocks[i] if f"input_blocks.{i}.0" in key and f"input_blocks.{i}.0.op" not in key
        ]
        attentions = [key for key in input_blocks[i] if f"input_blocks.{i}.1" in key]

        if f"input_blocks.{i}.0.op.weight" in unet_state_dict:
            new_checkpoint[f"down_blocks.{block_id}.downsamplers.0.conv.weight"] = unet_state_dict.pop(
                f"input_blocks.{i}.0.op.weight"
            )
            new_checkpoint[f"down_blocks.{block_id}.downsamplers.0.conv.bias"] = unet_state_dict.pop(
                f"input_blocks.{i}.0.op.bias"
            )

        paths = renew_resnet_paths(resnets)
        meta_path = {"old": f"input_blocks.{i}.0", "new": f"down_blocks.{block_id}.resnets.{layer_in_block_id}"}
        assign_to_checkpoint(
            paths, new_checkpoint, unet_state_dict, additional_replacements=[meta_path], config=config
        )

        if len(attentions):
            paths = renew_attention_paths(attentions)
            meta_path = {"old": f"input_blocks.{i}.1", "new": f"down_blocks.{block_id}.attentions.{layer_in_block_id}"}
            assign_to_checkpoint(
                paths, new_checkpoint, unet_state_dict, additional_replacements=[meta_path], config=config
            )

    resnet_0 = middle_blocks[0]
    attentions = middle_blocks[1]
    resnet_1 = middle_blocks[2]

    resnet_0_paths = renew_resnet_paths(resnet_0)
    assign_to_checkpoint(resnet_0_paths, new_checkpoint, unet_state_dict, config=config)

    resnet_1_paths = renew_resnet_paths(resnet_1)
    assign_to_checkpoint(resnet_1_paths, new_checkpoint, unet_state_dict, config=config)

    attentions_paths = renew_attention_paths(attentions)
    meta_path = {"old": "middle_block.1", "new": "mid_block.attentions.0"}
    assign_to_checkpoint(
        attentions_paths, new_checkpoint, unet_state_dict, additional_replacements=[meta_path], config=config
    )

    for i in range(num_output_blocks):
        block_id = i // (config["layers_per_block"] + 1)
        layer_in_block_id = i % (config["layers_per_block"] + 1)
        output_block_layers = [shave_segments(name, 2) for name in output_blocks[i]]
        output_block_list = {}

        for layer in output_block_layers:
            layer_id, layer_name = layer.split(".")[0], shave_segments(layer, 1)
            if layer_id in output_block_list:
                output_block_list[layer_id].append(layer_name)
            else:
                output_block_list[layer_id] = [layer_name]

        if len(output_block_list) > 1:
            resnets = [key for key in output_blocks[i] if f"output_blocks.{i}.0" in key]
            attentions = [key for key in output_blocks[i] if f"output_blocks.{i}.1" in key]

            resnet_0_paths = renew_resnet_paths(resnets)
            paths = renew_resnet_paths(resnets)

            meta_path = {"old": f"output_blocks.{i}.0", "new": f"up_blocks.{block_id}.resnets.{layer_in_block_id}"}
            assign_to_checkpoint(
                paths, new_checkpoint, unet_state_dict, additional_replacements=[meta_path], config=config
            )

            output_block_list = {k: sorted(v) for k, v in output_block_list.items()}
            if ["conv.bias", "conv.weight"] in output_block_list.values():
                index = list(output_block_list.values()).index(["conv.bias", "conv.weight"])
                new_checkpoint[f"up_blocks.{block_id}.upsamplers.0.conv.weight"] = unet_state_dict[
                    f"output_blocks.{i}.{index}.conv.weight"
                ]
                new_checkpoint[f"up_blocks.{block_id}.upsamplers.0.conv.bias"] = unet_state_dict[
                    f"output_blocks.{i}.{index}.conv.bias"
                ]

                # Clear attentions as they have been attributed above.
                if len(attentions) == 2:
                    attentions = []

            if len(attentions):
                paths = renew_attention_paths(attentions)
                meta_path = {
                    "old": f"output_blocks.{i}.1",
                    "new": f"up_blocks.{block_id}.attentions.{layer_in_block_id}",
                }
                assign_to_checkpoint(
                    paths, new_checkpoint, unet_state_dict, additional_replacements=[meta_path], config=config
                )
        else:
            resnet_0_paths = renew_resnet_paths(output_block_layers, n_shave_prefix_segments=1)
            for path in resnet_0_paths:
                old_path = ".".join(["output_blocks", str(i), path["old"]])
                new_path = ".".join(["up_blocks", str(block_id), "resnets", str(layer_in_block_id), path["new"]])

                new_checkpoint[new_path] = unet_state_dict[old_path]

    if controlnet:
        # conditioning embedding

        orig_index = 0

        new_checkpoint["controlnet_cond_embedding.conv_in.weight"] = unet_state_dict.pop(
            f"input_hint_block.{orig_index}.weight"
        )
        new_checkpoint["controlnet_cond_embedding.conv_in.bias"] = unet_state_dict.pop(
            f"input_hint_block.{orig_index}.bias"
        )

        orig_index += 2

        diffusers_index = 0

        while diffusers_index < 6:
            new_checkpoint[f"controlnet_cond_embedding.blocks.{diffusers_index}.weight"] = unet_state_dict.pop(
                f"input_hint_block.{orig_index}.weight"
            )
            new_checkpoint[f"controlnet_cond_embedding.blocks.{diffusers_index}.bias"] = unet_state_dict.pop(
                f"input_hint_block.{orig_index}.bias"
            )
            diffusers_index += 1
            orig_index += 2

        new_checkpoint["controlnet_cond_embedding.conv_out.weight"] = unet_state_dict.pop(
            f"input_hint_block.{orig_index}.weight"
        )
        new_checkpoint["controlnet_cond_embedding.conv_out.bias"] = unet_state_dict.pop(
            f"input_hint_block.{orig_index}.bias"
        )

        # down blocks
        for i in range(num_input_blocks):
            new_checkpoint[f"controlnet_down_blocks.{i}.weight"] = unet_state_dict.pop(f"zero_convs.{i}.0.weight")
            new_checkpoint[f"controlnet_down_blocks.{i}.bias"] = unet_state_dict.pop(f"zero_convs.{i}.0.bias")

        # mid block
        new_checkpoint["controlnet_mid_block.weight"] = unet_state_dict.pop("middle_block_out.0.weight")
        new_checkpoint["controlnet_mid_block.bias"] = unet_state_dict.pop("middle_block_out.0.bias")

    return new_checkpoint
def convert_ldm_vae_checkpoint(checkpoint, config):
    # extract state dict for VAE
    vae_state_dict = {}
    vae_key = "first_stage_model."
    keys = list(checkpoint.keys())
    for key in keys:
        if key.startswith(vae_key):
            vae_state_dict[key.replace(vae_key, "")] = checkpoint.get(key)

    new_checkpoint = {}

    new_checkpoint["encoder.conv_in.weight"] = vae_state_dict["encoder.conv_in.weight"]
    new_checkpoint["encoder.conv_in.bias"] = vae_state_dict["encoder.conv_in.bias"]
    new_checkpoint["encoder.conv_out.weight"] = vae_state_dict["encoder.conv_out.weight"]
    new_checkpoint["encoder.conv_out.bias"] = vae_state_dict["encoder.conv_out.bias"]
    new_checkpoint["encoder.conv_norm_out.weight"] = vae_state_dict["encoder.norm_out.weight"]
    new_checkpoint["encoder.conv_norm_out.bias"] = vae_state_dict["encoder.norm_out.bias"]

    new_checkpoint["decoder.conv_in.weight"] = vae_state_dict["decoder.conv_in.weight"]
    new_checkpoint["decoder.conv_in.bias"] = vae_state_dict["decoder.conv_in.bias"]
    new_checkpoint["decoder.conv_out.weight"] = vae_state_dict["decoder.conv_out.weight"]
    new_checkpoint["decoder.conv_out.bias"] = vae_state_dict["decoder.conv_out.bias"]
    new_checkpoint["decoder.conv_norm_out.weight"] = vae_state_dict["decoder.norm_out.weight"]
    new_checkpoint["decoder.conv_norm_out.bias"] = vae_state_dict["decoder.norm_out.bias"]

    new_checkpoint["quant_conv.weight"] = vae_state_dict["quant_conv.weight"]
    new_checkpoint["quant_conv.bias"] = vae_state_dict["quant_conv.bias"]
    new_checkpoint["post_quant_conv.weight"] = vae_state_dict["post_quant_conv.weight"]
    new_checkpoint["post_quant_conv.bias"] = vae_state_dict["post_quant_conv.bias"]

    # Retrieves the keys for the encoder down blocks only
    num_down_blocks = len({".".join(layer.split(".")[:3]) for layer in vae_state_dict if "encoder.down" in layer})
    down_blocks = {
        layer_id: [key for key in vae_state_dict if f"down.{layer_id}" in key] for layer_id in range(num_down_blocks)
    }

    # Retrieves the keys for the decoder up blocks only
    num_up_blocks = len({".".join(layer.split(".")[:3]) for layer in vae_state_dict if "decoder.up" in layer})
    up_blocks = {
        layer_id: [key for key in vae_state_dict if f"up.{layer_id}" in key] for layer_id in range(num_up_blocks)
    }

    for i in range(num_down_blocks):
        resnets = [key for key in down_blocks[i] if f"down.{i}" in key and f"down.{i}.downsample" not in key]

        if f"encoder.down.{i}.downsample.conv.weight" in vae_state_dict:
            new_checkpoint[f"encoder.down_blocks.{i}.downsamplers.0.conv.weight"] = vae_state_dict.pop(
                f"encoder.down.{i}.downsample.conv.weight"
            )
            new_checkpoint[f"encoder.down_blocks.{i}.downsamplers.0.conv.bias"] = vae_state_dict.pop(
                f"encoder.down.{i}.downsample.conv.bias"
            )

        paths = renew_vae_resnet_paths(resnets)
        meta_path = {"old": f"down.{i}.block", "new": f"down_blocks.{i}.resnets"}
        assign_to_checkpoint(paths, new_checkpoint, vae_state_dict, additional_replacements=[meta_path], config=config)

    mid_resnets = [key for key in vae_state_dict if "encoder.mid.block" in key]
    num_mid_res_blocks = 2
    for i in range(1, num_mid_res_blocks + 1):
        resnets = [key for key in mid_resnets if f"encoder.mid.block_{i}" in key]

        paths = renew_vae_resnet_paths(resnets)
        meta_path = {"old": f"mid.block_{i}", "new": f"mid_block.resnets.{i - 1}"}
        assign_to_checkpoint(paths, new_checkpoint, vae_state_dict, additional_replacements=[meta_path], config=config)

    mid_attentions = [key for key in vae_state_dict if "encoder.mid.attn" in key]
    paths = renew_vae_attention_paths(mid_attentions)
    meta_path = {"old": "mid.attn_1", "new": "mid_block.attentions.0"}
    assign_to_checkpoint(paths, new_checkpoint, vae_state_dict, additional_replacements=[meta_path], config=config)
    conv_attn_to_linear(new_checkpoint)

    for i in range(num_up_blocks):
        block_id = num_up_blocks - 1 - i
        resnets = [
            key for key in up_blocks[block_id] if f"up.{block_id}" in key and f"up.{block_id}.upsample" not in key
        ]

        if f"decoder.up.{block_id}.upsample.conv.weight" in vae_state_dict:
            new_checkpoint[f"decoder.up_blocks.{i}.upsamplers.0.conv.weight"] = vae_state_dict[
                f"decoder.up.{block_id}.upsample.conv.weight"
            ]
            new_checkpoint[f"decoder.up_blocks.{i}.upsamplers.0.conv.bias"] = vae_state_dict[
                f"decoder.up.{block_id}.upsample.conv.bias"
            ]

        paths = renew_vae_resnet_paths(resnets)
        meta_path = {"old": f"up.{block_id}.block", "new": f"up_blocks.{i}.resnets"}
        assign_to_checkpoint(paths, new_checkpoint, vae_state_dict, additional_replacements=[meta_path], config=config)

    mid_resnets = [key for key in vae_state_dict if "decoder.mid.block" in key]
    num_mid_res_blocks = 2
    for i in range(1, num_mid_res_blocks + 1):
        resnets = [key for key in mid_resnets if f"decoder.mid.block_{i}" in key]

        paths = renew_vae_resnet_paths(resnets)
        meta_path = {"old": f"mid.block_{i}", "new": f"mid_block.resnets.{i - 1}"}
        assign_to_checkpoint(paths, new_checkpoint, vae_state_dict, additional_replacements=[meta_path], config=config)

    mid_attentions = [key for key in vae_state_dict if "decoder.mid.attn" in key]
    paths = renew_vae_attention_paths(mid_attentions)
    meta_path = {"old": "mid.attn_1", "new": "mid_block.attentions.0"}
    assign_to_checkpoint(paths, new_checkpoint, vae_state_dict, additional_replacements=[meta_path], config=config)
    conv_attn_to_linear(new_checkpoint)
    return new_checkpoint
def convert_ldm_bert_checkpoint(checkpoint, config):
    def _copy_attn_layer(hf_attn_layer, pt_attn_layer):
        hf_attn_layer.q_proj.weight.data = pt_attn_layer.to_q.weight
        hf_attn_layer.k_proj.weight.data = pt_attn_layer.to_k.weight
        hf_attn_layer.v_proj.weight.data = pt_attn_layer.to_v.weight

        hf_attn_layer.out_proj.weight = pt_attn_layer.to_out.weight
        hf_attn_layer.out_proj.bias = pt_attn_layer.to_out.bias

    def _copy_linear(hf_linear, pt_linear):
        hf_linear.weight = pt_linear.weight
        hf_linear.bias = pt_linear.bias

    def _copy_layer(hf_layer, pt_layer):
        # copy layer norms
        _copy_linear(hf_layer.self_attn_layer_norm, pt_layer[0][0])
        _copy_linear(hf_layer.final_layer_norm, pt_layer[1][0])

        # copy attn
        _copy_attn_layer(hf_layer.self_attn, pt_layer[0][1])

        # copy MLP
        pt_mlp = pt_layer[1][1]
        _copy_linear(hf_layer.fc1, pt_mlp.net[0][0])
        _copy_linear(hf_layer.fc2, pt_mlp.net[2])

    def _copy_layers(hf_layers, pt_layers):
        for i, hf_layer in enumerate(hf_layers):
            if i != 0:
                i += i
            pt_layer = pt_layers[i : i + 2]
            _copy_layer(hf_layer, pt_layer)

    hf_model = LDMBertModel(config).eval()

    # copy embeds
    hf_model.model.embed_tokens.weight = checkpoint.transformer.token_emb.weight
    hf_model.model.embed_positions.weight.data = checkpoint.transformer.pos_emb.emb.weight

    # copy layer norm
    _copy_linear(hf_model.model.layer_norm, checkpoint.transformer.norm)

    # copy hidden layers
    _copy_layers(hf_model.model.layers, checkpoint.transformer.attn_layers.layers)

    _copy_linear(hf_model.to_logits, checkpoint.transformer.to_logits)

    return hf_model


def convert_ldm_clip_checkpoint(checkpoint):
    text_model = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")
    keys = list(checkpoint.keys())

    text_model_dict = {}

    for key in keys:
        if key.startswith("cond_stage_model.transformer"):
            text_model_dict[key[len("cond_stage_model.transformer.") :]] = checkpoint[key]

    text_model.load_state_dict(text_model_dict)

    return text_model


textenc_conversion_lst = [
    ("cond_stage_model.model.positional_embedding", "text_model.embeddings.position_embedding.weight"),
    ("cond_stage_model.model.token_embedding.weight", "text_model.embeddings.token_embedding.weight"),
    ("cond_stage_model.model.ln_final.weight", "text_model.final_layer_norm.weight"),
    ("cond_stage_model.model.ln_final.bias", "text_model.final_layer_norm.bias"),
]
textenc_conversion_map = {x[0]: x[1] for x in textenc_conversion_lst}

textenc_transformer_conversion_lst = [
    # (stable-diffusion, HF Diffusers)
    ("resblocks.", "text_model.encoder.layers."),
    ("ln_1", "layer_norm1"),
    ("ln_2", "layer_norm2"),
    (".c_fc.", ".fc1."),
    (".c_proj.", ".fc2."),
    (".attn", ".self_attn"),
    ("ln_final.", "transformer.text_model.final_layer_norm."),
    ("token_embedding.weight", "transformer.text_model.embeddings.token_embedding.weight"),
    ("positional_embedding", "transformer.text_model.embeddings.position_embedding.weight"),
]
protected = {re.escape(x[0]): x[1] for x in textenc_transformer_conversion_lst}
textenc_pattern = re.compile("|".join(protected.keys()))
def convert_paint_by_example_checkpoint(checkpoint):
    config = CLIPVisionConfig.from_pretrained("openai/clip-vit-large-patch14")
    model = PaintByExampleImageEncoder(config)

    keys = list(checkpoint.keys())

    text_model_dict = {}

    for key in keys:
        if key.startswith("cond_stage_model.transformer"):
            text_model_dict[key[len("cond_stage_model.transformer.") :]] = checkpoint[key]

    # load clip vision
    model.model.load_state_dict(text_model_dict)

    # load mapper
    keys_mapper = {
        k[len("cond_stage_model.mapper.res") :]: v
        for k, v in checkpoint.items()
        if k.startswith("cond_stage_model.mapper")
    }

    MAPPING = {
        "attn.c_qkv": ["attn1.to_q", "attn1.to_k", "attn1.to_v"],
        "attn.c_proj": ["attn1.to_out.0"],
        "ln_1": ["norm1"],
        "ln_2": ["norm3"],
        "mlp.c_fc": ["ff.net.0.proj"],
        "mlp.c_proj": ["ff.net.2"],
    }

    mapped_weights = {}
    for key, value in keys_mapper.items():
        prefix = key[: len("blocks.i")]
        suffix = key.split(prefix)[-1].split(".")[-1]
        name = key.split(prefix)[-1].split(suffix)[0][1:-1]
        mapped_names = MAPPING[name]

        num_splits = len(mapped_names)
        for i, mapped_name in enumerate(mapped_names):
            new_name = ".".join([prefix, mapped_name, suffix])
            shape = value.shape[0] // num_splits
            mapped_weights[new_name] = value[i * shape : (i + 1) * shape]

    model.mapper.load_state_dict(mapped_weights)

    # load final layer norm
    model.final_layer_norm.load_state_dict(
        {
            "bias": checkpoint["cond_stage_model.final_ln.bias"],
            "weight": checkpoint["cond_stage_model.final_ln.weight"],
        }
    )

    # load final proj
    model.proj_out.load_state_dict(
        {
            "bias": checkpoint["proj_out.bias"],
            "weight": checkpoint["proj_out.weight"],
        }
    )

    # load uncond vector
    model.uncond_vector.data = torch.nn.Parameter(checkpoint["learnable_vector"])
    return model


def convert_open_clip_checkpoint(checkpoint):
    text_model = CLIPTextModel.from_pretrained("stabilityai/stable-diffusion-2", subfolder="text_encoder")

    keys = list(checkpoint.keys())

    text_model_dict = {}

    if "cond_stage_model.model.text_projection" in checkpoint:
        d_model = int(checkpoint["cond_stage_model.model.text_projection"].shape[0])
    else:
        d_model = 1024

    text_model_dict["text_model.embeddings.position_ids"] = text_model.text_model.embeddings.get_buffer("position_ids")

    for key in keys:
        if "resblocks.23" in key:
            # Diffusers drops the final layer and only uses the penultimate layer
            continue
        if key in textenc_conversion_map:
            text_model_dict[textenc_conversion_map[key]] = checkpoint[key]
        if key.startswith("cond_stage_model.model.transformer."):
            new_key = key[len("cond_stage_model.model.transformer.") :]
            if new_key.endswith(".in_proj_weight"):
                new_key = new_key[: -len(".in_proj_weight")]
                new_key = textenc_pattern.sub(lambda m: protected[re.escape(m.group(0))], new_key)
                text_model_dict[new_key + ".q_proj.weight"] = checkpoint[key][:d_model, :]
                text_model_dict[new_key + ".k_proj.weight"] = checkpoint[key][d_model : d_model * 2, :]
                text_model_dict[new_key + ".v_proj.weight"] = checkpoint[key][d_model * 2 :, :]
            elif new_key.endswith(".in_proj_bias"):
                new_key = new_key[: -len(".in_proj_bias")]
                new_key = textenc_pattern.sub(lambda m: protected[re.escape(m.group(0))], new_key)
                text_model_dict[new_key + ".q_proj.bias"] = checkpoint[key][:d_model]
                text_model_dict[new_key + ".k_proj.bias"] = checkpoint[key][d_model : d_model * 2]
                text_model_dict[new_key + ".v_proj.bias"] = checkpoint[key][d_model * 2 :]
            else:
                new_key = textenc_pattern.sub(lambda m: protected[re.escape(m.group(0))], new_key)

                text_model_dict[new_key] = checkpoint[key]

    text_model.load_state_dict(text_model_dict)

    return text_model
def stable_unclip_image_encoder(original_config):
    """
    Returns the image processor and clip image encoder for the img2img unclip pipeline.

    We currently know of two types of stable unclip models which separately use the clip and the openclip image
    encoders.
    """

    image_embedder_config = original_config.model.params.embedder_config

    sd_clip_image_embedder_class = image_embedder_config.target
    sd_clip_image_embedder_class = sd_clip_image_embedder_class.split(".")[-1]

    if sd_clip_image_embedder_class == "ClipImageEmbedder":
        clip_model_name = image_embedder_config.params.model

        if clip_model_name == "ViT-L/14":
            feature_extractor = CLIPImageProcessor()
            image_encoder = CLIPVisionModelWithProjection.from_pretrained("openai/clip-vit-large-patch14")
        else:
            raise NotImplementedError(f"Unknown CLIP checkpoint name in stable diffusion checkpoint {clip_model_name}")

    elif sd_clip_image_embedder_class == "FrozenOpenCLIPImageEmbedder":
        feature_extractor = CLIPImageProcessor()
        image_encoder = CLIPVisionModelWithProjection.from_pretrained("laion/CLIP-ViT-H-14-laion2B-s32B-b79K")

    else:
        raise NotImplementedError(
            f"Unknown CLIP image embedder class in stable diffusion checkpoint {sd_clip_image_embedder_class}"
        )

    return feature_extractor, image_encoder


def stable_unclip_image_noising_components(
    original_config, clip_stats_path: Optional[str] = None, device: Optional[str] = None
):
    """
    Returns the noising components for the img2img and txt2img unclip pipelines.

    Converts the stability noise augmentor into
    1. a `StableUnCLIPImageNormalizer` for holding the CLIP stats
    2. a `DDPMScheduler` for holding the noise schedule

    If the noise augmentor config specifies a clip stats path, the `clip_stats_path` must be provided.
    """
    noise_aug_config = original_config.model.params.noise_aug_config
    noise_aug_class = noise_aug_config.target
    noise_aug_class = noise_aug_class.split(".")[-1]

    if noise_aug_class == "CLIPEmbeddingNoiseAugmentation":
        noise_aug_config = noise_aug_config.params
        embedding_dim = noise_aug_config.timestep_dim
        max_noise_level = noise_aug_config.noise_schedule_config.timesteps
        beta_schedule = noise_aug_config.noise_schedule_config.beta_schedule

        image_normalizer = StableUnCLIPImageNormalizer(embedding_dim=embedding_dim)
        image_noising_scheduler = DDPMScheduler(num_train_timesteps=max_noise_level, beta_schedule=beta_schedule)

        if "clip_stats_path" in noise_aug_config:
            if clip_stats_path is None:
                raise ValueError("This stable unclip config requires a `clip_stats_path`")

            clip_mean, clip_std = torch.load(clip_stats_path, map_location=device)
            clip_mean = clip_mean[None, :]
            clip_std = clip_std[None, :]

            clip_stats_state_dict = {
                "mean": clip_mean,
                "std": clip_std,
            }

            image_normalizer.load_state_dict(clip_stats_state_dict)
    else:
        raise NotImplementedError(f"Unknown noise augmentor class: {noise_aug_class}")

    return image_normalizer, image_noising_scheduler


def convert_controlnet_checkpoint(
    checkpoint, original_config, checkpoint_path, image_size, upcast_attention, extract_ema
):
    ctrlnet_config = create_unet_diffusers_config(original_config, image_size=image_size, controlnet=True)
    ctrlnet_config["upcast_attention"] = upcast_attention

    ctrlnet_config.pop("sample_size")

    controlnet_model = ControlNetModel(**ctrlnet_config)

    converted_ctrl_checkpoint = convert_ldm_unet_checkpoint(
        checkpoint, ctrlnet_config, path=checkpoint_path, extract_ema=extract_ema, controlnet=True
    )

    controlnet_model.load_state_dict(converted_ctrl_checkpoint)

    return controlnet_model
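Taken together, these helpers turn an original Stable Diffusion (LDM) checkpoint into diffusers-style state dicts: a config is first derived from the LDM YAML, then the matching `convert_ldm_*_checkpoint` function renames and reshapes the weights. A minimal usage sketch follows; the config file name, checkpoint path, and image size are assumptions for illustration, not values fixed by this commit.

# Hedged usage sketch (paths and sizes are assumed): convert a .ckpt Stable Diffusion
# checkpoint into diffusers-format UNet and VAE state dicts.
import torch
from omegaconf import OmegaConf

from animatediff.utils.convert_from_ckpt import (
    create_unet_diffusers_config,
    create_vae_diffusers_config,
    convert_ldm_unet_checkpoint,
    convert_ldm_vae_checkpoint,
)

original_config = OmegaConf.load("v1-inference.yaml")        # assumed LDM config file
state_dict = torch.load("sd-v1-5.ckpt", map_location="cpu")  # assumed checkpoint path
state_dict = state_dict.get("state_dict", state_dict)

# derive diffusers configs from the LDM YAML, then rename/reshape the weights
unet_config = create_unet_diffusers_config(original_config, image_size=512)
unet_weights = convert_ldm_unet_checkpoint(state_dict, unet_config, path="sd-v1-5.ckpt")

vae_config = create_vae_diffusers_config(original_config, image_size=512)
vae_weights = convert_ldm_vae_checkpoint(state_dict, vae_config)

# unet_weights / vae_weights can then be loaded into diffusers models via load_state_dict,
# which is how util.load_weights and app.py below consume these functions.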
animatediff/utils/convert_lora_safetensor_to_diffusers.py    0 → 100644
# coding=utf-8
# Copyright 2023, Haofan Wang, Qixun Wang, All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Changes were made to this source code by Yuwei Guo.
""" Conversion script for the LoRA's safetensors checkpoints. """
import argparse

import torch
from safetensors.torch import load_file
from diffusers import StableDiffusionPipeline


def load_diffusers_lora(pipeline, state_dict, alpha=1.0):
    # directly update weight in diffusers model
    for key in state_dict:
        # only process lora down key
        if "up." in key:
            continue

        up_key = key.replace(".down.", ".up.")
        model_key = key.replace("processor.", "").replace("_lora", "").replace("down.", "").replace("up.", "")
        model_key = model_key.replace("to_out.", "to_out.0.")
        layer_infos = model_key.split(".")[:-1]

        curr_layer = pipeline.unet
        while len(layer_infos) > 0:
            temp_name = layer_infos.pop(0)
            curr_layer = curr_layer.__getattr__(temp_name)

        weight_down = state_dict[key]
        weight_up = state_dict[up_key]
        curr_layer.weight.data += alpha * torch.mm(weight_up, weight_down).to(curr_layer.weight.data.device)

    return pipeline
def convert_lora(pipeline, state_dict, LORA_PREFIX_UNET="lora_unet", LORA_PREFIX_TEXT_ENCODER="lora_te", alpha=0.6):
    # load base model
    # pipeline = StableDiffusionPipeline.from_pretrained(base_model_path, torch_dtype=torch.float32)

    # load LoRA weight from .safetensors
    # state_dict = load_file(checkpoint_path)

    visited = []

    # directly update weight in diffusers model
    for key in state_dict:
        # it is suggested to print out the key, it usually will be something like below
        # "lora_te_text_model_encoder_layers_0_self_attn_k_proj.lora_down.weight"

        # as we have set the alpha beforehand, so just skip
        if ".alpha" in key or key in visited:
            continue

        if "text" in key:
            layer_infos = key.split(".")[0].split(LORA_PREFIX_TEXT_ENCODER + "_")[-1].split("_")
            curr_layer = pipeline.text_encoder
        else:
            layer_infos = key.split(".")[0].split(LORA_PREFIX_UNET + "_")[-1].split("_")
            curr_layer = pipeline.unet

        # find the target layer
        temp_name = layer_infos.pop(0)
        while len(layer_infos) > -1:
            try:
                curr_layer = curr_layer.__getattr__(temp_name)
                if len(layer_infos) > 0:
                    temp_name = layer_infos.pop(0)
                elif len(layer_infos) == 0:
                    break
            except Exception:
                if len(temp_name) > 0:
                    temp_name += "_" + layer_infos.pop(0)
                else:
                    temp_name = layer_infos.pop(0)

        pair_keys = []
        if "lora_down" in key:
            pair_keys.append(key.replace("lora_down", "lora_up"))
            pair_keys.append(key)
        else:
            pair_keys.append(key)
            pair_keys.append(key.replace("lora_up", "lora_down"))

        # update weight
        if len(state_dict[pair_keys[0]].shape) == 4:
            weight_up = state_dict[pair_keys[0]].squeeze(3).squeeze(2).to(torch.float32)
            weight_down = state_dict[pair_keys[1]].squeeze(3).squeeze(2).to(torch.float32)
            curr_layer.weight.data += alpha * torch.mm(weight_up, weight_down).unsqueeze(2).unsqueeze(3).to(
                curr_layer.weight.data.device
            )
        else:
            weight_up = state_dict[pair_keys[0]].to(torch.float32)
            weight_down = state_dict[pair_keys[1]].to(torch.float32)
            curr_layer.weight.data += alpha * torch.mm(weight_up, weight_down).to(curr_layer.weight.data.device)

        # update visited list
        for item in pair_keys:
            visited.append(item)

    return pipeline
if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--base_model_path", default=None, type=str, required=True, help="Path to the base model in diffusers format."
    )
    parser.add_argument(
        "--checkpoint_path", default=None, type=str, required=True, help="Path to the checkpoint to convert."
    )
    parser.add_argument("--dump_path", default=None, type=str, required=True, help="Path to the output model.")
    parser.add_argument(
        "--lora_prefix_unet", default="lora_unet", type=str, help="The prefix of UNet weight in safetensors"
    )
    parser.add_argument(
        "--lora_prefix_text_encoder",
        default="lora_te",
        type=str,
        help="The prefix of text encoder weight in safetensors",
    )
    parser.add_argument("--alpha", default=0.75, type=float, help="The merging ratio in W = W0 + alpha * deltaW")
    parser.add_argument(
        "--to_safetensors", action="store_true", help="Whether to store pipeline in safetensors format or not."
    )
    parser.add_argument("--device", type=str, help="Device to use (e.g. cpu, cuda:0, cuda:1, etc.)")

    args = parser.parse_args()

    base_model_path = args.base_model_path
    checkpoint_path = args.checkpoint_path
    dump_path = args.dump_path
    lora_prefix_unet = args.lora_prefix_unet
    lora_prefix_text_encoder = args.lora_prefix_text_encoder
    alpha = args.alpha

    # load the base diffusers pipeline and the safetensors LoRA weights, then merge them
    # in place with convert_lora defined above
    pipe = StableDiffusionPipeline.from_pretrained(base_model_path, torch_dtype=torch.float32)
    lora_state_dict = load_file(checkpoint_path)
    pipe = convert_lora(pipe, lora_state_dict, lora_prefix_unet, lora_prefix_text_encoder, alpha)

    pipe = pipe.to(args.device)
    pipe.save_pretrained(args.dump_path, safe_serialization=args.to_safetensors)
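As the commented-out lines inside `convert_lora` indicate, the function expects an already-loaded diffusers pipeline plus a LoRA state dict, and merges W = W0 + alpha * (up @ down) directly into the UNet and text encoder weights. A minimal sketch, with the base model name and LoRA file assumed for illustration:

# Hedged sketch: merge a personalized LoRA into a diffusers pipeline (paths are assumptions).
import torch
from diffusers import StableDiffusionPipeline
from safetensors.torch import load_file

from animatediff.utils.convert_lora_safetensor_to_diffusers import convert_lora

pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float32)
lora_state_dict = load_file("toonyou_beta3.safetensors")  # assumed LoRA checkpoint

# weights are updated in place; alpha controls the merge strength
pipe = convert_lora(pipe, lora_state_dict, alpha=0.8)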
animatediff/utils/util.py    0 → 100644
import os
import imageio
import numpy as np
from typing import Union

import torch
import torchvision
import torch.distributed as dist

from safetensors import safe_open
from tqdm import tqdm
from einops import rearrange
from animatediff.utils.convert_from_ckpt import convert_ldm_unet_checkpoint, convert_ldm_clip_checkpoint, convert_ldm_vae_checkpoint
from animatediff.utils.convert_lora_safetensor_to_diffusers import convert_lora, load_diffusers_lora


def zero_rank_print(s):
    if (not dist.is_initialized()) and (dist.is_initialized() and dist.get_rank() == 0):
        print("### " + s)
def save_videos_grid(videos: torch.Tensor, path: str, rescale=False, n_rows=6, fps=8):
    videos = rearrange(videos, "b c t h w -> t b c h w")
    outputs = []
    for x in videos:
        x = torchvision.utils.make_grid(x, nrow=n_rows)
        x = x.transpose(0, 1).transpose(1, 2).squeeze(-1)
        if rescale:
            x = (x + 1.0) / 2.0  # -1,1 -> 0,1
        x = (x * 255).numpy().astype(np.uint8)
        outputs.append(x)

    os.makedirs(os.path.dirname(path), exist_ok=True)
    imageio.mimsave(path, outputs, fps=fps)
# DDIM Inversion
@torch.no_grad()
def init_prompt(prompt, pipeline):
    uncond_input = pipeline.tokenizer(
        [""], padding="max_length", max_length=pipeline.tokenizer.model_max_length,
        return_tensors="pt"
    )
    uncond_embeddings = pipeline.text_encoder(uncond_input.input_ids.to(pipeline.device))[0]
    text_input = pipeline.tokenizer(
        [prompt],
        padding="max_length",
        max_length=pipeline.tokenizer.model_max_length,
        truncation=True,
        return_tensors="pt",
    )
    text_embeddings = pipeline.text_encoder(text_input.input_ids.to(pipeline.device))[0]
    context = torch.cat([uncond_embeddings, text_embeddings])

    return context


def next_step(model_output: Union[torch.FloatTensor, np.ndarray], timestep: int,
              sample: Union[torch.FloatTensor, np.ndarray], ddim_scheduler):
    timestep, next_timestep = min(
        timestep - ddim_scheduler.config.num_train_timesteps // ddim_scheduler.num_inference_steps, 999), timestep
    alpha_prod_t = ddim_scheduler.alphas_cumprod[timestep] if timestep >= 0 else ddim_scheduler.final_alpha_cumprod
    alpha_prod_t_next = ddim_scheduler.alphas_cumprod[next_timestep]
    beta_prod_t = 1 - alpha_prod_t
    next_original_sample = (sample - beta_prod_t ** 0.5 * model_output) / alpha_prod_t ** 0.5
    next_sample_direction = (1 - alpha_prod_t_next) ** 0.5 * model_output
    next_sample = alpha_prod_t_next ** 0.5 * next_original_sample + next_sample_direction
    return next_sample


def get_noise_pred_single(latents, t, context, unet):
    noise_pred = unet(latents, t, encoder_hidden_states=context)["sample"]
    return noise_pred


@torch.no_grad()
def ddim_loop(pipeline, ddim_scheduler, latent, num_inv_steps, prompt):
    context = init_prompt(prompt, pipeline)
    uncond_embeddings, cond_embeddings = context.chunk(2)
    all_latent = [latent]
    latent = latent.clone().detach()
    for i in tqdm(range(num_inv_steps)):
        t = ddim_scheduler.timesteps[len(ddim_scheduler.timesteps) - i - 1]
        noise_pred = get_noise_pred_single(latent, t, cond_embeddings, pipeline.unet)
        latent = next_step(noise_pred, t, latent, ddim_scheduler)
        all_latent.append(latent)
    return all_latent


@torch.no_grad()
def ddim_inversion(pipeline, ddim_scheduler, video_latent, num_inv_steps, prompt=""):
    ddim_latents = ddim_loop(pipeline, ddim_scheduler, video_latent, num_inv_steps, prompt)
    return ddim_latents
def load_weights(
    animation_pipeline,
    # motion module
    motion_module_path         = "",
    motion_module_lora_configs = [],
    # domain adapter
    adapter_lora_path          = "",
    adapter_lora_scale         = 1.0,
    # image layers
    dreambooth_model_path      = "",
    lora_model_path            = "",
    lora_alpha                 = 0.8,
):
    # motion module
    unet_state_dict = {}
    if motion_module_path != "":
        print(f"load motion module from {motion_module_path}")
        motion_module_state_dict = torch.load(motion_module_path, map_location="cpu")
        motion_module_state_dict = motion_module_state_dict["state_dict"] if "state_dict" in motion_module_state_dict else motion_module_state_dict
        unet_state_dict.update({name: param for name, param in motion_module_state_dict.items() if "motion_modules." in name})
        unet_state_dict.pop("animatediff_config", "")

    missing, unexpected = animation_pipeline.unet.load_state_dict(unet_state_dict, strict=False)
    # assert len(unexpected) == 0
    del unet_state_dict

    # base model
    if dreambooth_model_path != "":
        print(f"load dreambooth model from {dreambooth_model_path}")
        if dreambooth_model_path.endswith(".safetensors"):
            dreambooth_state_dict = {}
            with safe_open(dreambooth_model_path, framework="pt", device="cpu") as f:
                for key in f.keys():
                    dreambooth_state_dict[key] = f.get_tensor(key)
        elif dreambooth_model_path.endswith(".ckpt"):
            dreambooth_state_dict = torch.load(dreambooth_model_path, map_location="cpu")

        # 1. vae
        converted_vae_checkpoint = convert_ldm_vae_checkpoint(dreambooth_state_dict, animation_pipeline.vae.config)
        animation_pipeline.vae.load_state_dict(converted_vae_checkpoint)
        # 2. unet
        converted_unet_checkpoint = convert_ldm_unet_checkpoint(dreambooth_state_dict, animation_pipeline.unet.config)
        animation_pipeline.unet.load_state_dict(converted_unet_checkpoint, strict=False)
        # 3. text_model
        animation_pipeline.text_encoder = convert_ldm_clip_checkpoint(dreambooth_state_dict)
        del dreambooth_state_dict

    # lora layers
    if lora_model_path != "":
        print(f"load lora model from {lora_model_path}")
        assert lora_model_path.endswith(".safetensors")
        lora_state_dict = {}
        with safe_open(lora_model_path, framework="pt", device="cpu") as f:
            for key in f.keys():
                lora_state_dict[key] = f.get_tensor(key)

        animation_pipeline = convert_lora(animation_pipeline, lora_state_dict, alpha=lora_alpha)
        del lora_state_dict

    # domain adapter lora
    if adapter_lora_path != "":
        print(f"load domain lora from {adapter_lora_path}")
        domain_lora_state_dict = torch.load(adapter_lora_path, map_location="cpu")
        domain_lora_state_dict = domain_lora_state_dict["state_dict"] if "state_dict" in domain_lora_state_dict else domain_lora_state_dict
        domain_lora_state_dict.pop("animatediff_config", "")

        animation_pipeline = load_diffusers_lora(animation_pipeline, domain_lora_state_dict, alpha=adapter_lora_scale)

    # motion module lora
    for motion_module_lora_config in motion_module_lora_configs:
        path, alpha = motion_module_lora_config["path"], motion_module_lora_config["alpha"]
        print(f"load motion LoRA from {path}")
        motion_lora_state_dict = torch.load(path, map_location="cpu")
        motion_lora_state_dict = motion_lora_state_dict["state_dict"] if "state_dict" in motion_lora_state_dict else motion_lora_state_dict
        motion_lora_state_dict.pop("animatediff_config", "")

        animation_pipeline = load_diffusers_lora(animation_pipeline, motion_lora_state_dict, alpha)

    return animation_pipeline
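`load_weights` is the single entry point for assembling an AnimationPipeline: it injects the motion module into the 3D UNet, optionally swaps in DreamBooth base weights via the checkpoint converters above, and merges image, domain-adapter, and motion LoRAs. A minimal sketch follows; the pipeline is assumed to have been built elsewhere (as in app.py below) and the file names are illustrative assumptions.

# Hedged sketch: wiring load_weights into an existing AnimationPipeline (file names are assumptions).
from animatediff.utils.util import load_weights, save_videos_grid

pipeline = load_weights(
    pipeline,  # an AnimationPipeline constructed elsewhere, as in app.py
    motion_module_path="models/Motion_Module/mm_sd_v15_v2.ckpt",
    dreambooth_model_path="models/DreamBooth_LoRA/realisticVisionV51.safetensors",
    lora_model_path="",  # no extra image LoRA
    lora_alpha=0.8,
)

# sample a short clip and write it out as a grid video
sample = pipeline("a corgi running on the beach", video_length=16, width=512, height=512).videos
save_videos_grid(sample, "samples/corgi.gif", fps=8)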
app.py    0 → 100644
import os
import json
import torch
import random

import gradio as gr
from glob import glob
from omegaconf import OmegaConf
from datetime import datetime
from safetensors import safe_open

from diffusers import AutoencoderKL
from diffusers import DDIMScheduler, EulerDiscreteScheduler, PNDMScheduler
from diffusers.utils.import_utils import is_xformers_available
from transformers import CLIPTextModel, CLIPTokenizer

from animatediff.models.unet import UNet3DConditionModel
from animatediff.pipelines.pipeline_animation import AnimationPipeline
from animatediff.utils.util import save_videos_grid
from animatediff.utils.convert_from_ckpt import convert_ldm_unet_checkpoint, convert_ldm_clip_checkpoint, convert_ldm_vae_checkpoint
from animatediff.utils.convert_lora_safetensor_to_diffusers import convert_lora


sample_idx = 0
scheduler_dict = {
    "Euler": EulerDiscreteScheduler,
    "PNDM": PNDMScheduler,
    "DDIM": DDIMScheduler,
}

css = """
.toolbutton {
    margin-buttom: 0em 0em 0em 0em;
    max-width: 2.5em;
    min-width: 2.5em !important;
    height: 2.5em;
}
"""
class AnimateController:
    def __init__(self):
        # config dirs
        self.basedir = os.getcwd()
        self.stable_diffusion_dir = os.path.join(self.basedir, "models", "StableDiffusion")
        self.motion_module_dir = os.path.join(self.basedir, "models", "Motion_Module")
        self.personalized_model_dir = os.path.join(self.basedir, "models", "DreamBooth_LoRA")
        self.savedir = os.path.join(self.basedir, "samples", datetime.now().strftime("Gradio-%Y-%m-%dT%H-%M-%S"))
        self.savedir_sample = os.path.join(self.savedir, "sample")
        os.makedirs(self.savedir, exist_ok=True)

        self.stable_diffusion_list = []
        self.motion_module_list = []
        self.personalized_model_list = []

        self.refresh_stable_diffusion()
        self.refresh_motion_module()
        self.refresh_personalized_model()

        # config models
        self.tokenizer = None
        self.text_encoder = None
        self.vae = None
        self.unet = None
        self.pipeline = None
        self.lora_model_state_dict = {}

        self.inference_config = OmegaConf.load("configs/inference/inference.yaml")

    def refresh_stable_diffusion(self):
        self.stable_diffusion_list = glob(os.path.join(self.stable_diffusion_dir, "*/"))

    def refresh_motion_module(self):
        motion_module_list = glob(os.path.join(self.motion_module_dir, "*.ckpt"))
        self.motion_module_list = [os.path.basename(p) for p in motion_module_list]

    def refresh_personalized_model(self):
        personalized_model_list = glob(os.path.join(self.personalized_model_dir, "*.safetensors"))
        self.personalized_model_list = [os.path.basename(p) for p in personalized_model_list]
    def update_stable_diffusion(self, stable_diffusion_dropdown):
        self.tokenizer = CLIPTokenizer.from_pretrained(stable_diffusion_dropdown, subfolder="tokenizer")
        self.text_encoder = CLIPTextModel.from_pretrained(stable_diffusion_dropdown, subfolder="text_encoder").cuda()
        self.vae = AutoencoderKL.from_pretrained(stable_diffusion_dropdown, subfolder="vae").cuda()
        self.unet = UNet3DConditionModel.from_pretrained_2d(
            stable_diffusion_dropdown, subfolder="unet",
            unet_additional_kwargs=OmegaConf.to_container(self.inference_config.unet_additional_kwargs)
        ).cuda()
        return gr.Dropdown.update()

    def update_motion_module(self, motion_module_dropdown):
        if self.unet is None:
            gr.Info(f"Please select a pretrained model path.")
            return gr.Dropdown.update(value=None)
        else:
            motion_module_dropdown = os.path.join(self.motion_module_dir, motion_module_dropdown)
            motion_module_state_dict = torch.load(motion_module_dropdown, map_location="cpu")
            missing, unexpected = self.unet.load_state_dict(motion_module_state_dict, strict=False)
            assert len(unexpected) == 0
            return gr.Dropdown.update()

    def update_base_model(self, base_model_dropdown):
        if self.unet is None:
            gr.Info(f"Please select a pretrained model path.")
            return gr.Dropdown.update(value=None)
        else:
            base_model_dropdown = os.path.join(self.personalized_model_dir, base_model_dropdown)
            base_model_state_dict = {}
            with safe_open(base_model_dropdown, framework="pt", device="cpu") as f:
                for key in f.keys():
                    base_model_state_dict[key] = f.get_tensor(key)

            converted_vae_checkpoint = convert_ldm_vae_checkpoint(base_model_state_dict, self.vae.config)
            self.vae.load_state_dict(converted_vae_checkpoint)

            converted_unet_checkpoint = convert_ldm_unet_checkpoint(base_model_state_dict, self.unet.config)
            self.unet.load_state_dict(converted_unet_checkpoint, strict=False)

            self.text_encoder = convert_ldm_clip_checkpoint(base_model_state_dict)

            return gr.Dropdown.update()

    def update_lora_model(self, lora_model_dropdown):
        lora_model_dropdown = os.path.join(self.personalized_model_dir, lora_model_dropdown)
        self.lora_model_state_dict = {}
        if lora_model_dropdown == "none":
            pass
        else:
            with safe_open(lora_model_dropdown, framework="pt", device="cpu") as f:
                for key in f.keys():
                    self.lora_model_state_dict[key] = f.get_tensor(key)
        return gr.Dropdown.update()
def
animate
(
self
,
stable_diffusion_dropdown
,
motion_module_dropdown
,
base_model_dropdown
,
lora_alpha_slider
,
prompt_textbox
,
negative_prompt_textbox
,
sampler_dropdown
,
sample_step_slider
,
width_slider
,
length_slider
,
height_slider
,
cfg_scale_slider
,
seed_textbox
):
if
self
.
unet
is
None
:
raise
gr
.
Error
(
f
"Please select a pretrained model path."
)
if
motion_module_dropdown
==
""
:
raise
gr
.
Error
(
f
"Please select a motion module."
)
if
base_model_dropdown
==
""
:
raise
gr
.
Error
(
f
"Please select a base DreamBooth model."
)
if
is_xformers_available
():
self
.
unet
.
enable_xformers_memory_efficient_attention
()
pipeline
=
AnimationPipeline
(
vae
=
self
.
vae
,
text_encoder
=
self
.
text_encoder
,
tokenizer
=
self
.
tokenizer
,
unet
=
self
.
unet
,
scheduler
=
scheduler_dict
[
sampler_dropdown
](
**
OmegaConf
.
to_container
(
self
.
inference_config
.
noise_scheduler_kwargs
))
).
to
(
"cuda"
)
if
self
.
lora_model_state_dict
!=
{}:
pipeline
=
convert_lora
(
pipeline
,
self
.
lora_model_state_dict
,
alpha
=
lora_alpha_slider
)
pipeline
.
to
(
"cuda"
)
if
seed_textbox
!=
-
1
and
seed_textbox
!=
""
:
torch
.
manual_seed
(
int
(
seed_textbox
))
else
:
torch
.
seed
()
seed
=
torch
.
initial_seed
()
sample
=
pipeline
(
prompt_textbox
,
negative_prompt
=
negative_prompt_textbox
,
num_inference_steps
=
sample_step_slider
,
guidance_scale
=
cfg_scale_slider
,
width
=
width_slider
,
height
=
height_slider
,
video_length
=
length_slider
,
).
videos
save_sample_path
=
os
.
path
.
join
(
self
.
savedir_sample
,
f
"
{
sample_idx
}
.mp4"
)
save_videos_grid
(
sample
,
save_sample_path
)
sample_config
=
{
"prompt"
:
prompt_textbox
,
"n_prompt"
:
negative_prompt_textbox
,
"sampler"
:
sampler_dropdown
,
"num_inference_steps"
:
sample_step_slider
,
"guidance_scale"
:
cfg_scale_slider
,
"width"
:
width_slider
,
"height"
:
height_slider
,
"video_length"
:
length_slider
,
"seed"
:
seed
}
json_str
=
json
.
dumps
(
sample_config
,
indent
=
4
)
with
open
(
os
.
path
.
join
(
self
.
savedir
,
"logs.json"
),
"a"
)
as
f
:
f
.
write
(
json_str
)
f
.
write
(
"
\n\n
"
)
return
gr
.
Video
.
update
(
value
=
save_sample_path
)
controller
=
AnimateController
()
def
ui
():
with
gr
.
Blocks
(
css
=
css
)
as
demo
:
gr
.
Markdown
(
"""
# [AnimateDiff: Animate Your Personalized Text-to-Image Diffusion Models without Specific Tuning](https://arxiv.org/abs/2307.04725)
Yuwei Guo, Ceyuan Yang*, Anyi Rao, Yaohui Wang, Yu Qiao, Dahua Lin, Bo Dai (*Corresponding Author)<br>
[Arxiv Report](https://arxiv.org/abs/2307.04725) | [Project Page](https://animatediff.github.io/) | [Github](https://github.com/guoyww/animatediff/)
"""
)
with
gr
.
Column
(
variant
=
"panel"
):
gr
.
Markdown
(
"""
### 1. Model checkpoints (select pretrained model path first).
"""
)
with
gr
.
Row
():
stable_diffusion_dropdown
=
gr
.
Dropdown
(
label
=
"Pretrained Model Path"
,
choices
=
controller
.
stable_diffusion_list
,
interactive
=
True
,
)
stable_diffusion_dropdown
.
change
(
fn
=
controller
.
update_stable_diffusion
,
inputs
=
[
stable_diffusion_dropdown
],
outputs
=
[
stable_diffusion_dropdown
])
stable_diffusion_refresh_button
=
gr
.
Button
(
value
=
"
\U0001F503
"
,
elem_classes
=
"toolbutton"
)
def
update_stable_diffusion
():
controller
.
refresh_stable_diffusion
()
return
gr
.
Dropdown
.
update
(
choices
=
controller
.
stable_diffusion_list
)
stable_diffusion_refresh_button
.
click
(
fn
=
update_stable_diffusion
,
inputs
=
[],
outputs
=
[
stable_diffusion_dropdown
])
with
gr
.
Row
():
motion_module_dropdown
=
gr
.
Dropdown
(
label
=
"Select motion module"
,
choices
=
controller
.
motion_module_list
,
interactive
=
True
,
)
motion_module_dropdown
.
change
(
fn
=
controller
.
update_motion_module
,
inputs
=
[
motion_module_dropdown
],
outputs
=
[
motion_module_dropdown
])
motion_module_refresh_button
=
gr
.
Button
(
value
=
"
\U0001F503
"
,
elem_classes
=
"toolbutton"
)
def
update_motion_module
():
controller
.
refresh_motion_module
()
return
gr
.
Dropdown
.
update
(
choices
=
controller
.
motion_module_list
)
motion_module_refresh_button
.
click
(
fn
=
update_motion_module
,
inputs
=
[],
outputs
=
[
motion_module_dropdown
])
base_model_dropdown
=
gr
.
Dropdown
(
label
=
"Select base Dreambooth model (required)"
,
choices
=
controller
.
personalized_model_list
,
interactive
=
True
,
)
base_model_dropdown
.
change
(
fn
=
controller
.
update_base_model
,
inputs
=
[
base_model_dropdown
],
outputs
=
[
base_model_dropdown
])
lora_model_dropdown
=
gr
.
Dropdown
(
label
=
"Select LoRA model (optional)"
,
choices
=
[
"none"
]
+
controller
.
personalized_model_list
,
value
=
"none"
,
interactive
=
True
,
)
lora_model_dropdown
.
change
(
fn
=
controller
.
update_lora_model
,
inputs
=
[
lora_model_dropdown
],
outputs
=
[
lora_model_dropdown
])
lora_alpha_slider
=
gr
.
Slider
(
label
=
"LoRA alpha"
,
value
=
0.8
,
minimum
=
0
,
maximum
=
2
,
interactive
=
True
)
personalized_refresh_button
=
gr
.
Button
(
value
=
"
\U0001F503
"
,
elem_classes
=
"toolbutton"
)
def
update_personalized_model
():
controller
.
refresh_personalized_model
()
return
[
gr
.
Dropdown
.
update
(
choices
=
controller
.
personalized_model_list
),
gr
.
Dropdown
.
update
(
choices
=
[
"none"
]
+
controller
.
personalized_model_list
)
]
personalized_refresh_button
.
click
(
fn
=
update_personalized_model
,
inputs
=
[],
outputs
=
[
base_model_dropdown
,
lora_model_dropdown
])
with
gr
.
Column
(
variant
=
"panel"
):
gr
.
Markdown
(
"""
### 2. Configs for AnimateDiff.
"""
)
prompt_textbox
=
gr
.
Textbox
(
label
=
"Prompt"
,
lines
=
2
)
negative_prompt_textbox
=
gr
.
Textbox
(
label
=
"Negative prompt"
,
lines
=
2
)
with
gr
.
Row
().
style
(
equal_height
=
False
):
with
gr
.
Column
():
with
gr
.
Row
():
sampler_dropdown
=
gr
.
Dropdown
(
label
=
"Sampling method"
,
choices
=
list
(
scheduler_dict
.
keys
()),
value
=
list
(
scheduler_dict
.
keys
())[
0
])
sample_step_slider
=
gr
.
Slider
(
label
=
"Sampling steps"
,
value
=
25
,
minimum
=
10
,
maximum
=
100
,
step
=
1
)
width_slider
=
gr
.
Slider
(
label
=
"Width"
,
value
=
512
,
minimum
=
256
,
maximum
=
1024
,
step
=
64
)
height_slider
=
gr
.
Slider
(
label
=
"Height"
,
value
=
512
,
minimum
=
256
,
maximum
=
1024
,
step
=
64
)
length_slider
=
gr
.
Slider
(
label
=
"Animation length"
,
value
=
16
,
minimum
=
8
,
maximum
=
24
,
step
=
1
)
cfg_scale_slider
=
gr
.
Slider
(
label
=
"CFG Scale"
,
value
=
7.5
,
minimum
=
0
,
maximum
=
20
)
with
gr
.
Row
():
seed_textbox
=
gr
.
Textbox
(
label
=
"Seed"
,
value
=-
1
)
seed_button
=
gr
.
Button
(
value
=
"
\U0001F3B2
"
,
elem_classes
=
"toolbutton"
)
seed_button
.
click
(
fn
=
lambda
:
gr
.
Textbox
.
update
(
value
=
random
.
randint
(
1
,
1e8
)),
inputs
=
[],
outputs
=
[
seed_textbox
])
generate_button
=
gr
.
Button
(
value
=
"Generate"
,
variant
=
'primary'
)
result_video
=
gr
.
Video
(
label
=
"Generated Animation"
,
interactive
=
False
)
generate_button
.
click
(
fn
=
controller
.
animate
,
inputs
=
[
stable_diffusion_dropdown
,
motion_module_dropdown
,
base_model_dropdown
,
lora_alpha_slider
,
prompt_textbox
,
negative_prompt_textbox
,
sampler_dropdown
,
sample_step_slider
,
width_slider
,
length_slider
,
height_slider
,
cfg_scale_slider
,
seed_textbox
,
],
outputs
=
[
result_video
]
)
return
demo
if
__name__
==
"__main__"
:
demo
=
ui
()
demo
.
launch
(
share
=
True
)
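The `animate()` method above builds its sampler by looking the dropdown value up in `scheduler_dict` and expanding `noise_scheduler_kwargs` from the inference config. A minimal sketch of the same construction outside the Gradio app, assuming `diffusers` and `omegaconf` are installed and the config path below exists in a checkout of this repo:

```python
# Sketch: rebuild the sampler the way AnimateController.animate() does.
from omegaconf import OmegaConf
from diffusers import DDIMScheduler, EulerDiscreteScheduler, PNDMScheduler

scheduler_dict = {"Euler": EulerDiscreteScheduler, "PNDM": PNDMScheduler, "DDIM": DDIMScheduler}

inference_config = OmegaConf.load("configs/inference/inference-v1.yaml")
noise_kwargs = OmegaConf.to_container(inference_config.noise_scheduler_kwargs)

# "DDIM" is only an example choice here; the app takes it from the sampler dropdown.
scheduler = scheduler_dict["DDIM"](**noise_kwargs)
print(scheduler.config)
```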
configs/inference/inference-v1.yaml
0 → 100644
View file @
214c357b
unet_additional_kwargs:
  unet_use_cross_frame_attention: false
  unet_use_temporal_attention: false
  use_motion_module: true
  motion_module_resolutions: [1, 2, 4, 8]
  motion_module_mid_block: false
  motion_module_decoder_only: false
  motion_module_type: "Vanilla"
  motion_module_kwargs:
    num_attention_heads: 8
    num_transformer_block: 1
    attention_block_types: ["Temporal_Self", "Temporal_Self"]
    temporal_position_encoding: true
    temporal_position_encoding_max_len: 24
    temporal_attention_dim_div: 1

noise_scheduler_kwargs:
  beta_start: 0.00085
  beta_end: 0.012
  beta_schedule: "linear"
  steps_offset: 1
  clip_sample: False
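This file is the `inference_config` that `app.py` loads with OmegaConf: `unet_additional_kwargs` is converted to a plain dict and passed to the project's inflated 2D-to-3D UNet loader, while `noise_scheduler_kwargs` feeds the sampler as shown earlier. A sketch of that consumption, assuming the UNet class lives at `animatediff.models.unet` (an assumed module path) and that an SD 1.5 checkpoint has been placed under `models/StableDiffusion/`:

```python
# Sketch: hand unet_additional_kwargs to the repo's inflated UNet loader, as app.py does.
from omegaconf import OmegaConf
from animatediff.models.unet import UNet3DConditionModel  # assumed import path within this repo

inference_config = OmegaConf.load("configs/inference/inference-v1.yaml")
unet = UNet3DConditionModel.from_pretrained_2d(
    "models/StableDiffusion/stable-diffusion-v1-5",  # example checkpoint directory, not a required location
    subfolder="unet",
    unet_additional_kwargs=OmegaConf.to_container(inference_config.unet_additional_kwargs),
)
```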
configs/inference/inference-v2.yaml
0 → 100644
View file @
214c357b
unet_additional_kwargs:
  use_inflated_groupnorm: true
  unet_use_cross_frame_attention: false
  unet_use_temporal_attention: false
  use_motion_module: true
  motion_module_resolutions: [1, 2, 4, 8]
  motion_module_mid_block: true
  motion_module_decoder_only: false
  motion_module_type: "Vanilla"
  motion_module_kwargs:
    num_attention_heads: 8
    num_transformer_block: 1
    attention_block_types: ["Temporal_Self", "Temporal_Self"]
    temporal_position_encoding: true
    temporal_position_encoding_max_len: 32
    temporal_attention_dim_div: 1

noise_scheduler_kwargs:
  beta_start: 0.00085
  beta_end: 0.012
  beta_schedule: "linear"
  steps_offset: 1
  clip_sample: False
configs/inference/inference-v3.yaml
0 → 100644
View file @
214c357b
unet_additional_kwargs:
  use_inflated_groupnorm: true
  use_motion_module: true
  motion_module_resolutions: [1, 2, 4, 8]
  motion_module_mid_block: false
  motion_module_type: Vanilla
  motion_module_kwargs:
    num_attention_heads: 8
    num_transformer_block: 1
    attention_block_types: ["Temporal_Self", "Temporal_Self"]
    temporal_position_encoding: true
    temporal_position_encoding_max_len: 32
    temporal_attention_dim_div: 1
    zero_initialize: true

noise_scheduler_kwargs:
  beta_start: 0.00085
  beta_end: 0.012
  beta_schedule: "linear"
  steps_offset: 1
  clip_sample: False
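Reading the three inference configs side by side: v2 adds `use_inflated_groupnorm`, enables the mid-block motion module, and raises `temporal_position_encoding_max_len` from 24 to 32; v3 keeps the inflated group norm and max length 32 but drops several v1/v2 flags and adds `zero_initialize`. A small reading-aid sketch (assuming `omegaconf` and a repo checkout) that flattens the YAMLs and prints only the keys whose values differ:

```python
# Sketch: diff the v1/v2/v3 inference configs by dotted key.
from omegaconf import OmegaConf

def flatten(cfg, prefix=""):
    out = {}
    for key, value in cfg.items():
        name = f"{prefix}{key}"
        if OmegaConf.is_dict(value):
            out.update(flatten(value, name + "."))
        else:
            out[name] = value
    return out

configs = {v: flatten(OmegaConf.load(f"configs/inference/inference-{v}.yaml")) for v in ("v1", "v2", "v3")}
all_keys = sorted(set().union(*[c.keys() for c in configs.values()]))
for key in all_keys:
    values = {v: c.get(key, "<absent>") for v, c in configs.items()}
    if len(set(map(str, values.values()))) > 1:
        print(key, values)
```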
configs/inference/sparsectrl/image_condition.yaml
0 → 100644
View file @
214c357b
controlnet_additional_kwargs:
  set_noisy_sample_input_to_zero: true
  use_simplified_condition_embedding: false
  conditioning_channels: 3

  use_motion_module: true
  motion_module_resolutions: [1, 2, 4, 8]
  motion_module_mid_block: false
  motion_module_type: "Vanilla"
  motion_module_kwargs:
    num_attention_heads: 8
    num_transformer_block: 1
    attention_block_types: ["Temporal_Self"]
    temporal_position_encoding: true
    temporal_position_encoding_max_len: 32
    temporal_attention_dim_div: 1
configs/inference/sparsectrl/latent_condition.yaml
0 → 100644
View file @
214c357b
controlnet_additional_kwargs:
  set_noisy_sample_input_to_zero: true
  use_simplified_condition_embedding: true
  conditioning_channels: 4

  use_motion_module: true
  motion_module_resolutions: [1, 2, 4, 8]
  motion_module_mid_block: false
  motion_module_type: "Vanilla"
  motion_module_kwargs:
    num_attention_heads: 8
    num_transformer_block: 1
    attention_block_types: ["Temporal_Self"]
    temporal_position_encoding: true
    temporal_position_encoding_max_len: 32
    temporal_attention_dim_div: 1
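The two SparseCtrl condition configs differ only in how the sparse conditioning frames are fed to the ControlNet: `image_condition.yaml` expects raw RGB frames (`conditioning_channels: 3`, full condition embedding), while `latent_condition.yaml` expects VAE-latent conditions (`conditioning_channels: 4`, `use_simplified_condition_embedding: true`). A minimal sketch for selecting the matching file:

```python
# Sketch: choose the SparseCtrl config that matches the conditioning format.
from omegaconf import OmegaConf

def load_sparsectrl_config(conditions_are_latents: bool):
    name = "latent_condition" if conditions_are_latents else "image_condition"
    return OmegaConf.load(f"configs/inference/sparsectrl/{name}.yaml")

cfg = load_sparsectrl_config(conditions_are_latents=True)
print(cfg.controlnet_additional_kwargs.conditioning_channels)  # 4 for latents, 3 for RGB images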
configs/prompts/v1/v1-1-ToonYou.yaml
0 → 100644
View file @
214c357b
# motion module v1_14
- dreambooth_path: "models/DreamBooth_LoRA/toonyou_beta6.safetensors"
  lora_model_path: ""

  inference_config: "configs/inference/inference-v1.yaml"
  motion_module: "models/Motion_Module/mm_sd_v14.ckpt"

  seed: [10788741199826055526, 6520604954829636163, 6519455744612555650, 16372571278361863751]
  steps: 25
  guidance_scale: 8

  prompt:
    - "best quality, masterpiece, 1girl, looking at viewer, blurry background, upper body, contemporary, dress"
    - "masterpiece, best quality, 1girl, solo, cherry blossoms, hanami, pink flower, white flower, spring season, wisteria, petals, flower, plum blossoms, outdoors, falling petals, white hair, black eyes,"
    - "best quality, masterpiece, 1boy, formal, abstract, looking at viewer, masculine, marble pattern"
    - "best quality, masterpiece, 1girl, cloudy sky, dandelion, contrapposto, alternate hairstyle,"

  n_prompt:
    - "worst quality, low quality, letterboxed"

# motion module v1_15
- dreambooth_path: "models/DreamBooth_LoRA/toonyou_beta6.safetensors"
  lora_model_path: ""

  inference_config: "configs/inference/inference-v1.yaml"
  motion_module: "models/Motion_Module/mm_sd_v15.ckpt"

  seed: [10788741199826055526, 6520604954829636163, 6519455744612555650, 16372571278361863751]
  steps: 25
  guidance_scale: 8

  prompt:
    - "best quality, masterpiece, 1girl, looking at viewer, blurry background, upper body, contemporary, dress"
    - "masterpiece, best quality, 1girl, solo, cherry blossoms, hanami, pink flower, white flower, spring season, wisteria, petals, flower, plum blossoms, outdoors, falling petals, white hair, black eyes,"
    - "best quality, masterpiece, 1boy, formal, abstract, looking at viewer, masculine, marble pattern"
    - "best quality, masterpiece, 1girl, cloudy sky, dandelion, contrapposto, alternate hairstyle,"

  n_prompt:
    - "worst quality, low quality, letterboxed"
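Each prompt config in `configs/prompts/` is a top-level YAML list: one entry per DreamBooth checkpoint + motion module combination, with `prompt`, `n_prompt`, and `seed` lining up element-wise (a single `n_prompt` is reused for every prompt, as in the file above). A sketch of iterating such a file, where `generate(...)` is a placeholder for whatever inference call you wire up (for example the `AnimationPipeline` assembled as in `app.py`):

```python
# Sketch: walk a prompt config and drive one generation per (prompt, seed) pair.
from omegaconf import OmegaConf

runs = OmegaConf.load("configs/prompts/v1/v1-1-ToonYou.yaml")
for run in runs:
    n_prompts = list(run.n_prompt) * len(run.prompt) if len(run.n_prompt) == 1 else run.n_prompt
    for prompt, n_prompt, seed in zip(run.prompt, n_prompts, run.seed):
        print(run.motion_module, seed)
        print("  prompt  :", prompt)
        print("  n_prompt:", n_prompt)
        # generate(prompt, n_prompt, seed, steps=run.steps, guidance_scale=run.guidance_scale)  # placeholder
```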
configs/prompts/v1/v1-2-Lyriel.yaml
0 → 100644
View file @
214c357b
# motion module v1_14
-
dreambooth_path
:
"
models/DreamBooth_LoRA/lyriel_v16.safetensors"
lora_model_path
:
"
"
inference_config
:
"
configs/inference/inference-v1.yaml"
motion_module
:
"
models/Motion_Module/mm_sd_v14.ckpt"
seed
:
[
10917152860782582783
,
6399018107401806238
,
15875751942533906793
,
6653196880059936551
]
steps
:
25
guidance_scale
:
8
prompt
:
-
"
dark
shot,
epic
realistic,
portrait
of
halo,
sunglasses,
blue
eyes,
tartan
scarf,
white
hair
by
atey
ghailan,
by
greg
rutkowski,
by
greg
tocchini,
by
james
gilleard,
by
joe
fenton,
by
kaethe
butcher,
gradient
yellow,
black,
brown
and
magenta
color
scheme,
grunge
aesthetic!!!
graffiti
tag
wall
background,
art
by
greg
rutkowski
and
artgerm,
soft
cinematic
light,
adobe
lightroom,
photolab,
hdr,
intricate,
highly
detailed,
depth
of
field,
faded,
neutral
colors,
hdr,
muted
colors,
hyperdetailed,
artstation,
cinematic,
warm
lights,
dramatic
light,
intricate
details,
complex
background,
rutkowski,
teal
and
orange"
-
"
A
forbidden
castle
high
up
in
the
mountains,
pixel
art,
intricate
details2,
hdr,
intricate
details,
hyperdetailed5,
natural
skin
texture,
hyperrealism,
soft
light,
sharp,
game
art,
key
visual,
surreal"
-
"
dark
theme,
medieval
portrait
of
a
man
sharp
features,
grim,
cold
stare,
dark
colors,
Volumetric
lighting,
baroque
oil
painting
by
Greg
Rutkowski,
Artgerm,
WLOP,
Alphonse
Mucha
dynamic
lighting
hyperdetailed
intricately
detailed,
hdr,
muted
colors,
complex
background,
hyperrealism,
hyperdetailed,
amandine
van
ray"
-
"
As
I
have
gone
alone
in
there
and
with
my
treasures
bold,
I
can
keep
my
secret
where
and
hint
of
riches
new
and
old.
Begin
it
where
warm
waters
halt
and
take
it
in
a
canyon
down,
not
far
but
too
far
to
walk,
put
in
below
the
home
of
brown."
n_prompt
:
-
"
3d,
cartoon,
lowres,
bad
anatomy,
bad
hands,
text,
error,
missing
fingers,
extra
digit,
fewer
digits,
cropped,
worst
quality,
low
quality,
normal
quality,
jpeg
artifacts,
signature,
watermark,
username,
blurry,
artist
name,
young,
loli,
elf,
3d,
illustration"
-
"
3d,
cartoon,
anime,
sketches,
worst
quality,
low
quality,
normal
quality,
lowres,
normal
quality,
monochrome,
grayscale,
skin
spots,
acnes,
skin
blemishes,
bad
anatomy,
girl,
loli,
young,
large
breasts,
red
eyes,
muscular"
-
"
dof,
grayscale,
black
and
white,
bw,
3d,
cartoon,
anime,
sketches,
worst
quality,
low
quality,
normal
quality,
lowres,
normal
quality,
monochrome,
grayscale,
skin
spots,
acnes,
skin
blemishes,
bad
anatomy,
girl,
loli,
young,
large
breasts,
red
eyes,
muscular,badhandsv5-neg,
By
bad
artist
-neg
1,
monochrome"
-
"
holding
an
item,
cowboy,
hat,
cartoon,
3d,
disfigured,
bad
art,
deformed,extra
limbs,close
up,b&w,
wierd
colors,
blurry,
duplicate,
morbid,
mutilated,
[out
of
frame],
extra
fingers,
mutated
hands,
poorly
drawn
hands,
poorly
drawn
face,
mutation,
deformed,
ugly,
blurry,
bad
anatomy,
bad
proportions,
extra
limbs,
cloned
face,
disfigured,
out
of
frame,
ugly,
extra
limbs,
bad
anatomy,
gross
proportions,
malformed
limbs,
missing
arms,
missing
legs,
extra
arms,
extra
legs,
mutated
hands,
fused
fingers,
too
many
fingers,
long
neck,
Photoshop,
video
game,
ugly,
tiling,
poorly
drawn
hands,
poorly
drawn
feet,
poorly
drawn
face,
out
of
frame,
mutation,
mutated,
extra
limbs,
extra
legs,
extra
arms,
disfigured,
deformed,
cross-eye,
body
out
of
frame,
blurry,
bad
art,
bad
anatomy,
3d
render"
# motion module v1_15
-
dreambooth_path
:
"
models/DreamBooth_LoRA/lyriel_v16.safetensors"
lora_model_path
:
"
"
inference_config
:
"
configs/inference/inference-v1.yaml"
motion_module
:
"
models/Motion_Module/mm_sd_v15.ckpt"
seed
:
[
10917152860782582783
,
6399018107401806238
,
15875751942533906793
,
6653196880059936551
]
steps
:
25
guidance_scale
:
8
prompt
:
-
"
dark
shot,
epic
realistic,
portrait
of
halo,
sunglasses,
blue
eyes,
tartan
scarf,
white
hair
by
atey
ghailan,
by
greg
rutkowski,
by
greg
tocchini,
by
james
gilleard,
by
joe
fenton,
by
kaethe
butcher,
gradient
yellow,
black,
brown
and
magenta
color
scheme,
grunge
aesthetic!!!
graffiti
tag
wall
background,
art
by
greg
rutkowski
and
artgerm,
soft
cinematic
light,
adobe
lightroom,
photolab,
hdr,
intricate,
highly
detailed,
depth
of
field,
faded,
neutral
colors,
hdr,
muted
colors,
hyperdetailed,
artstation,
cinematic,
warm
lights,
dramatic
light,
intricate
details,
complex
background,
rutkowski,
teal
and
orange"
-
"
A
forbidden
castle
high
up
in
the
mountains,
pixel
art,
intricate
details2,
hdr,
intricate
details,
hyperdetailed5,
natural
skin
texture,
hyperrealism,
soft
light,
sharp,
game
art,
key
visual,
surreal"
-
"
dark
theme,
medieval
portrait
of
a
man
sharp
features,
grim,
cold
stare,
dark
colors,
Volumetric
lighting,
baroque
oil
painting
by
Greg
Rutkowski,
Artgerm,
WLOP,
Alphonse
Mucha
dynamic
lighting
hyperdetailed
intricately
detailed,
hdr,
muted
colors,
complex
background,
hyperrealism,
hyperdetailed,
amandine
van
ray"
-
"
As
I
have
gone
alone
in
there
and
with
my
treasures
bold,
I
can
keep
my
secret
where
and
hint
of
riches
new
and
old.
Begin
it
where
warm
waters
halt
and
take
it
in
a
canyon
down,
not
far
but
too
far
to
walk,
put
in
below
the
home
of
brown."
n_prompt
:
-
"
3d,
cartoon,
lowres,
bad
anatomy,
bad
hands,
text,
error,
missing
fingers,
extra
digit,
fewer
digits,
cropped,
worst
quality,
low
quality,
normal
quality,
jpeg
artifacts,
signature,
watermark,
username,
blurry,
artist
name,
young,
loli,
elf,
3d,
illustration"
-
"
3d,
cartoon,
anime,
sketches,
worst
quality,
low
quality,
normal
quality,
lowres,
normal
quality,
monochrome,
grayscale,
skin
spots,
acnes,
skin
blemishes,
bad
anatomy,
girl,
loli,
young,
large
breasts,
red
eyes,
muscular"
-
"
dof,
grayscale,
black
and
white,
bw,
3d,
cartoon,
anime,
sketches,
worst
quality,
low
quality,
normal
quality,
lowres,
normal
quality,
monochrome,
grayscale,
skin
spots,
acnes,
skin
blemishes,
bad
anatomy,
girl,
loli,
young,
large
breasts,
red
eyes,
muscular,badhandsv5-neg,
By
bad
artist
-neg
1,
monochrome"
-
"
holding
an
item,
cowboy,
hat,
cartoon,
3d,
disfigured,
bad
art,
deformed,extra
limbs,close
up,b&w,
wierd
colors,
blurry,
duplicate,
morbid,
mutilated,
[out
of
frame],
extra
fingers,
mutated
hands,
poorly
drawn
hands,
poorly
drawn
face,
mutation,
deformed,
ugly,
blurry,
bad
anatomy,
bad
proportions,
extra
limbs,
cloned
face,
disfigured,
out
of
frame,
ugly,
extra
limbs,
bad
anatomy,
gross
proportions,
malformed
limbs,
missing
arms,
missing
legs,
extra
arms,
extra
legs,
mutated
hands,
fused
fingers,
too
many
fingers,
long
neck,
Photoshop,
video
game,
ugly,
tiling,
poorly
drawn
hands,
poorly
drawn
feet,
poorly
drawn
face,
out
of
frame,
mutation,
mutated,
extra
limbs,
extra
legs,
extra
arms,
disfigured,
deformed,
cross-eye,
body
out
of
frame,
blurry,
bad
art,
bad
anatomy,
3d
render"
configs/prompts/v1/v1-3-RcnzCartoon.yaml
0 → 100644
View file @
214c357b
# motion module v1_14
-
dreambooth_path
:
"
models/DreamBooth_LoRA/rcnzCartoon3d_v10.safetensors"
lora_model_path
:
"
"
inference_config
:
"
configs/inference/inference-v1.yaml"
motion_module
:
"
models/Motion_Module/mm_sd_v14.ckpt"
seed
:
[
16931037867122267877
,
2094308009433392066
,
4292543217695451092
,
15572665120852309890
]
steps
:
25
guidance_scale
:
8
prompt
:
-
"
Jane
Eyre
with
headphones,
natural
skin
texture,4mm,k
textures,
soft
cinematic
light,
adobe
lightroom,
photolab,
hdr,
intricate,
elegant,
highly
detailed,
sharp
focus,
cinematic
look,
soothing
tones,
insane
details,
intricate
details,
hyperdetailed,
low
contrast,
soft
cinematic
light,
dim
colors,
exposure
blend,
hdr,
faded"
-
"
close
up
Portrait
photo
of
muscular
bearded
guy
in
a
worn
mech
suit,
light
bokeh,
intricate,
steel
metal
[rust],
elegant,
sharp
focus,
photo
by
greg
rutkowski,
soft
lighting,
vibrant
colors,
masterpiece,
streets,
detailed
face"
-
"
absurdres,
photorealistic,
masterpiece,
a
30
year
old
man
with
gold
framed,
aviator
reading
glasses
and
a
black
hooded
jacket
and
a
beard,
professional
photo,
a
character
portrait,
altermodern,
detailed
eyes,
detailed
lips,
detailed
face,
grey
eyes"
-
"
a
golden
labrador,
warm
vibrant
colours,
natural
lighting,
dappled
lighting,
diffused
lighting,
absurdres,
highres,k,
uhd,
hdr,
rtx,
unreal,
octane
render,
RAW
photo,
photorealistic,
global
illumination,
subsurface
scattering"
n_prompt
:
-
"
deformed,
distorted,
disfigured,
poorly
drawn,
bad
anatomy,
wrong
anatomy,
extra
limb,
missing
limb,
floating
limbs,
mutated
hands
and
fingers,
disconnected
limbs,
mutation,
mutated,
ugly,
disgusting,
blurry,
amputation"
-
"
nude,
cross
eyed,
tongue,
open
mouth,
inside,
3d,
cartoon,
anime,
sketches,
worst
quality,
low
quality,
normal
quality,
lowres,
normal
quality,
monochrome,
grayscale,
skin
spots,
acnes,
skin
blemishes,
bad
anatomy,
red
eyes,
muscular"
-
"
easynegative,
cartoon,
anime,
sketches,
necklace,
earrings
worst
quality,
low
quality,
normal
quality,
bad
anatomy,
bad
hands,
shiny
skin,
error,
missing
fingers,
extra
digit,
fewer
digits,
jpeg
artifacts,
signature,
watermark,
username,
blurry,
chubby,
anorectic,
bad
eyes,
old,
wrinkled
skin,
red
skin,
photograph
By
bad
artist
-neg,
big
eyes,
muscular
face,"
-
"
beard,
EasyNegative,
lowres,
chromatic
aberration,
depth
of
field,
motion
blur,
blurry,
bokeh,
bad
quality,
worst
quality,
multiple
arms,
badhand"
# motion module v1_15
-
dreambooth_path
:
"
models/DreamBooth_LoRA/rcnzCartoon3d_v10.safetensors"
lora_model_path
:
"
"
inference_config
:
"
configs/inference/inference-v1.yaml"
motion_module
:
"
models/Motion_Module/mm_sd_v15.ckpt"
seed
:
[
16931037867122267877
,
2094308009433392066
,
4292543217695451092
,
15572665120852309890
]
steps
:
25
guidance_scale
:
8
prompt
:
-
"
Jane
Eyre
with
headphones,
natural
skin
texture,4mm,k
textures,
soft
cinematic
light,
adobe
lightroom,
photolab,
hdr,
intricate,
elegant,
highly
detailed,
sharp
focus,
cinematic
look,
soothing
tones,
insane
details,
intricate
details,
hyperdetailed,
low
contrast,
soft
cinematic
light,
dim
colors,
exposure
blend,
hdr,
faded"
-
"
close
up
Portrait
photo
of
muscular
bearded
guy
in
a
worn
mech
suit,
light
bokeh,
intricate,
steel
metal
[rust],
elegant,
sharp
focus,
photo
by
greg
rutkowski,
soft
lighting,
vibrant
colors,
masterpiece,
streets,
detailed
face"
-
"
absurdres,
photorealistic,
masterpiece,
a
30
year
old
man
with
gold
framed,
aviator
reading
glasses
and
a
black
hooded
jacket
and
a
beard,
professional
photo,
a
character
portrait,
altermodern,
detailed
eyes,
detailed
lips,
detailed
face,
grey
eyes"
-
"
a
golden
labrador,
warm
vibrant
colours,
natural
lighting,
dappled
lighting,
diffused
lighting,
absurdres,
highres,k,
uhd,
hdr,
rtx,
unreal,
octane
render,
RAW
photo,
photorealistic,
global
illumination,
subsurface
scattering"
n_prompt
:
-
"
deformed,
distorted,
disfigured,
poorly
drawn,
bad
anatomy,
wrong
anatomy,
extra
limb,
missing
limb,
floating
limbs,
mutated
hands
and
fingers,
disconnected
limbs,
mutation,
mutated,
ugly,
disgusting,
blurry,
amputation"
-
"
nude,
cross
eyed,
tongue,
open
mouth,
inside,
3d,
cartoon,
anime,
sketches,
worst
quality,
low
quality,
normal
quality,
lowres,
normal
quality,
monochrome,
grayscale,
skin
spots,
acnes,
skin
blemishes,
bad
anatomy,
red
eyes,
muscular"
-
"
easynegative,
cartoon,
anime,
sketches,
necklace,
earrings
worst
quality,
low
quality,
normal
quality,
bad
anatomy,
bad
hands,
shiny
skin,
error,
missing
fingers,
extra
digit,
fewer
digits,
jpeg
artifacts,
signature,
watermark,
username,
blurry,
chubby,
anorectic,
bad
eyes,
old,
wrinkled
skin,
red
skin,
photograph
By
bad
artist
-neg,
big
eyes,
muscular
face,"
-
"
beard,
EasyNegative,
lowres,
chromatic
aberration,
depth
of
field,
motion
blur,
blurry,
bokeh,
bad
quality,
worst
quality,
multiple
arms,
badhand"
configs/prompts/v1/v1-4-MajicMix.yaml
0 → 100644
View file @
214c357b
# motion module v1_14
- dreambooth_path: "models/DreamBooth_LoRA/majicmixRealistic_v5Preview.safetensors"
  lora_model_path: ""

  inference_config: "configs/inference/inference-v1.yaml"
  motion_module: "models/Motion_Module/mm_sd_v14.ckpt"

  seed: [1572448948722921032, 1099474677988590681, 6488833139725635347, 18339859844376517918]
  steps: 25
  guidance_scale: 8

  prompt:
    - "1girl, offshoulder, light smile, shiny skin best quality, masterpiece, photorealistic"
    - "best quality, masterpiece, photorealistic, 1boy, 50 years old beard, dramatic lighting"
    - "best quality, masterpiece, photorealistic, 1girl, light smile, shirt with collars, waist up, dramatic lighting, from below"
    - "male, man, beard, bodybuilder, skinhead,cold face, tough guy, cowboyshot, tattoo, french windows, luxury hotel masterpiece, best quality, photorealistic"

  n_prompt:
    - "ng_deepnegative_v1_75t, badhandv4, worst quality, low quality, normal quality, lowres, bad anatomy, bad hands, watermark, moles"
    - "nsfw, ng_deepnegative_v1_75t,badhandv4, worst quality, low quality, normal quality, lowres,watermark, monochrome"
    - "nsfw, ng_deepnegative_v1_75t,badhandv4, worst quality, low quality, normal quality, lowres,watermark, monochrome"
    - "nude, nsfw, ng_deepnegative_v1_75t, badhandv4, worst quality, low quality, normal quality, lowres, bad anatomy, bad hands, monochrome, grayscale watermark, moles, people"

# motion module v1_15
- dreambooth_path: "models/DreamBooth_LoRA/majicmixRealistic_v5Preview.safetensors"
  lora_model_path: ""

  inference_config: "configs/inference/inference-v1.yaml"
  motion_module: "models/Motion_Module/mm_sd_v15.ckpt"

  seed: [1572448948722921032, 1099474677988590681, 6488833139725635347, 18339859844376517918]
  steps: 25
  guidance_scale: 8

  prompt:
    - "1girl, offshoulder, light smile, shiny skin best quality, masterpiece, photorealistic"
    - "best quality, masterpiece, photorealistic, 1boy, 50 years old beard, dramatic lighting"
    - "best quality, masterpiece, photorealistic, 1girl, light smile, shirt with collars, waist up, dramatic lighting, from below"
    - "male, man, beard, bodybuilder, skinhead,cold face, tough guy, cowboyshot, tattoo, french windows, luxury hotel masterpiece, best quality, photorealistic"

  n_prompt:
    - "ng_deepnegative_v1_75t, badhandv4, worst quality, low quality, normal quality, lowres, bad anatomy, bad hands, watermark, moles"
    - "nsfw, ng_deepnegative_v1_75t,badhandv4, worst quality, low quality, normal quality, lowres,watermark, monochrome"
    - "nsfw, ng_deepnegative_v1_75t,badhandv4, worst quality, low quality, normal quality, lowres,watermark, monochrome"
    - "nude, nsfw, ng_deepnegative_v1_75t, badhandv4, worst quality, low quality, normal quality, lowres, bad anatomy, bad hands, monochrome, grayscale watermark, moles, people"
configs/prompts/v1/v1-5-RealisticVision.yaml
0 → 100644
View file @
214c357b
# motion module v1_14
-
dreambooth_path
:
"
models/DreamBooth_LoRA/realisticVisionV20_v20.safetensors"
lora_model_path
:
"
"
inference_config
:
"
configs/inference/inference-v1.yaml"
motion_module
:
"
models/Motion_Module/mm_sd_v14.ckpt"
seed
:
[
5658137986800322009
,
12099779162349365895
,
10499524853910852697
,
16768009035333711932
]
steps
:
25
guidance_scale
:
8
prompt
:
-
"
b&w
photo
of
42
y.o
man
in
black
clothes,
bald,
face,
half
body,
body,
high
detailed
skin,
skin
pores,
coastline,
overcast
weather,
wind,
waves,
8k
uhd,
dslr,
soft
lighting,
high
quality,
film
grain,
Fujifilm
XT3"
-
"
close
up
photo
of
a
rabbit,
forest,
haze,
halation,
bloom,
dramatic
atmosphere,
centred,
rule
of
thirds,
200mm
1.4f
macro
shot"
-
"
photo
of
coastline,
rocks,
storm
weather,
wind,
waves,
lightning,
8k
uhd,
dslr,
soft
lighting,
high
quality,
film
grain,
Fujifilm
XT3"
-
"
night,
b&w
photo
of
old
house,
post
apocalypse,
forest,
storm
weather,
wind,
rocks,
8k
uhd,
dslr,
soft
lighting,
high
quality,
film
grain"
n_prompt
:
-
"
semi-realistic,
cgi,
3d,
render,
sketch,
cartoon,
drawing,
anime,
text,
close
up,
cropped,
out
of
frame,
worst
quality,
low
quality,
jpeg
artifacts,
ugly,
duplicate,
morbid,
mutilated,
extra
fingers,
mutated
hands,
poorly
drawn
hands,
poorly
drawn
face,
mutation,
deformed,
blurry,
dehydrated,
bad
anatomy,
bad
proportions,
extra
limbs,
cloned
face,
disfigured,
gross
proportions,
malformed
limbs,
missing
arms,
missing
legs,
extra
arms,
extra
legs,
fused
fingers,
too
many
fingers,
long
neck"
-
"
semi-realistic,
cgi,
3d,
render,
sketch,
cartoon,
drawing,
anime,
text,
close
up,
cropped,
out
of
frame,
worst
quality,
low
quality,
jpeg
artifacts,
ugly,
duplicate,
morbid,
mutilated,
extra
fingers,
mutated
hands,
poorly
drawn
hands,
poorly
drawn
face,
mutation,
deformed,
blurry,
dehydrated,
bad
anatomy,
bad
proportions,
extra
limbs,
cloned
face,
disfigured,
gross
proportions,
malformed
limbs,
missing
arms,
missing
legs,
extra
arms,
extra
legs,
fused
fingers,
too
many
fingers,
long
neck"
-
"
blur,
haze,
deformed
iris,
deformed
pupils,
semi-realistic,
cgi,
3d,
render,
sketch,
cartoon,
drawing,
anime,
mutated
hands
and
fingers,
deformed,
distorted,
disfigured,
poorly
drawn,
bad
anatomy,
wrong
anatomy,
extra
limb,
missing
limb,
floating
limbs,
disconnected
limbs,
mutation,
mutated,
ugly,
disgusting,
amputation"
-
"
blur,
haze,
deformed
iris,
deformed
pupils,
semi-realistic,
cgi,
3d,
render,
sketch,
cartoon,
drawing,
anime,
art,
mutated
hands
and
fingers,
deformed,
distorted,
disfigured,
poorly
drawn,
bad
anatomy,
wrong
anatomy,
extra
limb,
missing
limb,
floating
limbs,
disconnected
limbs,
mutation,
mutated,
ugly,
disgusting,
amputation"
# motion module v1_15
-
dreambooth_path
:
"
models/DreamBooth_LoRA/realisticVisionV20_v20.safetensors"
lora_model_path
:
"
"
inference_config
:
"
configs/inference/inference-v1.yaml"
motion_module
:
"
models/Motion_Module/mm_sd_v15.ckpt"
seed
:
[
5658137986800322009
,
12099779162349365895
,
10499524853910852697
,
16768009035333711932
]
steps
:
25
guidance_scale
:
8
prompt
:
-
"
b&w
photo
of
42
y.o
man
in
black
clothes,
bald,
face,
half
body,
body,
high
detailed
skin,
skin
pores,
coastline,
overcast
weather,
wind,
waves,
8k
uhd,
dslr,
soft
lighting,
high
quality,
film
grain,
Fujifilm
XT3"
-
"
close
up
photo
of
a
rabbit,
forest,
haze,
halation,
bloom,
dramatic
atmosphere,
centred,
rule
of
thirds,
200mm
1.4f
macro
shot"
-
"
photo
of
coastline,
rocks,
storm
weather,
wind,
waves,
lightning,
8k
uhd,
dslr,
soft
lighting,
high
quality,
film
grain,
Fujifilm
XT3"
-
"
night,
b&w
photo
of
old
house,
post
apocalypse,
forest,
storm
weather,
wind,
rocks,
8k
uhd,
dslr,
soft
lighting,
high
quality,
film
grain"
n_prompt
:
-
"
semi-realistic,
cgi,
3d,
render,
sketch,
cartoon,
drawing,
anime,
text,
close
up,
cropped,
out
of
frame,
worst
quality,
low
quality,
jpeg
artifacts,
ugly,
duplicate,
morbid,
mutilated,
extra
fingers,
mutated
hands,
poorly
drawn
hands,
poorly
drawn
face,
mutation,
deformed,
blurry,
dehydrated,
bad
anatomy,
bad
proportions,
extra
limbs,
cloned
face,
disfigured,
gross
proportions,
malformed
limbs,
missing
arms,
missing
legs,
extra
arms,
extra
legs,
fused
fingers,
too
many
fingers,
long
neck"
-
"
semi-realistic,
cgi,
3d,
render,
sketch,
cartoon,
drawing,
anime,
text,
close
up,
cropped,
out
of
frame,
worst
quality,
low
quality,
jpeg
artifacts,
ugly,
duplicate,
morbid,
mutilated,
extra
fingers,
mutated
hands,
poorly
drawn
hands,
poorly
drawn
face,
mutation,
deformed,
blurry,
dehydrated,
bad
anatomy,
bad
proportions,
extra
limbs,
cloned
face,
disfigured,
gross
proportions,
malformed
limbs,
missing
arms,
missing
legs,
extra
arms,
extra
legs,
fused
fingers,
too
many
fingers,
long
neck"
-
"
blur,
haze,
deformed
iris,
deformed
pupils,
semi-realistic,
cgi,
3d,
render,
sketch,
cartoon,
drawing,
anime,
mutated
hands
and
fingers,
deformed,
distorted,
disfigured,
poorly
drawn,
bad
anatomy,
wrong
anatomy,
extra
limb,
missing
limb,
floating
limbs,
disconnected
limbs,
mutation,
mutated,
ugly,
disgusting,
amputation"
-
"
blur,
haze,
deformed
iris,
deformed
pupils,
semi-realistic,
cgi,
3d,
render,
sketch,
cartoon,
drawing,
anime,
art,
mutated
hands
and
fingers,
deformed,
distorted,
disfigured,
poorly
drawn,
bad
anatomy,
wrong
anatomy,
extra
limb,
missing
limb,
floating
limbs,
disconnected
limbs,
mutation,
mutated,
ugly,
disgusting,
amputation"
configs/prompts/v1/v1-6-Tusun.yaml
0 → 100644
View file @
214c357b
# motion module v1_14
-
dreambooth_path
:
"
models/DreamBooth_LoRA/moonfilm_reality20.safetensors"
lora_model_path
:
"
models/DreamBooth_LoRA/TUSUN.safetensors"
lora_alpha
:
0.6
inference_config
:
"
configs/inference/inference-v1.yaml"
motion_module
:
"
models/Motion_Module/mm_sd_v14.ckpt"
seed
:
[
10154078483724687116
,
2664393535095473805
,
4231566096207622938
,
1713349740448094493
]
steps
:
25
guidance_scale
:
8
prompt
:
-
"
tusuncub
with
its
mouth
open,
blurry,
open
mouth,
fangs,
photo
background,
looking
at
viewer,
tongue,
full
body,
solo,
cute
and
lovely,
Beautiful
and
realistic
eye
details,
perfect
anatomy,
Nonsense,
pure
background,
Centered-Shot,
realistic
photo,
photograph,
4k,
hyper
detailed,
DSLR,
24
Megapixels,
8mm
Lens,
Full
Frame,
film
grain,
Global
Illumination,
studio
Lighting,
Award
Winning
Photography,
diffuse
reflection,
ray
tracing"
-
"
cute
tusun
with
a
blurry
background,
black
background,
simple
background,
signature,
face,
solo,
cute
and
lovely,
Beautiful
and
realistic
eye
details,
perfect
anatomy,
Nonsense,
pure
background,
Centered-Shot,
realistic
photo,
photograph,
4k,
hyper
detailed,
DSLR,
24
Megapixels,
8mm
Lens,
Full
Frame,
film
grain,
Global
Illumination,
studio
Lighting,
Award
Winning
Photography,
diffuse
reflection,
ray
tracing"
-
"
cut
tusuncub
walking
in
the
snow,
blurry,
looking
at
viewer,
depth
of
field,
blurry
background,
full
body,
solo,
cute
and
lovely,
Beautiful
and
realistic
eye
details,
perfect
anatomy,
Nonsense,
pure
background,
Centered-Shot,
realistic
photo,
photograph,
4k,
hyper
detailed,
DSLR,
24
Megapixels,
8mm
Lens,
Full
Frame,
film
grain,
Global
Illumination,
studio
Lighting,
Award
Winning
Photography,
diffuse
reflection,
ray
tracing"
-
"
character
design,
cyberpunk
tusun
kitten
wearing
astronaut
suit,
sci-fic,
realistic
eye
color
and
details,
fluffy,
big
head,
science
fiction,
communist
ideology,
Cyborg,
fantasy,
intense
angle,
soft
lighting,
photograph,
4k,
hyper
detailed,
portrait
wallpaper,
realistic,
photo-realistic,
DSLR,
24
Megapixels,
Full
Frame,
vibrant
details,
octane
render,
finely
detail,
best
quality,
incredibly
absurdres,
robotic
parts,
rim
light,
vibrant
details,
luxurious
cyberpunk,
hyperrealistic,
cable
electric
wires,
microchip,
full
body"
n_prompt
:
-
"
worst
quality,
low
quality,
deformed,
distorted,
disfigured,
bad
eyes,
bad
anatomy,
disconnected
limbs,
wrong
body
proportions,
low
quality,
worst
quality,
text,
watermark,
signatre,
logo,
illustration,
painting,
cartoons,
ugly,
easy_negative"
# motion module v1_15
-
dreambooth_path
:
"
models/DreamBooth_LoRA/moonfilm_reality20.safetensors"
lora_model_path
:
"
models/DreamBooth_LoRA/TUSUN.safetensors"
lora_alpha
:
0.6
inference_config
:
"
configs/inference/inference-v1.yaml"
motion_module
:
"
models/Motion_Module/mm_sd_v15.ckpt"
seed
:
[
10154078483724687116
,
2664393535095473805
,
4231566096207622938
,
1713349740448094493
]
steps
:
25
guidance_scale
:
8
prompt
:
-
"
tusuncub
with
its
mouth
open,
blurry,
open
mouth,
fangs,
photo
background,
looking
at
viewer,
tongue,
full
body,
solo,
cute
and
lovely,
Beautiful
and
realistic
eye
details,
perfect
anatomy,
Nonsense,
pure
background,
Centered-Shot,
realistic
photo,
photograph,
4k,
hyper
detailed,
DSLR,
24
Megapixels,
8mm
Lens,
Full
Frame,
film
grain,
Global
Illumination,
studio
Lighting,
Award
Winning
Photography,
diffuse
reflection,
ray
tracing"
-
"
cute
tusun
with
a
blurry
background,
black
background,
simple
background,
signature,
face,
solo,
cute
and
lovely,
Beautiful
and
realistic
eye
details,
perfect
anatomy,
Nonsense,
pure
background,
Centered-Shot,
realistic
photo,
photograph,
4k,
hyper
detailed,
DSLR,
24
Megapixels,
8mm
Lens,
Full
Frame,
film
grain,
Global
Illumination,
studio
Lighting,
Award
Winning
Photography,
diffuse
reflection,
ray
tracing"
-
"
cut
tusuncub
walking
in
the
snow,
blurry,
looking
at
viewer,
depth
of
field,
blurry
background,
full
body,
solo,
cute
and
lovely,
Beautiful
and
realistic
eye
details,
perfect
anatomy,
Nonsense,
pure
background,
Centered-Shot,
realistic
photo,
photograph,
4k,
hyper
detailed,
DSLR,
24
Megapixels,
8mm
Lens,
Full
Frame,
film
grain,
Global
Illumination,
studio
Lighting,
Award
Winning
Photography,
diffuse
reflection,
ray
tracing"
-
"
character
design,
cyberpunk
tusun
kitten
wearing
astronaut
suit,
sci-fic,
realistic
eye
color
and
details,
fluffy,
big
head,
science
fiction,
communist
ideology,
Cyborg,
fantasy,
intense
angle,
soft
lighting,
photograph,
4k,
hyper
detailed,
portrait
wallpaper,
realistic,
photo-realistic,
DSLR,
24
Megapixels,
Full
Frame,
vibrant
details,
octane
render,
finely
detail,
best
quality,
incredibly
absurdres,
robotic
parts,
rim
light,
vibrant
details,
luxurious
cyberpunk,
hyperrealistic,
cable
electric
wires,
microchip,
full
body"
n_prompt
:
-
"
worst
quality,
low
quality,
deformed,
distorted,
disfigured,
bad
eyes,
bad
anatomy,
disconnected
limbs,
wrong
body
proportions,
low
quality,
worst
quality,
text,
watermark,
signatre,
logo,
illustration,
painting,
cartoons,
ugly,
easy_negative"
configs/prompts/v1/v1-7-FilmVelvia.yaml
0 → 100644
View file @
214c357b
# motion module v1_14
-
dreambooth_path
:
"
models/DreamBooth_LoRA/majicmixRealistic_v4.safetensors"
lora_model_path
:
"
models/DreamBooth_LoRA/FilmVelvia2.safetensors"
lora_alpha
:
0.6
inference_config
:
"
configs/inference/inference-v1.yaml"
motion_module
:
"
models/Motion_Module/mm_sd_v14.ckpt"
seed
:
[
358675358833372813
,
3519455280971923743
,
11684545350557985081
,
8696855302100399877
]
steps
:
25
guidance_scale
:
8
prompt
:
-
"
a
woman
standing
on
the
side
of
a
road
at
night,girl,
long
hair,
motor
vehicle,
car,
looking
at
viewer,
ground
vehicle,
night,
hands
in
pockets,
blurry
background,
coat,
black
hair,
parted
lips,
bokeh,
jacket,
brown
hair,
outdoors,
red
lips,
upper
body,
artist
name"
-
"
,
dark
shot,0mm,
portrait
quality
of
a
arab
man
worker,boy,
wasteland
that
stands
out
vividly
against
the
background
of
the
desert,
barren
landscape,
closeup,
moles
skin,
soft
light,
sharp,
exposure
blend,
medium
shot,
bokeh,
hdr,
high
contrast,
cinematic,
teal
and
orange5,
muted
colors,
dim
colors,
soothing
tones,
low
saturation,
hyperdetailed,
noir"
-
"
fashion
photography
portrait
of
1girl,
offshoulder,
fluffy
short
hair,
soft
light,
rim
light,
beautiful
shadow,
low
key,
photorealistic,
raw
photo,
natural
skin
texture,
realistic
eye
and
face
details,
hyperrealism,
ultra
high
res,
4K,
Best
quality,
masterpiece,
necklace,
cleavage,
in
the
dark"
-
"
In
this
lighthearted
portrait,
a
woman
is
dressed
as
a
fierce
warrior,
armed
with
an
arsenal
of
paintbrushes
and
palette
knives.
Her
war
paint
is
composed
of
thick,
vibrant
strokes
of
color,
and
her
armor
is
made
of
paint
tubes
and
paint-splattered
canvases.
She
stands
victoriously
atop
a
mountain
of
conquered
blank
canvases,
with
a
beautiful,
colorful
landscape
behind
her,
symbolizing
the
power
of
art
and
creativity.
bust
Portrait,
close-up,
Bright
and
transparent
scene
lighting,
"
n_prompt
:
-
"
cartoon,
anime,
sketches,worst
quality,
low
quality,
deformed,
distorted,
disfigured,
bad
eyes,
wrong
lips,
weird
mouth,
bad
teeth,
mutated
hands
and
fingers,
bad
anatomy,
wrong
anatomy,
amputation,
extra
limb,
missing
limb,
floating
limbs,
disconnected
limbs,
mutation,
ugly,
disgusting,
bad_pictures,
negative_hand-neg"
-
"
cartoon,
anime,
sketches,worst
quality,
low
quality,
deformed,
distorted,
disfigured,
bad
eyes,
wrong
lips,
weird
mouth,
bad
teeth,
mutated
hands
and
fingers,
bad
anatomy,
wrong
anatomy,
amputation,
extra
limb,
missing
limb,
floating
limbs,
disconnected
limbs,
mutation,
ugly,
disgusting,
bad_pictures,
negative_hand-neg"
-
"
wrong
white
balance,
dark,
cartoon,
anime,
sketches,worst
quality,
low
quality,
deformed,
distorted,
disfigured,
bad
eyes,
wrong
lips,
weird
mouth,
bad
teeth,
mutated
hands
and
fingers,
bad
anatomy,
wrong
anatomy,
amputation,
extra
limb,
missing
limb,
floating
limbs,
disconnected
limbs,
mutation,
ugly,
disgusting,
bad_pictures,
negative_hand-neg"
-
"
wrong
white
balance,
dark,
cartoon,
anime,
sketches,worst
quality,
low
quality,
deformed,
distorted,
disfigured,
bad
eyes,
wrong
lips,
weird
mouth,
bad
teeth,
mutated
hands
and
fingers,
bad
anatomy,
wrong
anatomy,
amputation,
extra
limb,
missing
limb,
floating
limbs,
disconnected
limbs,
mutation,
ugly,
disgusting,
bad_pictures,
negative_hand-neg"
# motion module v1_15
-
dreambooth_path
:
"
models/DreamBooth_LoRA/majicmixRealistic_v4.safetensors"
lora_model_path
:
"
models/DreamBooth_LoRA/FilmVelvia2.safetensors"
lora_alpha
:
0.6
inference_config
:
"
configs/inference/inference-v1.yaml"
motion_module
:
"
models/Motion_Module/mm_sd_v15.ckpt"
seed
:
[
358675358833372813
,
3519455280971923743
,
11684545350557985081
,
8696855302100399877
]
steps
:
25
guidance_scale
:
8
prompt
:
-
"
a
woman
standing
on
the
side
of
a
road
at
night,girl,
long
hair,
motor
vehicle,
car,
looking
at
viewer,
ground
vehicle,
night,
hands
in
pockets,
blurry
background,
coat,
black
hair,
parted
lips,
bokeh,
jacket,
brown
hair,
outdoors,
red
lips,
upper
body,
artist
name"
-
"
,
dark
shot,0mm,
portrait
quality
of
a
arab
man
worker,boy,
wasteland
that
stands
out
vividly
against
the
background
of
the
desert,
barren
landscape,
closeup,
moles
skin,
soft
light,
sharp,
exposure
blend,
medium
shot,
bokeh,
hdr,
high
contrast,
cinematic,
teal
and
orange5,
muted
colors,
dim
colors,
soothing
tones,
low
saturation,
hyperdetailed,
noir"
-
"
fashion
photography
portrait
of
1girl,
offshoulder,
fluffy
short
hair,
soft
light,
rim
light,
beautiful
shadow,
low
key,
photorealistic,
raw
photo,
natural
skin
texture,
realistic
eye
and
face
details,
hyperrealism,
ultra
high
res,
4K,
Best
quality,
masterpiece,
necklace,
cleavage,
in
the
dark"
-
"
In
this
lighthearted
portrait,
a
woman
is
dressed
as
a
fierce
warrior,
armed
with
an
arsenal
of
paintbrushes
and
palette
knives.
Her
war
paint
is
composed
of
thick,
vibrant
strokes
of
color,
and
her
armor
is
made
of
paint
tubes
and
paint-splattered
canvases.
She
stands
victoriously
atop
a
mountain
of
conquered
blank
canvases,
with
a
beautiful,
colorful
landscape
behind
her,
symbolizing
the
power
of
art
and
creativity.
bust
Portrait,
close-up,
Bright
and
transparent
scene
lighting,
"
n_prompt
:
-
"
cartoon,
anime,
sketches,worst
quality,
low
quality,
deformed,
distorted,
disfigured,
bad
eyes,
wrong
lips,
weird
mouth,
bad
teeth,
mutated
hands
and
fingers,
bad
anatomy,
wrong
anatomy,
amputation,
extra
limb,
missing
limb,
floating
limbs,
disconnected
limbs,
mutation,
ugly,
disgusting,
bad_pictures,
negative_hand-neg"
-
"
cartoon,
anime,
sketches,worst
quality,
low
quality,
deformed,
distorted,
disfigured,
bad
eyes,
wrong
lips,
weird
mouth,
bad
teeth,
mutated
hands
and
fingers,
bad
anatomy,
wrong
anatomy,
amputation,
extra
limb,
missing
limb,
floating
limbs,
disconnected
limbs,
mutation,
ugly,
disgusting,
bad_pictures,
negative_hand-neg"
-
"
wrong
white
balance,
dark,
cartoon,
anime,
sketches,worst
quality,
low
quality,
deformed,
distorted,
disfigured,
bad
eyes,
wrong
lips,
weird
mouth,
bad
teeth,
mutated
hands
and
fingers,
bad
anatomy,
wrong
anatomy,
amputation,
extra
limb,
missing
limb,
floating
limbs,
disconnected
limbs,
mutation,
ugly,
disgusting,
bad_pictures,
negative_hand-neg"
-
"
wrong
white
balance,
dark,
cartoon,
anime,
sketches,worst
quality,
low
quality,
deformed,
distorted,
disfigured,
bad
eyes,
wrong
lips,
weird
mouth,
bad
teeth,
mutated
hands
and
fingers,
bad
anatomy,
wrong
anatomy,
amputation,
extra
limb,
missing
limb,
floating
limbs,
disconnected
limbs,
mutation,
ugly,
disgusting,
bad_pictures,
negative_hand-neg"
configs/prompts/v1/v1-8-GhibliBackground.yaml
0 → 100644
View file @
214c357b
# motion module v1_14
- dreambooth_path: "models/DreamBooth_LoRA/CounterfeitV30_25.safetensors"
  lora_model_path: "models/DreamBooth_LoRA/lora_Ghibli_n3.safetensors"
  lora_alpha: 1.0

  inference_config: "configs/inference/inference-v1.yaml"
  motion_module: "models/Motion_Module/mm_sd_v14.ckpt"

  seed: [8775748474469046618, 5893874876080607656, 11911465742147695752, 12437784838692000640]
  steps: 25
  guidance_scale: 8

  prompt:
    - "best quality,single build,architecture, blue_sky, building,cloudy_sky, day, fantasy, fence, field, house, build,architecture,landscape, moss, outdoors, overgrown, path, river, road, rock, scenery, sky, sword, tower, tree, waterfall"
    - "black_border, building, city, day, fantasy, ice, landscape, letterboxed, mountain, ocean, outdoors, planet, scenery, ship, snow, snowing, water, watercraft, waterfall, winter"
    - ",mysterious sea area, fantasy,build,concept"
    - "Tomb Raider,Scenography,Old building"

  n_prompt:
    - "worst quality, low quality, letterboxed"

# motion module v1_15
- dreambooth_path: "models/DreamBooth_LoRA/CounterfeitV30_25.safetensors"
  lora_model_path: "models/DreamBooth_LoRA/lora_Ghibli_n3.safetensors"
  lora_alpha: 1.0

  inference_config: "configs/inference/inference-v1.yaml"
  motion_module: "models/Motion_Module/mm_sd_v15.ckpt"

  seed: [8775748474469046618, 5893874876080607656, 11911465742147695752, 12437784838692000640]
  steps: 25
  guidance_scale: 8

  prompt:
    - "best quality,single build,architecture, blue_sky, building,cloudy_sky, day, fantasy, fence, field, house, build,architecture,landscape, moss, outdoors, overgrown, path, river, road, rock, scenery, sky, sword, tower, tree, waterfall"
    - "black_border, building, city, day, fantasy, ice, landscape, letterboxed, mountain, ocean, outdoors, planet, scenery, ship, snow, snowing, water, watercraft, waterfall, winter"
    - ",mysterious sea area, fantasy,build,concept"
    - "Tomb Raider,Scenography,Old building"

  n_prompt:
    - "worst quality, low quality, letterboxed"
configs/prompts/v2/v2-1-RealisticVision.yaml
0 → 100644
View file @
214c357b
- inference_config: "configs/inference/inference-v2.yaml"
  motion_module: "models/Motion_Module/mm_sd_v15_v2.ckpt"

  dreambooth_path: "models/DreamBooth_LoRA/realisticVisionV51_v51VAE.safetensors"
  lora_model_path: ""

  seed: [13100322578370451493, 14752961627088720670, 9329399085567825781, 16987697414827649302]
  steps: 25
  guidance_scale: 7.5

  prompt:
    - "b&w photo of 42 y.o man in black clothes, bald, face, half body, body, high detailed skin, skin pores, coastline, overcast weather, wind, waves, 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3"
    - "close up photo of a rabbit, forest, haze, halation, bloom, dramatic atmosphere, centred, rule of thirds, 200mm 1.4f macro shot"
    - "photo of coastline, rocks, storm weather, wind, waves, lightning, 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3"
    - "night, b&w photo of old house, post apocalypse, forest, storm weather, wind, rocks, 8k uhd, dslr, soft lighting, high quality, film grain"

  n_prompt:
    - "semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"
    - "semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"
    - "blur, haze, deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, mutated hands and fingers, deformed, distorted, disfigured, poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, disconnected limbs, mutation, mutated, ugly, disgusting, amputation"
    - "blur, haze, deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, art, mutated hands and fingers, deformed, distorted, disfigured, poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, disconnected limbs, mutation, mutated, ugly, disgusting, amputation"
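The v2 prompt configs carry one seed per prompt so every clip is reproducible. A short sketch mirroring the seeding logic already present in `app.py`'s `animate()` (fix the RNG when a seed is given, otherwise let PyTorch pick one and record it):

```python
# Sketch: per-prompt seeding, following the same pattern as animate() in app.py.
import torch

def apply_seed(seed) -> int:
    if seed not in (-1, "", None):
        torch.manual_seed(int(seed))
    else:
        torch.seed()
    return torch.initial_seed()

for configured_seed in [13100322578370451493, 14752961627088720670]:  # seeds from the config above
    used_seed = apply_seed(configured_seed)
    print(used_seed)
```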
configs/prompts/v2/v2-2-RealisticVision-MotionLoRA.yaml
0 → 100644
View file @
214c357b
# ZoomIn
-
inference_config
:
"
configs/inference/inference-v2.yaml"
motion_module
:
"
models/Motion_Module/mm_sd_v15_v2.ckpt"
motion_module_lora_configs
:
-
path
:
"
models/MotionLoRA/v2_lora_ZoomIn.ckpt"
alpha
:
1.0
dreambooth_path
:
"
models/DreamBooth_LoRA/realisticVisionV51_v51VAE.safetensors"
lora_model_path
:
"
"
seed
:
45987230
steps
:
25
guidance_scale
:
7.5
prompt
:
-
"
photo
of
coastline,
rocks,
storm
weather,
wind,
waves,
lightning,
8k
uhd,
dslr,
soft
lighting,
high
quality,
film
grain,
Fujifilm
XT3"
n_prompt
:
-
"
blur,
haze,
deformed
iris,
deformed
pupils,
semi-realistic,
cgi,
3d,
render,
sketch,
cartoon,
drawing,
anime,
mutated
hands
and
fingers,
deformed,
distorted,
disfigured,
poorly
drawn,
bad
anatomy,
wrong
anatomy,
extra
limb,
missing
limb,
floating
limbs,
disconnected
limbs,
mutation,
mutated,
ugly,
disgusting,
amputation"
# ZoomOut
-
inference_config
:
"
configs/inference/inference-v2.yaml"
motion_module
:
"
models/Motion_Module/mm_sd_v15_v2.ckpt"
motion_module_lora_configs
:
-
path
:
"
models/MotionLoRA/v2_lora_ZoomOut.ckpt"
alpha
:
1.0
dreambooth_path
:
"
models/DreamBooth_LoRA/realisticVisionV51_v51VAE.safetensors"
lora_model_path
:
"
"
seed
:
45987230
steps
:
25
guidance_scale
:
7.5
prompt
:
-
"
photo
of
coastline,
rocks,
storm
weather,
wind,
waves,
lightning,
8k
uhd,
dslr,
soft
lighting,
high
quality,
film
grain,
Fujifilm
XT3"
n_prompt
:
-
"
blur,
haze,
deformed
iris,
deformed
pupils,
semi-realistic,
cgi,
3d,
render,
sketch,
cartoon,
drawing,
anime,
mutated
hands
and
fingers,
deformed,
distorted,
disfigured,
poorly
drawn,
bad
anatomy,
wrong
anatomy,
extra
limb,
missing
limb,
floating
limbs,
disconnected
limbs,
mutation,
mutated,
ugly,
disgusting,
amputation"
# PanLeft
-
inference_config
:
"
configs/inference/inference-v2.yaml"
motion_module
:
"
models/Motion_Module/mm_sd_v15_v2.ckpt"
motion_module_lora_configs
:
-
path
:
"
models/MotionLoRA/v2_lora_PanLeft.ckpt"
alpha
:
1.0
dreambooth_path
:
"
models/DreamBooth_LoRA/realisticVisionV51_v51VAE.safetensors"
lora_model_path
:
"
"
seed
:
45987230
steps
:
25
guidance_scale
:
7.5
prompt
:
-
"
photo
of
coastline,
rocks,
storm
weather,
wind,
waves,
lightning,
8k
uhd,
dslr,
soft
lighting,
high
quality,
film
grain,
Fujifilm
XT3"
n_prompt
:
-
"
blur,
haze,
deformed
iris,
deformed
pupils,
semi-realistic,
cgi,
3d,
render,
sketch,
cartoon,
drawing,
anime,
mutated
hands
and
fingers,
deformed,
distorted,
disfigured,
poorly
drawn,
bad
anatomy,
wrong
anatomy,
extra
limb,
missing
limb,
floating
limbs,
disconnected
limbs,
mutation,
mutated,
ugly,
disgusting,
amputation"
# PanRight
-
inference_config
:
"
configs/inference/inference-v2.yaml"
motion_module
:
"
models/Motion_Module/mm_sd_v15_v2.ckpt"
motion_module_lora_configs
:
-
path
:
"
models/MotionLoRA/v2_lora_PanRight.ckpt"
alpha
:
1.0
dreambooth_path
:
"
models/DreamBooth_LoRA/realisticVisionV51_v51VAE.safetensors"
lora_model_path
:
"
"
seed
:
45987230
steps
:
25
guidance_scale
:
7.5
prompt
:
-
"
photo
of
coastline,
rocks,
storm
weather,
wind,
waves,
lightning,
8k
uhd,
dslr,
soft
lighting,
high
quality,
film
grain,
Fujifilm
XT3"
n_prompt
:
-
"
blur,
haze,
deformed
iris,
deformed
pupils,
semi-realistic,
cgi,
3d,
render,
sketch,
cartoon,
drawing,
anime,
mutated
hands
and
fingers,
deformed,
distorted,
disfigured,
poorly
drawn,
bad
anatomy,
wrong
anatomy,
extra
limb,
missing
limb,
floating
limbs,
disconnected
limbs,
mutation,
mutated,
ugly,
disgusting,
amputation"
# TiltUp
-
inference_config
:
"
configs/inference/inference-v2.yaml"
motion_module
:
"
models/Motion_Module/mm_sd_v15_v2.ckpt"
motion_module_lora_configs
:
-
path
:
"
models/MotionLoRA/v2_lora_TiltUp.ckpt"
alpha
:
1.0
dreambooth_path
:
"
models/DreamBooth_LoRA/realisticVisionV51_v51VAE.safetensors"
lora_model_path
:
"
"
seed
:
45987230
steps
:
25
guidance_scale
:
7.5
prompt
:
-
"
photo
of
coastline,
rocks,
storm
weather,
wind,
waves,
lightning,
8k
uhd,
dslr,
soft
lighting,
high
quality,
film
grain,
Fujifilm
XT3"
n_prompt
:
-
"
blur,
haze,
deformed
iris,
deformed
pupils,
semi-realistic,
cgi,
3d,
render,
sketch,
cartoon,
drawing,
anime,
mutated
hands
and
fingers,
deformed,
distorted,
disfigured,
poorly
drawn,
bad
anatomy,
wrong
anatomy,
extra
limb,
missing
limb,
floating
limbs,
disconnected
limbs,
mutation,
mutated,
ugly,
disgusting,
amputation"
# TiltDown
-
inference_config
:
"
configs/inference/inference-v2.yaml"
motion_module
:
"
models/Motion_Module/mm_sd_v15_v2.ckpt"
motion_module_lora_configs
:
-
path
:
"
models/MotionLoRA/v2_lora_TiltDown.ckpt"
alpha
:
1.0
dreambooth_path
:
"
models/DreamBooth_LoRA/realisticVisionV51_v51VAE.safetensors"
lora_model_path
:
"
"
seed
:
45987230
steps
:
25
guidance_scale
:
7.5
prompt
:
-
"
photo
of
coastline,
rocks,
storm
weather,
wind,
waves,
lightning,
8k
uhd,
dslr,
soft
lighting,
high
quality,
film
grain,
Fujifilm
XT3"
n_prompt
:
-
"
blur,
haze,
deformed
iris,
deformed
pupils,
semi-realistic,
cgi,
3d,
render,
sketch,
cartoon,
drawing,
anime,
mutated
hands
and
fingers,
deformed,
distorted,
disfigured,
poorly
drawn,
bad
anatomy,
wrong
anatomy,
extra
limb,
missing
limb,
floating
limbs,
disconnected
limbs,
mutation,
mutated,
ugly,
disgusting,
amputation"
# RollingAnticlockwise
-
inference_config
:
"
configs/inference/inference-v2.yaml"
motion_module
:
"
models/Motion_Module/mm_sd_v15_v2.ckpt"
motion_module_lora_configs
:
-
path
:
"
models/MotionLoRA/v2_lora_RollingAnticlockwise.ckpt"
alpha
:
1.0
dreambooth_path
:
"
models/DreamBooth_LoRA/realisticVisionV51_v51VAE.safetensors"
lora_model_path
:
"
"
seed
:
45987230
steps
:
25
guidance_scale
:
7.5
prompt
:
-
"
photo
of
coastline,
rocks,
storm
weather,
wind,
waves,
lightning,
8k
uhd,
dslr,
soft
lighting,
high
quality,
film
grain,
Fujifilm
XT3"
n_prompt
:
-
"
blur,
haze,
deformed
iris,
deformed
pupils,
semi-realistic,
cgi,
3d,
render,
sketch,
cartoon,
drawing,
anime,
mutated
hands
and
fingers,
deformed,
distorted,
disfigured,
poorly
drawn,
bad
anatomy,
wrong
anatomy,
extra
limb,
missing
limb,
floating
limbs,
disconnected
limbs,
mutation,
mutated,
ugly,
disgusting,
amputation"
# RollingClockwise
- inference_config: "configs/inference/inference-v2.yaml"
  motion_module:    "models/Motion_Module/mm_sd_v15_v2.ckpt"

  motion_module_lora_configs:
    - path:  "models/MotionLoRA/v2_lora_RollingClockwise.ckpt"
      alpha: 1.0

  dreambooth_path: "models/DreamBooth_LoRA/realisticVisionV51_v51VAE.safetensors"
  lora_model_path: ""

  seed:           45987230
  steps:          25
  guidance_scale: 7.5

  prompt:
    - "photo of coastline, rocks, storm weather, wind, waves, lightning, 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3"

  n_prompt:
    - "blur, haze, deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, mutated hands and fingers, deformed, distorted, disfigured, poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, disconnected limbs, mutation, mutated, ugly, disgusting, amputation"
configs/prompts/v3/v3-1-T2V.yaml
0 → 100644
View file @
214c357b
# 1-animation
- domain_lora_scale: 1.0
  adapter_lora_path: "models/Motion_Module/v3_sd15_adapter.ckpt"
  dreambooth_path:   ""

  inference_config: "configs/inference/inference-v3.yaml"
  motion_module:    "models/Motion_Module/v3_sd15_mm.ckpt"

  controlnet_config: "configs/inference/sparsectrl/latent_condition.yaml"
  controlnet_path:   "models/SparseCtrl/v3_sd15_sparsectrl_rgb.ckpt"

  H: 256
  W: 384
  seed:           [123, 234]
  steps:          25
  guidance_scale: 8.5

  controlnet_image_indexs: [0]
  controlnet_images:
    - "__assets__/demos/image/painting.png"

  prompt:
    - an oil painting of a sailboat in the ocean wave
    - an oil painting of a sailboat in the ocean wave

  n_prompt:
    - "worst quality, low quality, letterboxed"
# 2-interpolation
- domain_lora_scale: 1.0
  adapter_lora_path: "models/Motion_Module/v3_sd15_adapter.ckpt"
  dreambooth_path:   ""

  inference_config: "configs/inference/inference-v3.yaml"
  motion_module:    "models/Motion_Module/v3_sd15_mm.ckpt"

  controlnet_config: "configs/inference/sparsectrl/latent_condition.yaml"
  controlnet_path:   "models/SparseCtrl/v3_sd15_sparsectrl_rgb.ckpt"

  H: 256
  W: 384
  seed:           [123, 234]
  steps:          25
  guidance_scale: 8.5

  controlnet_image_indexs: [0, -1]
  controlnet_images:
    - "__assets__/demos/image/interpolation_1.png"
    - "__assets__/demos/image/interpolation_2.png"

  prompt:
    - "aerial view, beautiful forest, autumn, 4k, high quality"
    - "aerial view, beautiful forest, autumn, 4k, high quality"

  n_prompt:
    - "worst quality, low quality, letterboxed"
# 3-interpolation
- domain_lora_scale: 1.0
  adapter_lora_path: "models/Motion_Module/v3_sd15_adapter.ckpt"
  dreambooth_path:   ""

  inference_config: "configs/inference/inference-v3.yaml"
  motion_module:    "models/Motion_Module/v3_sd15_mm.ckpt"

  controlnet_config: "configs/inference/sparsectrl/latent_condition.yaml"
  controlnet_path:   "models/SparseCtrl/v3_sd15_sparsectrl_rgb.ckpt"

  H: 256
  W: 384
  seed:           [123, 234]
  steps:          25
  guidance_scale: 8.5

  controlnet_image_indexs: [0, 5, 10, 15]
  controlnet_images:
    - "__assets__/demos/image/low_fps_1.png"
    - "__assets__/demos/image/low_fps_2.png"
    - "__assets__/demos/image/low_fps_3.png"
    - "__assets__/demos/image/low_fps_4.png"

  prompt:
    - "two people holding hands in a field with wind turbines in the background"
    - "two people holding hands in a field with wind turbines in the background"

  n_prompt:
    - "worst quality, low quality, letterboxed"
# 3-prediction
- domain_lora_scale: 1.0
  adapter_lora_path: "models/Motion_Module/v3_sd15_adapter.ckpt"
  dreambooth_path:   ""

  inference_config: "configs/inference/inference-v3.yaml"
  motion_module:    "models/Motion_Module/v3_sd15_mm.ckpt"

  controlnet_config: "configs/inference/sparsectrl/latent_condition.yaml"
  controlnet_path:   "models/SparseCtrl/v3_sd15_sparsectrl_rgb.ckpt"

  H: 256
  W: 384
  seed:           [123, 234]
  steps:          25
  guidance_scale: 8.5

  controlnet_image_indexs: [0, 1, 2, 3]
  controlnet_images:
    - "__assets__/demos/image/prediction_1.png"
    - "__assets__/demos/image/prediction_2.png"
    - "__assets__/demos/image/prediction_3.png"
    - "__assets__/demos/image/prediction_4.png"

  prompt:
    - "an astronaut is flying in the space, 4k, high resolution"
    - "an astronaut is flying in the space, 4k, high resolution"

  n_prompt:
    - "worst quality, low quality, letterboxed"