Commit 1336a33d authored by zzg_666

wan2.2
#------------------------ File: __init__.py ------------------------#
# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
import copy
import os
# Silence the HF tokenizers fork-parallelism warning in multi-process runs.
os.environ['TOKENIZERS_PARALLELISM'] = 'false'
from .wan_i2v_A14B import i2v_A14B
from .wan_s2v_14B import s2v_14B
from .wan_t2v_A14B import t2v_A14B
from .wan_ti2v_5B import ti2v_5B
from .wan_animate_14B import animate_14B
WAN_CONFIGS = {
    't2v-A14B': t2v_A14B,
    'i2v-A14B': i2v_A14B,
    'ti2v-5B': ti2v_5B,
    'animate-14B': animate_14B,
    's2v-14B': s2v_14B,
}

SIZE_CONFIGS = {
    '720*1280': (720, 1280),
    '1280*720': (1280, 720),
    '480*832': (480, 832),
    '832*480': (832, 480),
    '704*1280': (704, 1280),
    '1280*704': (1280, 704),
    '1024*704': (1024, 704),
    '704*1024': (704, 1024),
}

MAX_AREA_CONFIGS = {
    '720*1280': 720 * 1280,
    '1280*720': 1280 * 720,
    '480*832': 480 * 832,
    '832*480': 832 * 480,
    '704*1280': 704 * 1280,
    '1280*704': 1280 * 704,
    '1024*704': 1024 * 704,
    '704*1024': 704 * 1024,
}

SUPPORTED_SIZES = {
    't2v-A14B': ('720*1280', '1280*720', '480*832', '832*480'),
    'i2v-A14B': ('720*1280', '1280*720', '480*832', '832*480'),
    'ti2v-5B': ('704*1280', '1280*704'),
    's2v-14B': ('720*1280', '1280*720', '480*832', '832*480', '1024*704',
                '704*1024', '704*1280', '1280*704'),
    'animate-14B': ('720*1280', '1280*720'),
}

#------------------------ File: shared_config.py ------------------------#
# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
import torch
from easydict import EasyDict
#------------------------ Wan shared config ------------------------#
wan_shared_cfg = EasyDict()
# t5
wan_shared_cfg.t5_model = 'umt5_xxl'
wan_shared_cfg.t5_dtype = torch.bfloat16
wan_shared_cfg.text_len = 512
# transformer
wan_shared_cfg.param_dtype = torch.bfloat16
# inference
wan_shared_cfg.num_train_timesteps = 1000
wan_shared_cfg.sample_fps = 16
# Default negative prompt (in Chinese; roughly: "garish colors, overexposed,
# static, blurry details, subtitles, style, artwork, painting, frame, still,
# overall gray, worst quality, low quality, JPEG compression artifacts, ugly,
# mutilated, extra fingers, poorly drawn hands, poorly drawn face, deformed,
# disfigured, malformed limbs, fused fingers, motionless frame, cluttered
# background, three legs, crowded background, walking backwards").
wan_shared_cfg.sample_neg_prompt = '色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走'
wan_shared_cfg.frame_num = 81
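To make the inheritance pattern concrete: each per-model config below starts from wan_shared_cfg via EasyDict.update() and then overrides fields in place. A minimal sketch (the `shared`/`demo` names are hypothetical):

import torch
from easydict import EasyDict

shared = EasyDict(param_dtype=torch.bfloat16, frame_num=81, sample_fps=16)
demo = EasyDict(__name__='Config: demo')
demo.update(shared)   # copy the shared defaults into this config
demo.frame_num = 77   # local override, as animate_14B does below
print(demo.frame_num, demo.sample_fps)  # 77 16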

#------------------------ File: wan_animate_14B.py ------------------------#
# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
from easydict import EasyDict
from .shared_config import wan_shared_cfg
#------------------------ Wan animate 14B ------------------------#
animate_14B = EasyDict(__name__='Config: Wan animate 14B')
animate_14B.update(wan_shared_cfg)
animate_14B.t5_checkpoint = 'models_t5_umt5-xxl-enc-bf16.pth'
animate_14B.t5_tokenizer = 'google/umt5-xxl'
animate_14B.clip_checkpoint = 'models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth'
animate_14B.clip_tokenizer = 'xlm-roberta-large'
animate_14B.lora_checkpoint = 'relighting_lora.ckpt'
# vae
animate_14B.vae_checkpoint = 'Wan2.1_VAE.pth'
animate_14B.vae_stride = (4, 8, 8)
# transformer
animate_14B.patch_size = (1, 2, 2)
animate_14B.dim = 5120
animate_14B.ffn_dim = 13824
animate_14B.freq_dim = 256
animate_14B.num_heads = 40
animate_14B.num_layers = 40
animate_14B.window_size = (-1, -1)
animate_14B.qk_norm = True
animate_14B.cross_attn_norm = True
animate_14B.eps = 1e-6
animate_14B.use_face_encoder = True
animate_14B.motion_encoder_dim = 512
# inference
animate_14B.sample_shift = 5.0
animate_14B.sample_steps = 20
animate_14B.sample_guide_scale = 1.0
animate_14B.frame_num = 77
animate_14B.sample_fps = 30
# Default prompt (Chinese): "The person in the video is performing movements."
animate_14B.prompt = '视频中的人在做动作'
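A worked example of how these fields combine into a token count, assuming the usual Wan2.x relation that the causal VAE keeps (frame_num - 1) / stride_t + 1 latent frames and the transformer patchifies the latent grid; this derivation is not in the commit itself:

# animate-14B at 1280*720, assuming latent frames = (frame_num - 1) // 4 + 1.
frame_num, (st, sh, sw) = 77, (4, 8, 8)   # vae_stride
pt, ph, pw = 1, 2, 2                      # patch_size
h, w = 720, 1280
lat_t = (frame_num - 1) // st + 1         # 20 latent frames
lat_h, lat_w = h // sh, w // sw           # 90 x 160 latent grid
tokens = (lat_t // pt) * (lat_h // ph) * (lat_w // pw)
print(lat_t, lat_h, lat_w, tokens)        # 20 90 160 72000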

#------------------------ File: wan_i2v_A14B.py ------------------------#
# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
import torch
from easydict import EasyDict
from .shared_config import wan_shared_cfg
#------------------------ Wan I2V A14B ------------------------#
i2v_A14B = EasyDict(__name__='Config: Wan I2V A14B')
i2v_A14B.update(wan_shared_cfg)
i2v_A14B.t5_checkpoint = 'models_t5_umt5-xxl-enc-bf16.pth'
i2v_A14B.t5_tokenizer = 'google/umt5-xxl'
# vae
i2v_A14B.vae_checkpoint = 'Wan2.1_VAE.pth'
i2v_A14B.vae_stride = (4, 8, 8)
# transformer
i2v_A14B.patch_size = (1, 2, 2)
i2v_A14B.dim = 5120
i2v_A14B.ffn_dim = 13824
i2v_A14B.freq_dim = 256
i2v_A14B.num_heads = 40
i2v_A14B.num_layers = 40
i2v_A14B.window_size = (-1, -1)
i2v_A14B.qk_norm = True
i2v_A14B.cross_attn_norm = True
i2v_A14B.eps = 1e-6
i2v_A14B.low_noise_checkpoint = 'low_noise_model'
i2v_A14B.high_noise_checkpoint = 'high_noise_model'
# inference
i2v_A14B.sample_shift = 5.0
i2v_A14B.sample_steps = 40
i2v_A14B.boundary = 0.900
i2v_A14B.sample_guide_scale = (3.5, 3.5) # low noise, high noise
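The low/high-noise checkpoint pair plus the boundary field suggest a two-expert denoising split over the timestep range. A hedged sketch of how a sampler might consume them; the selection rule is an assumption inferred from the field names, not code from this commit:

def select_expert(t: float, cfg):
    # Early (high-noise) timesteps at or above boundary * num_train_timesteps
    # go to the high-noise expert, the rest to the low-noise expert; each
    # expert pairs with its own entry of sample_guide_scale.
    low_scale, high_scale = cfg.sample_guide_scale  # (low noise, high noise)
    if t >= cfg.boundary * cfg.num_train_timesteps:
        return cfg.high_noise_checkpoint, high_scale
    return cfg.low_noise_checkpoint, low_scale

print(select_expert(950, i2v_A14B))  # ('high_noise_model', 3.5)
print(select_expert(500, i2v_A14B))  # ('low_noise_model', 3.5)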

#------------------------ File: wan_s2v_14B.py ------------------------#
# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
from easydict import EasyDict
from .shared_config import wan_shared_cfg
#------------------------ Wan S2V 14B ------------------------#
s2v_14B = EasyDict(__name__='Config: Wan S2V 14B')
s2v_14B.update(wan_shared_cfg)
# t5
s2v_14B.t5_checkpoint = 'models_t5_umt5-xxl-enc-bf16.pth'
s2v_14B.t5_tokenizer = 'google/umt5-xxl'
# vae
s2v_14B.vae_checkpoint = 'Wan2.1_VAE.pth'
s2v_14B.vae_stride = (4, 8, 8)
# wav2vec
s2v_14B.wav2vec = "wav2vec2-large-xlsr-53-english"
s2v_14B.num_heads = 40
# transformer
s2v_14B.transformer = EasyDict(
    __name__="Config: Transformer config for WanModel_S2V")
s2v_14B.transformer.patch_size = (1, 2, 2)
s2v_14B.transformer.dim = 5120
s2v_14B.transformer.ffn_dim = 13824
s2v_14B.transformer.freq_dim = 256
s2v_14B.transformer.num_heads = 40
s2v_14B.transformer.num_layers = 40
s2v_14B.transformer.window_size = (-1, -1)
s2v_14B.transformer.qk_norm = True
s2v_14B.transformer.cross_attn_norm = True
s2v_14B.transformer.eps = 1e-6
s2v_14B.transformer.enable_adain = True
s2v_14B.transformer.adain_mode = "attn_norm"
s2v_14B.transformer.audio_inject_layers = [
    0, 4, 8, 12, 16, 20, 24, 27, 30, 33, 36, 39
]
s2v_14B.transformer.zero_init = True
s2v_14B.transformer.zero_timestep = True
s2v_14B.transformer.enable_motioner = False
s2v_14B.transformer.add_last_motion = True
s2v_14B.transformer.trainable_token = False
s2v_14B.transformer.enable_tsm = False
s2v_14B.transformer.enable_framepack = True
s2v_14B.transformer.framepack_drop_mode = 'padd'
s2v_14B.transformer.audio_dim = 1024
s2v_14B.transformer.motion_frames = 73
s2v_14B.transformer.cond_dim = 16
# inference
# Task-specific negative prompt (in Chinese; roughly: "blurry image, worst
# quality, blurry image, blurry details, intense emotional agitation, rapidly
# shaking hands, subtitles, ugly, mutilated, extra fingers, poorly drawn
# hands, poorly drawn face, deformed, disfigured, malformed limbs, fused
# fingers, motionless frame, cluttered background, three legs, crowded
# background, walking backwards"; "blurry image" appears twice in the source).
s2v_14B.sample_neg_prompt = "画面模糊,最差质量,画面模糊,细节模糊不清,情绪激动剧烈,手快速抖动,字幕,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
s2v_14B.drop_first_motion = True
s2v_14B.sample_shift = 3
s2v_14B.sample_steps = 40
s2v_14B.sample_guide_scale = 4.5
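As an illustration of the injection list (a hypothetical helper, not the repo's WanModel_S2V): blocks whose index appears in audio_inject_layers would carry an extra audio cross-attention input of width transformer.audio_dim (1024, a typical wav2vec2 hidden size).

def audio_layer_mask(tcfg):
    # True for the 12 blocks (of num_layers=40) that receive audio features;
    # the spacing tightens from every 4th layer to every 3rd near the output.
    inject = set(tcfg.audio_inject_layers)
    return [i in inject for i in range(tcfg.num_layers)]

mask = audio_layer_mask(s2v_14B.transformer)
print(sum(mask), len(mask))  # 12 40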

#------------------------ File: wan_t2v_A14B.py ------------------------#
# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
from easydict import EasyDict
from .shared_config import wan_shared_cfg
#------------------------ Wan T2V A14B ------------------------#
t2v_A14B = EasyDict(__name__='Config: Wan T2V A14B')
t2v_A14B.update(wan_shared_cfg)
# t5
t2v_A14B.t5_checkpoint = 'models_t5_umt5-xxl-enc-bf16.pth'
t2v_A14B.t5_tokenizer = 'google/umt5-xxl'
# vae
t2v_A14B.vae_checkpoint = 'Wan2.1_VAE.pth'
t2v_A14B.vae_stride = (4, 8, 8)
# transformer
t2v_A14B.patch_size = (1, 2, 2)
t2v_A14B.dim = 5120
t2v_A14B.ffn_dim = 13824
t2v_A14B.freq_dim = 256
t2v_A14B.num_heads = 40
t2v_A14B.num_layers = 40
t2v_A14B.window_size = (-1, -1)
t2v_A14B.qk_norm = True
t2v_A14B.cross_attn_norm = True
t2v_A14B.eps = 1e-6
t2v_A14B.low_noise_checkpoint = 'low_noise_model'
t2v_A14B.high_noise_checkpoint = 'high_noise_model'
# inference
t2v_A14B.sample_shift = 12.0
t2v_A14B.sample_steps = 40
t2v_A14B.boundary = 0.875
t2v_A14B.sample_guide_scale = (3.0, 4.0) # low noise, high noise

#------------------------ File: wan_ti2v_5B.py ------------------------#
# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
from easydict import EasyDict
from .shared_config import wan_shared_cfg
#------------------------ Wan TI2V 5B ------------------------#
ti2v_5B = EasyDict(__name__='Config: Wan TI2V 5B')
ti2v_5B.update(wan_shared_cfg)
# t5
ti2v_5B.t5_checkpoint = 'models_t5_umt5-xxl-enc-bf16.pth'
ti2v_5B.t5_tokenizer = 'google/umt5-xxl'
# vae
ti2v_5B.vae_checkpoint = 'Wan2.2_VAE.pth'
ti2v_5B.vae_stride = (4, 16, 16)
# transformer
ti2v_5B.patch_size = (1, 2, 2)
ti2v_5B.dim = 3072
ti2v_5B.ffn_dim = 14336
ti2v_5B.freq_dim = 256
ti2v_5B.num_heads = 24
ti2v_5B.num_layers = 30
ti2v_5B.window_size = (-1, -1)
ti2v_5B.qk_norm = True
ti2v_5B.cross_attn_norm = True
ti2v_5B.eps = 1e-6
# inference
ti2v_5B.sample_fps = 24
ti2v_5B.sample_shift = 5.0
ti2v_5B.sample_steps = 50
ti2v_5B.sample_guide_scale = 5.0
ti2v_5B.frame_num = 121
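A quick arithmetic check on the timing fields across the configs above: frame_num / sample_fps gives the clip duration each config targets.

print(81 / 16)   # 5.0625 s -- shared default (t2v-A14B, i2v-A14B, s2v-14B)
print(121 / 24)  # ~5.04 s  -- ti2v-5B
print(77 / 30)   # ~2.57 s  -- animate-14B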