Commit 1336a33d authored by zzg_666's avatar zzg_666
Browse files

wan2.2

parents
This diff is collapsed.
icon.png

70.5 KB

# 模型唯一标识
modelCode=1810
# 模型名称
modelName=Wan2.2_pytorch
# 模型描述
modelDescription=Wan2.2是阿里开源的多模态视频生成模型,支持文本、图像、语音驱动,采用MoE架构,消费级显卡可跑,开源可商用,画质达1080p,内置电影级光影控制,生成高效。
# 应用场景
processType=推理
# 算法类别
appScenario=视频生成
# 框架类型
frameType=pytorch
# 加速卡类型
accelerateType=BW1000,K100AI
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "wan"
version = "2.2.0"
description = "Wan: Open and Advanced Large-Scale Video Generative Models"
authors = [
    { name = "Wan Team", email = "wan.ai@alibabacloud.com" },
]
license = { file = "LICENSE.txt" }
readme = "README.md"
requires-python = ">=3.10,<4.0"
dependencies = [
    "torch>=2.4.0",
    "torchvision>=0.19.0",
    "opencv-python>=4.9.0.80",
    "diffusers>=0.31.0",
    "transformers>=4.49.0",
    "tokenizers>=0.20.3",
    "accelerate>=1.1.1",
    "tqdm",
    "imageio",
    "easydict",
    "ftfy",
    "dashscope",
    "imageio-ffmpeg",
    "flash_attn",
    "numpy>=1.23.5,<2",
]

[project.optional-dependencies]
dev = [
    "pytest",
    "black",
    "flake8",
    "isort",
    "mypy",
    "huggingface-hub[cli]",
]

[project.urls]
homepage = "https://wanxai.com"
documentation = "https://github.com/Wan-Video/Wan2.2"
repository = "https://github.com/Wan-Video/Wan2.2"
huggingface = "https://huggingface.co/Wan-AI/"
modelscope = "https://modelscope.cn/organization/Wan-AI"
discord = "https://discord.gg/p5XbdQV7"

[tool.setuptools]
packages = ["wan"]

[tool.setuptools.package-data]
wan = ["**/*.py"]

[tool.black]
line-length = 88

[tool.isort]
profile = "black"

[tool.mypy]
strict = true
#torch>=2.4.0
#torchvision>=0.19.0
#torchaudio
opencv-python>=4.9.0.80
diffusers>=0.31.0
transformers>=4.49.0,<=4.51.3
tokenizers>=0.20.3
accelerate>=1.1.1
tqdm
imageio[ffmpeg]
easydict
ftfy
dashscope
imageio-ffmpeg
#flash_attn
numpy>=1.23.5,<2
decord
peft
onnxruntime
pandas
matplotlib
-e git+https://github.com/facebookresearch/sam2.git@0e78a118995e66bb27d78518c4bd9a3e95b4e266#egg=SAM-2
loguru
sentencepiece
\ No newline at end of file
openai-whisper
HyperPyYAML
#onnxruntime
inflect
wetext
omegaconf
conformer
hydra-core
lightning
rich
gdown
matplotlib
wget
pyarrow
pyworld
librosa
decord
modelscope
GitPython
Put all your models (Wan2.2-T2V-A14B, Wan2.2-I2V-A14B, Wan2.2-TI2V-5B) in a folder and specify the max GPU number you want to use.
```bash
bash ./tests/test.sh <local model dir> <gpu number>
```
#!/bin/bash
# Entry point for the Wan2.2 smoke-test suite.
# Usage: test.sh <local model dir> <gpu number>
set -x

# NOTE(review): presumably unset so NCCL debug logging does not flood the
# `set -x` trace — confirm with the original author.
unset NCCL_DEBUG

if [ "$#" -eq 2 ]; then
    MODEL_DIR=$(realpath "$1")
    GPUS=$2
else
    echo "Usage: $0 <local model dir> <gpu number>"
    exit 1
fi

# Fail fast when the model directory cannot be resolved or does not exist,
# instead of letting every torchrun invocation below die later with an
# opaque checkpoint-loading error.
if [ -z "$MODEL_DIR" ] || [ ! -d "$MODEL_DIR" ]; then
    echo "Error: '$1' is not an existing directory" >&2
    exit 1
fi

# Resolve the repository root relative to this script's own location so the
# suite can be launched from any working directory.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
REPO_ROOT="$(dirname "$SCRIPT_DIR")"
cd "$REPO_ROOT" || exit 1

# Generation driver used by all test functions below.
PY_FILE=./generate.py
# Smoke-test the Wan2.2-T2V-A14B text-to-video checkpoint at several
# resolutions with FSDP sharding (--dit_fsdp/--t5_fsdp) and Ulysses sequence
# parallelism sized to the GPU count. Relies on globals from the preamble:
# MODEL_DIR, GPUS, PY_FILE. Note $CKPT_DIR is expanded unquoted below, so
# MODEL_DIR must not contain whitespace.
function t2v_A14B() {
CKPT_DIR="$MODEL_DIR/Wan2.2-T2V-A14B"
# # 1-GPU Test
# echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> t2v_A14B 1-GPU Test: "
# python $PY_FILE --task t2v-A14B --size 480*832 --ckpt_dir $CKPT_DIR
# Multiple GPU Test
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> t2v_A14B Multiple GPU Test: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task t2v-A14B --ckpt_dir $CKPT_DIR --size 832*480 --dit_fsdp --t5_fsdp --ulysses_size $GPUS
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> t2v_A14B Multiple GPU Test: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task t2v-A14B --ckpt_dir $CKPT_DIR --size 720*1280 --dit_fsdp --t5_fsdp --ulysses_size $GPUS
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> t2v_A14B Multiple GPU Test: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task t2v-A14B --ckpt_dir $CKPT_DIR --size 1280*720 --dit_fsdp --t5_fsdp --ulysses_size $GPUS
# Same run with prompt extension through a local Qwen text model.
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> t2v_A14B Multiple GPU, prompt extend local_qwen: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task t2v-A14B --ckpt_dir $CKPT_DIR --size 480*832 --dit_fsdp --t5_fsdp --ulysses_size $GPUS --use_prompt_extend --prompt_extend_model "Qwen/Qwen2.5-3B-Instruct" --prompt_extend_target_lang "en"
}
# Smoke-test the Wan2.2-I2V-A14B image-to-video checkpoint, including prompt
# extension via a local Qwen-VL model and (when an API key is set) via the
# dashscope service. Relies on globals from the preamble: MODEL_DIR, GPUS,
# PY_FILE.
function i2v_A14B() {
CKPT_DIR="$MODEL_DIR/Wan2.2-I2V-A14B"
# echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> i2v_14B 1-GPU Test: "
# python $PY_FILE --task i2v-A14B --size 832*480 --ckpt_dir $CKPT_DIR
# Multiple GPU Test
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> i2v_14B Multiple GPU Test: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task i2v-A14B --ckpt_dir $CKPT_DIR --size 832*480 --dit_fsdp --t5_fsdp --ulysses_size $GPUS
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> i2v_14B Multiple GPU, prompt extend local_qwen: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task i2v-A14B --ckpt_dir $CKPT_DIR --size 720*1280 --dit_fsdp --t5_fsdp --ulysses_size $GPUS --use_prompt_extend --prompt_extend_model "Qwen/Qwen2.5-VL-3B-Instruct" --prompt_extend_target_lang "en"
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> i2v_14B Multiple GPU, prompt extend local_qwen: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task i2v-A14B --ckpt_dir $CKPT_DIR --size 1280*720 --dit_fsdp --t5_fsdp --ulysses_size $GPUS --use_prompt_extend --prompt_extend_model "Qwen/Qwen2.5-VL-3B-Instruct" --prompt_extend_target_lang "en"
# ${DASH_API_KEY+x} expands to "x" whenever the variable is SET, even if it
# is empty — so an exported-but-empty key still attempts the dashscope test.
# NOTE(review): if empty keys should skip, use ${DASH_API_KEY:+x} — confirm.
if [ -n "${DASH_API_KEY+x}" ]; then
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> i2v_14B Multiple GPU, prompt extend dashscope: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task i2v-A14B --ckpt_dir $CKPT_DIR --size 480*832 --dit_fsdp --t5_fsdp --ulysses_size $GPUS --use_prompt_extend --prompt_extend_method "dashscope"
else
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> No DASH_API_KEY found, skip the dashscope extend test."
fi
}
# Smoke-test the Wan2.2-TI2V-5B combined text/image-to-video checkpoint in
# both t2v mode (no --image) and i2v mode (--prompt plus --image), with and
# without local-Qwen prompt extension. Relies on globals from the preamble:
# MODEL_DIR, GPUS, PY_FILE.
function ti2v_5B() {
CKPT_DIR="$MODEL_DIR/Wan2.2-TI2V-5B"
# # 1-GPU Test
# echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ti2v_5B t2v 1-GPU Test: "
# python $PY_FILE --task ti2v-5B --size 1280*704 --ckpt_dir $CKPT_DIR
# Multiple GPU Test
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ti2v_5B t2v Multiple GPU Test: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task ti2v-5B --ckpt_dir $CKPT_DIR --size 1280*704 --dit_fsdp --t5_fsdp --ulysses_size $GPUS
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ti2v_5B t2v Multiple GPU, prompt extend local_qwen: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task ti2v-5B --ckpt_dir $CKPT_DIR --size 704*1280 --dit_fsdp --t5_fsdp --ulysses_size $GPUS --use_prompt_extend --prompt_extend_model "Qwen/Qwen2.5-3B-Instruct" --prompt_extend_target_lang "en"
# i2v mode: passing --image switches the task to image-conditioned generation.
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ti2v_5B i2v Multiple GPU Test: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task ti2v-5B --ckpt_dir $CKPT_DIR --size 704*1280 --dit_fsdp --t5_fsdp --ulysses_size $GPUS --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." --image "examples/i2v_input.JPG"
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ti2v_5B i2v Multiple GPU, prompt extend local_qwen: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task ti2v-5B --ckpt_dir $CKPT_DIR --size 1280*704 --dit_fsdp --t5_fsdp --ulysses_size $GPUS --use_prompt_extend --prompt_extend_model "Qwen/Qwen2.5-3B-Instruct" --prompt_extend_target_lang 'en' --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." --image "examples/i2v_input.JPG"
}
# Run all three suites in order. The script sets only `set -x` (no `set -e`),
# so a failing command in one suite does not stop the later suites.
t2v_A14B
i2v_A14B
ti2v_5B
# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
"""Top-level package for the Wan2.2 video-generation pipelines.

Re-exports the task-specific pipeline classes so callers can write
``from wan import WanT2V`` instead of importing submodules directly.
"""
from . import configs, distributed, modules
from .image2video import WanI2V
from .speech2video import WanS2V
from .text2video import WanT2V
from .textimage2video import WanTI2V
from .animate import WanAnimate

# Explicit public API: pins what `from wan import *` exposes and lets static
# analyzers distinguish re-exports from incidental imports.
__all__ = [
    "configs",
    "distributed",
    "modules",
    "WanI2V",
    "WanS2V",
    "WanT2V",
    "WanTI2V",
    "WanAnimate",
]
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment