Commit 1336a33d authored by zzg_666's avatar zzg_666
Browse files

wan2.2

parents
This diff is collapsed.
icon.png

70.5 KB

# 模型唯一标识
modelCode=1810
# 模型名称
modelName=Wan2.2_pytorch
# 模型描述
modelDescription=Wan2.2是阿里开源的多模态视频生成模型,支持文本、图像、语音驱动,采用MoE架构,消费级显卡可跑,开源可商用,画质达1080p,内置电影级光影控制,生成高效。
# 应用场景
processType=推理
# 算法类别
appScenario=视频生成
# 框架类型
frameType=pytorch
# 加速卡类型
accelerateType=BW1000,K100AI
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "wan"
version = "2.2.0"
description = "Wan: Open and Advanced Large-Scale Video Generative Models"
authors = [
    { name = "Wan Team", email = "wan.ai@alibabacloud.com" },
]
license = { file = "LICENSE.txt" }
readme = "README.md"
requires-python = ">=3.10,<4.0"
dependencies = [
    "torch>=2.4.0",
    "torchvision>=0.19.0",
    "opencv-python>=4.9.0.80",
    "diffusers>=0.31.0",
    "transformers>=4.49.0",
    "tokenizers>=0.20.3",
    "accelerate>=1.1.1",
    "tqdm",
    "imageio",
    "easydict",
    "ftfy",
    "dashscope",
    "imageio-ffmpeg",
    "flash_attn",
    "numpy>=1.23.5,<2",
]

[project.optional-dependencies]
dev = [
    "pytest",
    "black",
    "flake8",
    "isort",
    "mypy",
    "huggingface-hub[cli]",
]

[project.urls]
homepage = "https://wanxai.com"
documentation = "https://github.com/Wan-Video/Wan2.2"
repository = "https://github.com/Wan-Video/Wan2.2"
huggingface = "https://huggingface.co/Wan-AI/"
modelscope = "https://modelscope.cn/organization/Wan-AI"
discord = "https://discord.gg/p5XbdQV7"

[tool.setuptools]
packages = ["wan"]

[tool.setuptools.package-data]
wan = ["**/*.py"]

[tool.black]
line-length = 88

[tool.isort]
profile = "black"

[tool.mypy]
strict = true
#torch>=2.4.0
#torchvision>=0.19.0
#torchaudio
opencv-python>=4.9.0.80
diffusers>=0.31.0
transformers>=4.49.0,<=4.51.3
tokenizers>=0.20.3
accelerate>=1.1.1
tqdm
imageio[ffmpeg]
easydict
ftfy
dashscope
imageio-ffmpeg
#flash_attn
numpy>=1.23.5,<2
decord
peft
onnxruntime
pandas
matplotlib
-e git+https://github.com/facebookresearch/sam2.git@0e78a118995e66bb27d78518c4bd9a3e95b4e266#egg=SAM-2
loguru
sentencepiece
\ No newline at end of file
openai-whisper
HyperPyYAML
#onnxruntime
inflect
wetext
omegaconf
conformer
hydra-core
lightning
rich
gdown
matplotlib
wget
pyarrow
pyworld
librosa
decord
modelscope
GitPython
Put all your models (Wan2.2-T2V-A14B, Wan2.2-I2V-A14B, Wan2.2-TI2V-5B) in a folder and specify the max GPU number you want to use.
```bash
bash ./tests/test.sh <local model dir> <gpu number>
```
#!/bin/bash
# Entry point for the Wan2.2 smoke-test suite.
# Usage: test.sh <local model dir> <gpu number>
set -x

# NOTE(review): presumably unset so NCCL debug logging does not flood the
# `set -x` trace — confirm with the original author.
unset NCCL_DEBUG

if [ "$#" -eq 2 ]; then
    MODEL_DIR=$(realpath "$1")
    GPUS=$2
else
    echo "Usage: $0 <local model dir> <gpu number>"
    exit 1
fi

# Fail fast when the model directory cannot be resolved or does not exist,
# instead of letting every torchrun invocation below die later with an
# opaque checkpoint-loading error.
if [ -z "$MODEL_DIR" ] || [ ! -d "$MODEL_DIR" ]; then
    echo "Error: '$1' is not an existing directory" >&2
    exit 1
fi

# Resolve the repository root relative to this script's own location so the
# suite can be launched from any working directory.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
REPO_ROOT="$(dirname "$SCRIPT_DIR")"
cd "$REPO_ROOT" || exit 1

# Generation driver used by all test functions below.
PY_FILE=./generate.py
# Smoke-test the Wan2.2-T2V-A14B text-to-video checkpoint at several
# resolutions with FSDP sharding (--dit_fsdp/--t5_fsdp) and Ulysses sequence
# parallelism sized to the GPU count. Relies on globals from the preamble:
# MODEL_DIR, GPUS, PY_FILE. Note $CKPT_DIR is expanded unquoted below, so
# MODEL_DIR must not contain whitespace.
function t2v_A14B() {
CKPT_DIR="$MODEL_DIR/Wan2.2-T2V-A14B"
# # 1-GPU Test
# echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> t2v_A14B 1-GPU Test: "
# python $PY_FILE --task t2v-A14B --size 480*832 --ckpt_dir $CKPT_DIR
# Multiple GPU Test
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> t2v_A14B Multiple GPU Test: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task t2v-A14B --ckpt_dir $CKPT_DIR --size 832*480 --dit_fsdp --t5_fsdp --ulysses_size $GPUS
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> t2v_A14B Multiple GPU Test: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task t2v-A14B --ckpt_dir $CKPT_DIR --size 720*1280 --dit_fsdp --t5_fsdp --ulysses_size $GPUS
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> t2v_A14B Multiple GPU Test: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task t2v-A14B --ckpt_dir $CKPT_DIR --size 1280*720 --dit_fsdp --t5_fsdp --ulysses_size $GPUS
# Same run with prompt extension through a local Qwen text model.
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> t2v_A14B Multiple GPU, prompt extend local_qwen: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task t2v-A14B --ckpt_dir $CKPT_DIR --size 480*832 --dit_fsdp --t5_fsdp --ulysses_size $GPUS --use_prompt_extend --prompt_extend_model "Qwen/Qwen2.5-3B-Instruct" --prompt_extend_target_lang "en"
}
# Smoke-test the Wan2.2-I2V-A14B image-to-video checkpoint, including prompt
# extension via a local Qwen-VL model and (when an API key is set) via the
# dashscope service. Relies on globals from the preamble: MODEL_DIR, GPUS,
# PY_FILE.
function i2v_A14B() {
CKPT_DIR="$MODEL_DIR/Wan2.2-I2V-A14B"
# echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> i2v_14B 1-GPU Test: "
# python $PY_FILE --task i2v-A14B --size 832*480 --ckpt_dir $CKPT_DIR
# Multiple GPU Test
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> i2v_14B Multiple GPU Test: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task i2v-A14B --ckpt_dir $CKPT_DIR --size 832*480 --dit_fsdp --t5_fsdp --ulysses_size $GPUS
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> i2v_14B Multiple GPU, prompt extend local_qwen: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task i2v-A14B --ckpt_dir $CKPT_DIR --size 720*1280 --dit_fsdp --t5_fsdp --ulysses_size $GPUS --use_prompt_extend --prompt_extend_model "Qwen/Qwen2.5-VL-3B-Instruct" --prompt_extend_target_lang "en"
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> i2v_14B Multiple GPU, prompt extend local_qwen: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task i2v-A14B --ckpt_dir $CKPT_DIR --size 1280*720 --dit_fsdp --t5_fsdp --ulysses_size $GPUS --use_prompt_extend --prompt_extend_model "Qwen/Qwen2.5-VL-3B-Instruct" --prompt_extend_target_lang "en"
# ${DASH_API_KEY+x} expands to "x" whenever the variable is SET, even if it
# is empty — so an exported-but-empty key still attempts the dashscope test.
# NOTE(review): if empty keys should skip, use ${DASH_API_KEY:+x} — confirm.
if [ -n "${DASH_API_KEY+x}" ]; then
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> i2v_14B Multiple GPU, prompt extend dashscope: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task i2v-A14B --ckpt_dir $CKPT_DIR --size 480*832 --dit_fsdp --t5_fsdp --ulysses_size $GPUS --use_prompt_extend --prompt_extend_method "dashscope"
else
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> No DASH_API_KEY found, skip the dashscope extend test."
fi
}
# Smoke-test the Wan2.2-TI2V-5B combined text/image-to-video checkpoint in
# both t2v mode (no --image) and i2v mode (--prompt plus --image), with and
# without local-Qwen prompt extension. Relies on globals from the preamble:
# MODEL_DIR, GPUS, PY_FILE.
function ti2v_5B() {
CKPT_DIR="$MODEL_DIR/Wan2.2-TI2V-5B"
# # 1-GPU Test
# echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ti2v_5B t2v 1-GPU Test: "
# python $PY_FILE --task ti2v-5B --size 1280*704 --ckpt_dir $CKPT_DIR
# Multiple GPU Test
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ti2v_5B t2v Multiple GPU Test: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task ti2v-5B --ckpt_dir $CKPT_DIR --size 1280*704 --dit_fsdp --t5_fsdp --ulysses_size $GPUS
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ti2v_5B t2v Multiple GPU, prompt extend local_qwen: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task ti2v-5B --ckpt_dir $CKPT_DIR --size 704*1280 --dit_fsdp --t5_fsdp --ulysses_size $GPUS --use_prompt_extend --prompt_extend_model "Qwen/Qwen2.5-3B-Instruct" --prompt_extend_target_lang "en"
# i2v mode: passing --image switches the task to image-conditioned generation.
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ti2v_5B i2v Multiple GPU Test: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task ti2v-5B --ckpt_dir $CKPT_DIR --size 704*1280 --dit_fsdp --t5_fsdp --ulysses_size $GPUS --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." --image "examples/i2v_input.JPG"
echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ti2v_5B i2v Multiple GPU, prompt extend local_qwen: "
torchrun --nproc_per_node=$GPUS $PY_FILE --task ti2v-5B --ckpt_dir $CKPT_DIR --size 1280*704 --dit_fsdp --t5_fsdp --ulysses_size $GPUS --use_prompt_extend --prompt_extend_model "Qwen/Qwen2.5-3B-Instruct" --prompt_extend_target_lang 'en' --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." --image "examples/i2v_input.JPG"
}
# Run all three suites in order. The script sets only `set -x` (no `set -e`),
# so a failing command in one suite does not stop the later suites.
t2v_A14B
i2v_A14B
ti2v_5B
# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
"""Top-level package for the Wan2.2 video-generation pipelines.

Re-exports the task-specific pipeline classes so callers can write
``from wan import WanT2V`` instead of importing submodules directly.
"""
from . import configs, distributed, modules
from .image2video import WanI2V
from .speech2video import WanS2V
from .text2video import WanT2V
from .textimage2video import WanTI2V
from .animate import WanAnimate

# Explicit public API: pins what `from wan import *` exposes and lets static
# analyzers distinguish re-exports from incidental imports.
__all__ = [
    "configs",
    "distributed",
    "modules",
    "WanI2V",
    "WanS2V",
    "WanT2V",
    "WanTI2V",
    "WanAnimate",
]
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment