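Update config attention type: replace the single "attention_type" key with per-layer "self_attn_1_type", "cross_attn_1_type", and "cross_attn_2_type" keys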

8b230da5 · wangshankun · fad005dd · 8b230da5 · 8b230da5 · 8b230da5
Commit 8b230da5 authored Jul 03, 2025 by wangshankun
9 changed files
--- a/configs/skyreels/wan_skyreels_v2_i2v.json
+++ b/configs/skyreels/wan_skyreels_v2_i2v.json
@@ -4,7 +4,9 @@
    "text_len": 512,
    "target_height": 544,
    "target_width": 960,
-    "attention_type": "flash_attn3",
+    "self_attn_1_type": "flash_attn3",
+    "cross_attn_1_type": "flash_attn3",
+    "cross_attn_2_type": "flash_attn3",
    "seed": 42,
    "sample_guide_scale": 5,
    "sample_shift": 3

--- a/configs/skyreels/wan_skyreels_v2_t2v.json
+++ b/configs/skyreels/wan_skyreels_v2_t2v.json
@@ -4,7 +4,9 @@
    "text_len": 512,
    "target_height": 544,
    "target_width": 960,
-    "attention_type": "flash_attn3",
+    "self_attn_1_type": "flash_attn3",
+    "cross_attn_1_type": "flash_attn3",
+    "cross_attn_2_type": "flash_attn3",
    "seed": 42,
    "sample_guide_scale": 6,
    "sample_shift": 8

--- a/configs/wan/wan_i2v.json
+++ b/configs/wan/wan_i2v.json
@@ -3,7 +3,9 @@
    "target_video_length": 81,
    "target_height": 480,
    "target_width": 832,
-    "attention_type": "flash_attn3",
+    "self_attn_1_type": "flash_attn3",
+    "cross_attn_1_type": "flash_attn3",
+    "cross_attn_2_type": "flash_attn3",
    "seed": 442,
    "sample_guide_scale": 5,
    "sample_shift": 3,

--- a/configs/wan/wan_i2v_dist.json
+++ b/configs/wan/wan_i2v_dist.json
@@ -3,7 +3,9 @@
    "target_video_length": 81,
    "target_height": 480,
    "target_width": 832,
-    "attention_type": "flash_attn3",
+    "self_attn_1_type": "flash_attn3",
+    "cross_attn_1_type": "flash_attn3",
+    "cross_attn_2_type": "flash_attn3",
    "seed": 42,
    "sample_guide_scale": 5,
    "sample_shift": 5,

--- a/configs/wan/wan_t2v.json
+++ b/configs/wan/wan_t2v.json
@@ -4,7 +4,9 @@
    "text_len": 512,
    "target_height": 480,
    "target_width": 832,
-    "attention_type": "flash_attn3",
+    "self_attn_1_type": "flash_attn3",
+    "cross_attn_1_type": "flash_attn3",
+    "cross_attn_2_type": "flash_attn3",
    "seed": 42,
    "sample_guide_scale": 6,
    "sample_shift": 8,

--- a/configs/wan/wan_t2v_dist.json
+++ b/configs/wan/wan_t2v_dist.json
@@ -4,7 +4,9 @@
    "text_len": 512,
    "target_height": 480,
    "target_width": 832,
-    "attention_type": "flash_attn3",
+    "self_attn_1_type": "flash_attn3",
+    "cross_attn_1_type": "flash_attn3",
+    "cross_attn_2_type": "flash_attn3",
    "seed": 42,
    "sample_guide_scale": 6,
    "sample_shift": 8,

--- a/configs/wan/wan_t2v_enhancer.json
+++ b/configs/wan/wan_t2v_enhancer.json
@@ -4,7 +4,9 @@
    "text_len": 512,
    "target_height": 480,
    "target_width": 832,
-    "attention_type": "flash_attn3",
+    "self_attn_1_type": "flash_attn3",
+    "cross_attn_1_type": "flash_attn3",
+    "cross_attn_2_type": "flash_attn3",
    "seed": 42,
    "sample_guide_scale": 6,
    "sample_shift": 8,

--- a/lightx2v/models/networks/wan/lora_adapter.py
+++ b/lightx2v/models/networks/wan/lora_adapter.py
@@ -85,8 +85,6 @@ class WanLoraWrapper:
            if name in lora_pairs:
                if name not in self.override_dict:
                    self.override_dict[name] = param.clone().cpu()
-                # import pdb
-                # pdb.set_trace()
                name_lora_A, name_lora_B = lora_pairs[name]
                lora_A = lora_weights[name_lora_A].to(param.device, param.dtype)
                lora_B = lora_weights[name_lora_B].to(param.device, param.dtype)

--- a/scripts/run_wan_i2v_audio.sh
+++ b/scripts/run_wan_i2v_audio.sh
@@ -33,7 +33,7 @@ python -m lightx2v.infer \
 --model_cls wan2.1_audio \
 --task i2v \
 --model_path $model_path \
---config_json ${lightx2v_path}/configs/wan_i2v_audio.json \
+--config_json ${lightx2v_path}/configs/audio_driven/wan_i2v_audio.json \
 --prompt_path ${lightx2v_path}/assets/inputs/audio/15.txt \
 --negative_prompt 色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走 \
 --image_path ${lightx2v_path}/assets/inputs/audio/15.png \