Update audio quant config

2e5794c7 · gushiqiao · 40ef69cd · 2e5794c7 · 2e5794c7
Commit 2e5794c7 authored Aug 01, 2025 by gushiqiao
Showing 2 changed files with 25 additions and 1 deletion

configs/audio_driven/wan_i2v_audio_quant.json +22 -0

lightx2v/common/ops/attn/sage_attn.py +3 -1

No files found.
--- a/configs/audio_driven/wan_i2v_audio_quant.json
+++ b/configs/audio_driven/wan_i2v_audio_quant.json
+{
+    "infer_steps": 4,
+    "target_fps": 16,
+    "video_duration": 16,
+    "audio_sr": 16000,
+    "target_video_length": 81,
+    "target_height": 720,
+    "target_width": 1280,
+    "self_attn_1_type": "sage_attn2",
+    "cross_attn_1_type": "sage_attn2",
+    "cross_attn_2_type": "sage_attn2",
+    "seed": 42,
+    "sample_guide_scale": 1,
+    "sample_shift": 5,
+    "enable_cfg": false,
+    "cpu_offload": false,
+    "use_31_block": false,
+    "dit_quantized_ckpt": "/path/to/Wan2.1-R2V721-Audio-14B-720P/fp8",
+    "mm_config": {
+        "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Vllm"
+    }
+}
--- a/lightx2v/common/ops/attn/sage_attn.py
+++ b/lightx2v/common/ops/attn/sage_attn.py
@@ -50,7 +50,7 @@ class SageAttn2Weight(AttnWeightTemplate):
            )
            x = torch.cat((x1, x2), dim=1)
            x = x.view(max_seqlen_q, -1)
-        elif model_cls in ["wan2.1", "wan2.1_distill", "wan2.1_causvid", "wan2.1_df"]:
+        elif model_cls in ["wan2.1", "wan2.1_distill", "wan2.1_causvid", "wan2.1_df", "wan2.1_audio"]:
            x = sageattn(
                q.unsqueeze(0),
                k.unsqueeze(0),
@@ -58,4 +58,6 @@ class SageAttn2Weight(AttnWeightTemplate):
                tensor_layout="NHD",
            )
            x = x.view(max_seqlen_q, -1)
+        else:
+            raise NotImplementedError(f"Model class '{model_cls}' is not implemented in this attention implementation")
        return x