update caching scripts and configs

d7d45faf · helloyongyang · 220a631f · 220a631f · 220a631f · 220a631f
Commit d7d45faf authored Jun 30, 2025 by helloyongyang
17 changed files
--- a/configs/caching/hunyuan_i2v_TaylorSeer.json
+++ b/configs/caching/hunyuan_i2v_TaylorSeer.json
-{
-    "infer_steps": 20,
-    "target_video_length": 33,
-    "i2v_resolution": "720p",
-    "attention_type": "flash_attn3",
-    "seed": 0,
-    "mm_config": {
-        "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
-        "weight_auto_quant": true
-    },
-    "feature_caching": "TaylorSeer"
-}
--- a/configs/caching/hunyuan_i2v_Tea.json
+++ b/configs/caching/hunyuan_i2v_Tea.json
-{
-    "infer_steps": 20,
-    "target_video_length": 33,
-    "i2v_resolution": "720p",
-    "attention_type": "flash_attn3",
-    "seed": 0,
-    "mm_config": {
-        "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
-        "weight_auto_quant": true
-    },
-    "feature_caching": "Tea"
-}
--- a/configs/caching/hunyuan_t2v_TaylorSeer.json
+++ b/configs/caching/hunyuan_t2v_TaylorSeer.json
-{
-    "infer_steps": 20,
-    "target_video_length": 33,
-    "target_height": 720,
-    "target_width": 1280,
-    "attention_type": "flash_attn3",
-    "seed": 42,
-    "feature_caching": "TaylorSeer"
-}
--- a/configs/caching/hunyuan_t2v_Tea.json
+++ b/configs/caching/hunyuan_t2v_Tea.json
-{
-    "infer_steps": 20,
-    "target_video_length": 33,
-    "target_height": 720,
-    "target_width": 1280,
-    "attention_type": "flash_attn3",
-    "seed": 42,
-    "mm_config": {
-        "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
-        "weight_auto_quant": true
-    },
-    "feature_caching": "Tea"
-}
--- a/configs/caching/wan_i2v_Tea_720p.json
+++ b/configs/caching/wan_i2v_Tea_720p.json
@@ -4,20 +4,10 @@
    "target_height": 480,
    "target_width": 832,
    "attention_type": "flash_attn3",
-    "seed": 42,
+    "seed": 442,
    "sample_guide_scale": 5,
-    "sample_shift": 5,
+    "sample_shift": 3,
    "enable_cfg": true,
    "cpu_offload": false,
-    "mm_config": {
-        "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl",
-        "weight_auto_quant": true
-    },
-    "feature_caching": "Tea",
-    "coefficients": [
-        [8.10705460e03, 2.13393892e03, -3.72934672e02, 1.66203073e01, -4.17769401e-02],
-        [-114.36346466, 65.26524496, -18.82220707, 4.91518089, -0.23412683]
-    ],
-    "use_ret_steps": true,
-     "teacache_thresh": 0.26
+    "feature_caching": "Custom"
 }
--- a/configs/caching/wan_i2v_Tea_480p.json
+++ b/configs/caching/wan_i2v_Tea_480p.json
@@ -4,20 +4,10 @@
    "target_height": 480,
    "target_width": 832,
    "attention_type": "flash_attn3",
-    "seed": 42,
+    "seed": 442,
    "sample_guide_scale": 5,
-    "sample_shift": 5,
+    "sample_shift": 3,
    "enable_cfg": true,
    "cpu_offload": false,
-    "mm_config": {
-        "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl",
-        "weight_auto_quant": true
-    },
-    "feature_caching": "Tea",
-    "coefficients": [
-        [2.57151496e05, -3.54229917e04, 1.40286849e03, -1.35890334e01, 1.32517977e-01],
-        [-3.02331670e02, 2.23948934e02, -5.25463970e01, 5.87348440e00, -2.01973289e-01]
-    ],
-    "use_ret_steps": true,
-    "teacache_thresh": 0.26
+    "feature_caching": "Tea"
 }
--- a/configs/caching/wan_t2v_Tea_14b.json
+++ b/configs/caching/wan_t2v_Tea_14b.json
@@ -10,15 +10,5 @@
    "sample_shift": 8,
    "enable_cfg": true,
    "cpu_offload": false,
-    "mm_config": {
-        "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl",
-        "weight_auto_quant": true
-    },
-    "feature_caching": "Tea",
-    "coefficients": [
-        [-3.03318725e05, 4.90537029e04, -2.65530556e03, 5.87365115e01, -3.15583525e-01],
-        [-5784.54975374, 5449.50911966, -1811.16591783, 256.27178429, -13.02252404]
-    ],
-    "use_ret_steps": true,
-    "teacache_thresh": 0.26
+    "feature_caching": "Ada"
 }
--- a/configs/caching/wan_t2v_Tea_1_3b.json
+++ b/configs/caching/wan_t2v_Tea_1_3b.json
@@ -10,15 +10,5 @@
    "sample_shift": 8,
    "enable_cfg": true,
    "cpu_offload": false,
-    "mm_config": {
-        "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl",
-        "weight_auto_quant": true
-    },
-    "feature_caching": "Tea",
-    "coefficients": [
-        [-5.21862437e04, 9.23041404e03, -5.28275948e02, 1.36987616e01, -4.99875664e-02],
-        [2.39676752e03, -1.31110545e03, 2.01331979e02, -8.29855975e00, 1.37887774e-01]
-    ],
-    "use_ret_steps": true,
-    "teacache_thresh": 0.26
+    "feature_caching": "Custom"
 }
--- a/configs/caching/wan_t2v_taylorseer.json
+++ b/configs/caching/wan_t2v_taylorseer.json
+{
+    "infer_steps": 50,
+    "target_video_length": 81,
+    "text_len": 512,
+    "target_height": 480,
+    "target_width": 832,
+    "attention_type": "flash_attn3",
+    "seed": 42,
+    "sample_guide_scale": 6,
+    "sample_shift": 8,
+    "enable_cfg": true,
+    "cpu_offload": false,
+    "feature_caching": "TaylorSeer"
+}
--- a/configs/caching/wan_t2v_tea.json
+++ b/configs/caching/wan_t2v_tea.json
+{
+    "infer_steps": 50,
+    "target_video_length": 81,
+    "text_len": 512,
+    "target_height": 480,
+    "target_width": 832,
+    "attention_type": "flash_attn3",
+    "seed": 42,
+    "sample_guide_scale": 6,
+    "sample_shift": 8,
+    "enable_cfg": true,
+    "cpu_offload": false,
+    "feature_caching": "Tea"
+}
--- a/scripts/run_wan_i2v_with_lora.sh
+++ b/scripts/run_wan_i2v_with_lora.sh
@@ -3,7 +3,6 @@
 # set path and first
 lightx2v_path=
 model_path=
-lora_path=

 # check section
 if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
@@ -12,40 +11,30 @@ if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
    export CUDA_VISIBLE_DEVICES=${cuda_devices}
 fi

-if [ -z "${model_path}" ]; then
-    echo "Error: model_path is not set. Please set this variable first."
+if [ -z "${lightx2v_path}" ]; then
+    echo "Error: lightx2v_path is not set. Please set this variable first."
    exit 1
 fi

-if [ -z "${lora_path}" ]; then
-    echo "Error: lora_path is not set. Please set this variable first."
+if [ -z "${model_path}" ]; then
+    echo "Error: model_path is not set. Please set this variable first."
    exit 1
 fi

 export TOKENIZERS_PARALLELISM=false

 export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
-export DTYPE=BF16
+
 export ENABLE_PROFILING_DEBUG=true
+export ENABLE_GRAPH_MODE=false
+export DTYPE=BF16

 python -m lightx2v.infer \
 --model_cls wan2.1 \
 --task i2v \
 --model_path $model_path \
---prompt "画面中的物体轻轻向上跃起，变成了外貌相似的毛绒玩具。毛绒玩具有着一双眼睛，它的颜色和之前的一样。然后，它开始跳跃起来。背景保持一致，气氛显得格外俏皮。" \
---infer_steps 40 \
---target_video_length 81 \
---target_width  832 \
---target_height 480 \
---attention_type flash_attn3 \
---seed 42 \
---negative_prompt "画面过曝，模糊，文字，字幕" \
---save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_i2v.mp4 \
---sample_guide_scale 5 \
---sample_shift 5 \
+--config_json ${lightx2v_path}/configs/caching/wan_i2v_custom.json \
+--prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
+--negative_prompt "镜头晃动，色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走" \
 --image_path ${lightx2v_path}/assets/inputs/imgs/img_0.jpg \
---lora_path ${lora_path} \
---feature_caching Tea \
---mm_config '{"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Vllm", "weight_auto_quant": true}' \
-# --mm_config '{"mm_type": "Default", "weight_auto_quant": true}' \
-# --use_ret_steps \
+--save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_i2v_custom.mp4
--- a/scripts/run_wan_i2v_tea.sh
+++ b/scripts/run_wan_i2v_tea.sh
@@ -7,7 +7,7 @@ model_path=
 # check section
 if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
    cuda_devices=0
-    echo "Warn: CUDA_VISIBLE_DEVICES is not set, using defalt value: ${cuda_devices}, change at shell script or set env variable."
+    echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
    export CUDA_VISIBLE_DEVICES=${cuda_devices}
 fi

@@ -24,15 +24,16 @@ fi
 export TOKENIZERS_PARALLELISM=false

 export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
-export DTYPE=BF16
+
 export ENABLE_PROFILING_DEBUG=true
 export ENABLE_GRAPH_MODE=false
+export DTYPE=BF16

 python -m lightx2v.infer \
 --model_cls wan2.1 \
 --task i2v \
 --model_path $model_path \
---config_json ${lightx2v_path}/configs/caching/wan_i2v_Tea.json \
+--config_json ${lightx2v_path}/configs/caching/wan_i2v_tea.json \
 --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
 --negative_prompt "镜头晃动，色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走" \
 --image_path ${lightx2v_path}/assets/inputs/imgs/img_0.jpg \

--- a/scripts/run_hunyuan_t2v_taylorseer.sh
+++ b/scripts/run_hunyuan_t2v_taylorseer.sh
@@ -26,11 +26,13 @@ export TOKENIZERS_PARALLELISM=false
 export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
 export DTYPE=BF16
 export ENABLE_PROFILING_DEBUG=true
+export ENABLE_GRAPH_MODE=false

 python -m lightx2v.infer \
---model_cls hunyuan \
+--model_cls wan2.1 \
 --task t2v \
 --model_path $model_path \
---config_json ${lightx2v_path}/configs/caching/hunyuan_t2v_TaylorSeer.json \
---prompt "A cat walks on the grass, realistic style." \
---save_video_path ${lightx2v_path}/save_results/output_lightx2v_hy_t2v_taylor.mp4
+--config_json ${lightx2v_path}/configs/caching/wan_t2v_ada.json \
+--prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
+--negative_prompt "镜头晃动，色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走" \
+--save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v_ada.mp4
--- a/scripts/run_hunyuan_i2v_taylorseer.sh
+++ b/scripts/run_hunyuan_i2v_taylorseer.sh
@@ -7,7 +7,7 @@ model_path=
 # check section
 if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
    cuda_devices=0
-    echo "Warn: CUDA_VISIBLE_DEVICES is not set, using defalt value: ${cuda_devices}, change at shell script or set env variable."
+    echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
    export CUDA_VISIBLE_DEVICES=${cuda_devices}
 fi

@@ -29,10 +29,10 @@ export ENABLE_PROFILING_DEBUG=true
 export ENABLE_GRAPH_MODE=false

 python -m lightx2v.infer \
---model_cls hunyuan \
---task i2v \
+--model_cls wan2.1 \
+--task t2v \
 --model_path $model_path \
---config_json ${lightx2v_path}/configs/caching/hunyuan_i2v_TaylorSeer.json \
---prompt "An Asian man with short hair in black tactical uniform and white clothes waves a firework stick." \
---image_path ${lightx2v_path}/assets/inputs/imgs/img_1.jpg \
---save_video_path ${lightx2v_path}/save_results/output_lightx2v_hy_i2v_taylor.mp4
+--config_json ${lightx2v_path}/configs/caching/wan_t2v_custom.json \
+--prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
+--negative_prompt "镜头晃动，色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走" \
+--save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v_custom.mp4
--- a/scripts/run_hunyuan_i2v_tea.sh
+++ b/scripts/run_hunyuan_i2v_tea.sh
@@ -7,7 +7,7 @@ model_path=
 # check section
 if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
    cuda_devices=0
-    echo "Warn: CUDA_VISIBLE_DEVICES is not set, using defalt value: ${cuda_devices}, change at shell script or set env variable."
+    echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
    export CUDA_VISIBLE_DEVICES=${cuda_devices}
 fi

@@ -29,10 +29,10 @@ export ENABLE_PROFILING_DEBUG=true
 export ENABLE_GRAPH_MODE=false

 python -m lightx2v.infer \
---model_cls hunyuan \
---task i2v \
+--model_cls wan2.1 \
+--task t2v \
 --model_path $model_path \
---config_json ${lightx2v_path}/configs/caching/hunyuan_i2v_Tea.json \
---prompt "An Asian man with short hair in black tactical uniform and white clothes waves a firework stick." \
---image_path ${lightx2v_path}/assets/inputs/imgs/img_1.jpg \
---save_video_path ${lightx2v_path}/save_results/output_lightx2v_hy_i2v_tea.mp4
+--config_json ${lightx2v_path}/configs/caching/wan_t2v_taylorseer.json \
+--prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
+--negative_prompt "镜头晃动，色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走" \
+--save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v_taylorseer.mp4
--- a/scripts/run_wan_t2v_tea.sh
+++ b/scripts/run_wan_t2v_tea.sh
@@ -7,7 +7,7 @@ model_path=
 # check section
 if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
    cuda_devices=0
-    echo "Warn: CUDA_VISIBLE_DEVICES is not set, using defalt value: ${cuda_devices}, change at shell script or set env variable."
+    echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
    export CUDA_VISIBLE_DEVICES=${cuda_devices}
 fi

@@ -32,7 +32,7 @@ python -m lightx2v.infer \
 --model_cls wan2.1 \
 --task t2v \
 --model_path $model_path \
---config_json ${lightx2v_path}/configs/caching/wan_t2v_Tea.json \
+--config_json ${lightx2v_path}/configs/caching/wan_t2v_tea.json \
 --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
 --negative_prompt "镜头晃动，色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走" \
 --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v_tea.mp4
--- a/scripts/run_hunyuan_t2v_tea.sh
+++ b/scripts/run_hunyuan_t2v_tea.sh
-#!/bin/bash
-
-# set path and first
-lightx2v_path=
-model_path=
-
-# check section
-if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
-    cuda_devices=0
-    echo "Warn: CUDA_VISIBLE_DEVICES is not set, using defalt value: ${cuda_devices}, change at shell script or set env variable."
-    export CUDA_VISIBLE_DEVICES=${cuda_devices}
-fi
-
-if [ -z "${lightx2v_path}" ]; then
-    echo "Error: lightx2v_path is not set. Please set this variable first."
-    exit 1
-fi
-
-if [ -z "${model_path}" ]; then
-    echo "Error: model_path is not set. Please set this variable first."
-    exit 1
-fi
-
-export TOKENIZERS_PARALLELISM=false
-
-export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
-export DTYPE=BF16
-export ENABLE_PROFILING_DEBUG=true
-
-python -m lightx2v.infer \
---model_cls hunyuan \
---task t2v \
---model_path $model_path \
---config_json ${lightx2v_path}/configs/caching/hunyuan_t2v_Tea.json \
---prompt "A cat walks on the grass, realistic style." \
---save_video_path ${lightx2v_path}/save_results/output_lightx2v_hy_t2v_tea.mp4