Commit 53adcfc1 authored by helloyongyang

update tiny vae and docs

parent 88c2ae38
@@ -401,7 +401,7 @@ def run_inference(
 "clip_quantized_ckpt": clip_quant_ckpt,
 "clip_quant_scheme": clip_quant_scheme,
 "use_tiling_vae": use_tiling_vae,
-"tiny_vae": use_tiny_vae,
+"use_tiny_vae": use_tiny_vae,
 "tiny_vae_path": (os.path.join(model_path, "taew2_1.pth") if use_tiny_vae else None),
 "lazy_load": lazy_load,
 "do_mm_calib": False,
@@ -403,7 +403,7 @@ def run_inference(
 "clip_quantized_ckpt": clip_quant_ckpt,
 "clip_quant_scheme": clip_quant_scheme,
 "use_tiling_vae": use_tiling_vae,
-"tiny_vae": use_tiny_vae,
+"use_tiny_vae": use_tiny_vae,
 "tiny_vae_path": (os.path.join(model_path, "taew2_1.pth") if use_tiny_vae else None),
 "lazy_load": lazy_load,
 "do_mm_calib": False,
@@ -25,7 +25,7 @@
 "clip_quantized_ckpt": "/path/to/clip-fp8.pth",
 "clip_quant_scheme": "fp8",
 "use_tiling_vae": true,
-"tiny_vae": true,
+"use_tiny_vae": true,
 "tiny_vae_path": "/path/to/taew2_1.pth",
 "lazy_load": true
 }
@@ -25,7 +25,7 @@
 "clip_quantized_ckpt": "/path/to/clip-fp8.pth",
 "clip_quant_scheme": "fp8",
 "use_tiling_vae": true,
-"tiny_vae": true,
+"use_tiny_vae": true,
 "tiny_vae_path": "/path/to/taew2_1.pth",
 "lazy_load": true,
 "rotary_chunk": true,
@@ -18,7 +18,7 @@
 "mm_config": {
 "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Q8F"
 },
-"tiny_vae": true,
+"use_tiny_vae": true,
 "tiny_vae_path": "/x2v_models/taew2_1.pth",
 "t5_offload_granularity": "block"
 }
@@ -22,6 +22,18 @@ For the original model without step distillation, we can use the following solut
 4. [Model Quantization](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/quantization.html) to accelerate Linear layer inference.
 5. [Variable Resolution Inference](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/changing_resolution.html) to reduce the resolution of intermediate inference steps.
+## 💡 Using Tiny VAE
+In some cases, the VAE component can be time-consuming. You can use a lightweight VAE for acceleration, which can also reduce some GPU memory usage.
+```python
+{
+"use_tiny_vae": true,
+"tiny_vae_path": "/path to taew2_1.pth"
+}
+```
+The taew2_1.pth weights can be downloaded from [here](https://github.com/madebyollin/taehv/raw/refs/heads/main/taew2_1.pth)
 ## ⚠️ Note
 Some acceleration solutions currently cannot be used together, and we are working to resolve this issue.
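The added docs section above only lists the two config keys. As a rough editorial sketch (not part of the commit), those keys and the taew2_1.pth download URL from the docs could be wired together as below; the helper name `prepare_tiny_vae_config` and the local paths are hypothetical.

```python
# Sketch only: download the tiny VAE weights referenced in the docs and emit a
# config using the renamed "use_tiny_vae" key. The helper name and local paths
# are hypothetical; only the two keys and the URL come from the commit.
import json
import os
import urllib.request

TAEW2_1_URL = "https://github.com/madebyollin/taehv/raw/refs/heads/main/taew2_1.pth"

def prepare_tiny_vae_config(model_dir: str, config_path: str) -> dict:
    weights_path = os.path.join(model_dir, "taew2_1.pth")
    if not os.path.exists(weights_path):
        # Fetch the weights once and cache them next to the model.
        urllib.request.urlretrieve(TAEW2_1_URL, weights_path)
    cfg = {
        "use_tiny_vae": True,        # renamed from "tiny_vae" in this commit
        "tiny_vae_path": weights_path,
    }
    with open(config_path, "w") as f:
        json.dump(cfg, f, indent=2)
    return cfg
```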
@@ -158,12 +158,12 @@ use_tiling_vae = True # Enable VAE chunked inference
 #### Lightweight VAE
 You can download it here: https://github.com/madebyollin/taehv/blob/main/taew2_1.pth
 ```python
 # VAE optimization configuration
-tiny_vae = True # Use lightweight VAE
+use_tiny_vae = True # Use lightweight VAE
 tiny_vae_path = "/path to taew2_1.pth"
 ```
 You can download taew2_1.pth [here](https://github.com/madebyollin/taehv/blob/main/taew2_1.pth)
 **VAE Optimization Effects**:
 - Standard VAE: Baseline performance, 100% quality retention
@@ -22,6 +22,19 @@
 4. [Model Quantization](https://lightx2v-zhcn.readthedocs.io/zh-cn/latest/method_tutorials/quantization.html) to accelerate Linear layer inference.
 5. [Variable Resolution Inference](https://lightx2v-zhcn.readthedocs.io/zh-cn/latest/method_tutorials/changing_resolution.html) to reduce the resolution of intermediate inference steps.
+## 💡 Using Tiny VAE
+In some cases, the VAE component can be quite time-consuming; a lightweight VAE can be used for acceleration and also reduces GPU memory usage somewhat.
+```python
+{
+"use_tiny_vae": true,
+"tiny_vae_path": "/path to taew2_1.pth"
+}
+```
+The taew2_1.pth weights can be downloaded from [here](https://github.com/madebyollin/taehv/raw/refs/heads/main/taew2_1.pth)
 ## ⚠️ Note
 Some acceleration solutions currently cannot be used in combination; we are working to resolve this issue.
@@ -158,12 +158,12 @@ use_tiling_vae = True # Enable tiled VAE inference
 #### Lightweight VAE
 It can be downloaded here: https://github.com/madebyollin/taehv/blob/main/taew2_1.pth
 ```python
 # VAE optimization configuration
-tiny_vae = True # Use lightweight VAE
+use_tiny_vae = True
 tiny_vae_path = "/path to taew2_1.pth"
 ```
 The taew2_1.pth weights can be downloaded from [here](https://github.com/madebyollin/taehv/raw/refs/heads/main/taew2_1.pth)
 **VAE Optimization Effects**:
 - Standard VAE: Baseline performance, 100% quality retention
@@ -142,7 +142,7 @@ class WanRunner(DefaultRunner):
 "parallel": self.config.parallel_vae,
 "use_tiling": self.config.get("use_tiling_vae", False),
 }
-if self.config.get("tiny_vae", False):
+if self.config.get("use_tiny_vae", False):
 tiny_vae_path = self.config.get("tiny_vae_path", os.path.join(self.config.model_path, "taew2_1.pth"))
 vae_decoder = WanVAE_tiny(
 vae_pth=tiny_vae_path,
@@ -154,7 +154,7 @@ class WanRunner(DefaultRunner):
 def load_vae(self):
 vae_encoder = self.load_vae_encoder()
-if vae_encoder is None or self.config.get("tiny_vae", False):
+if vae_encoder is None or self.config.get("use_tiny_vae", False):
 vae_decoder = self.load_vae_decoder()
 else:
 vae_decoder = vae_encoder
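After this rename, `WanRunner` only checks `use_tiny_vae`, so configs written with the old `tiny_vae` key would silently fall back to the full VAE. A minimal, hypothetical compatibility shim (not part of the repo) could accept both spellings:

```python
# Hypothetical shim, not in the repo: honor both the new "use_tiny_vae" key and
# the pre-rename "tiny_vae" key when deciding whether to load the tiny VAE decoder.
def wants_tiny_vae(config: dict) -> bool:
    if "use_tiny_vae" in config:
        return bool(config["use_tiny_vae"])
    return bool(config.get("tiny_vae", False))  # legacy key from before this commit
```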