Commit 53adcfc1 authored by helloyongyang's avatar helloyongyang
Browse files

update tiny vae and docs

parent 88c2ae38
...@@ -401,7 +401,7 @@ def run_inference( ...@@ -401,7 +401,7 @@ def run_inference(
"clip_quantized_ckpt": clip_quant_ckpt, "clip_quantized_ckpt": clip_quant_ckpt,
"clip_quant_scheme": clip_quant_scheme, "clip_quant_scheme": clip_quant_scheme,
"use_tiling_vae": use_tiling_vae, "use_tiling_vae": use_tiling_vae,
"tiny_vae": use_tiny_vae, "use_tiny_vae": use_tiny_vae,
"tiny_vae_path": (os.path.join(model_path, "taew2_1.pth") if use_tiny_vae else None), "tiny_vae_path": (os.path.join(model_path, "taew2_1.pth") if use_tiny_vae else None),
"lazy_load": lazy_load, "lazy_load": lazy_load,
"do_mm_calib": False, "do_mm_calib": False,
......
...@@ -403,7 +403,7 @@ def run_inference( ...@@ -403,7 +403,7 @@ def run_inference(
"clip_quantized_ckpt": clip_quant_ckpt, "clip_quantized_ckpt": clip_quant_ckpt,
"clip_quant_scheme": clip_quant_scheme, "clip_quant_scheme": clip_quant_scheme,
"use_tiling_vae": use_tiling_vae, "use_tiling_vae": use_tiling_vae,
"tiny_vae": use_tiny_vae, "use_tiny_vae": use_tiny_vae,
"tiny_vae_path": (os.path.join(model_path, "taew2_1.pth") if use_tiny_vae else None), "tiny_vae_path": (os.path.join(model_path, "taew2_1.pth") if use_tiny_vae else None),
"lazy_load": lazy_load, "lazy_load": lazy_load,
"do_mm_calib": False, "do_mm_calib": False,
......
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
"clip_quantized_ckpt": "/path/to/clip-fp8.pth", "clip_quantized_ckpt": "/path/to/clip-fp8.pth",
"clip_quant_scheme": "fp8", "clip_quant_scheme": "fp8",
"use_tiling_vae": true, "use_tiling_vae": true,
"tiny_vae": true, "use_tiny_vae": true,
"tiny_vae_path": "/path/to/taew2_1.pth", "tiny_vae_path": "/path/to/taew2_1.pth",
"lazy_load": true "lazy_load": true
} }
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
"clip_quantized_ckpt": "/path/to/clip-fp8.pth", "clip_quantized_ckpt": "/path/to/clip-fp8.pth",
"clip_quant_scheme": "fp8", "clip_quant_scheme": "fp8",
"use_tiling_vae": true, "use_tiling_vae": true,
"tiny_vae": true, "use_tiny_vae": true,
"tiny_vae_path": "/path/to/taew2_1.pth", "tiny_vae_path": "/path/to/taew2_1.pth",
"lazy_load": true, "lazy_load": true,
"rotary_chunk": true, "rotary_chunk": true,
......
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
"mm_config": { "mm_config": {
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Q8F" "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Q8F"
}, },
"tiny_vae": true, "use_tiny_vae": true,
"tiny_vae_path": "/x2v_models/taew2_1.pth", "tiny_vae_path": "/x2v_models/taew2_1.pth",
"t5_offload_granularity": "block" "t5_offload_granularity": "block"
} }
...@@ -22,6 +22,18 @@ For the original model without step distillation, we can use the following solut ...@@ -22,6 +22,18 @@ For the original model without step distillation, we can use the following solut
4. [Model Quantization](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/quantization.html) to accelerate Linear layer inference. 4. [Model Quantization](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/quantization.html) to accelerate Linear layer inference.
5. [Variable Resolution Inference](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/changing_resolution.html) to reduce the resolution of intermediate inference steps. 5. [Variable Resolution Inference](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/changing_resolution.html) to reduce the resolution of intermediate inference steps.
## 💡 Using Tiny VAE
In some cases, the VAE component can be time-consuming. You can use a lightweight VAE for acceleration, which can also reduce some GPU memory usage.
```python
{
"use_tiny_vae": true,
  "tiny_vae_path": "/path/to/taew2_1.pth"
}
```
The taew2_1.pth weights can be downloaded from [here](https://github.com/madebyollin/taehv/raw/refs/heads/main/taew2_1.pth)
## ⚠️ Note ## ⚠️ Note
Some acceleration solutions currently cannot be used together, and we are working to resolve this issue. Some acceleration solutions currently cannot be used together, and we are working to resolve this issue.
......
...@@ -158,12 +158,12 @@ use_tiling_vae = True # Enable VAE chunked inference ...@@ -158,12 +158,12 @@ use_tiling_vae = True # Enable VAE chunked inference
#### Lightweight VAE #### Lightweight VAE
You can download it here: https://github.com/madebyollin/taehv/blob/main/taew2_1.pth
```python ```python
# VAE optimization configuration # VAE optimization configuration
tiny_vae = True # Use lightweight VAE use_tiny_vae = True # Use lightweight VAE
tiny_vae_path = "/path/to/taew2_1.pth"
``` ```
You can download taew2_1.pth [here](https://github.com/madebyollin/taehv/blob/main/taew2_1.pth)
**VAE Optimization Effects**: **VAE Optimization Effects**:
- Standard VAE: Baseline performance, 100% quality retention - Standard VAE: Baseline performance, 100% quality retention
......
...@@ -22,6 +22,19 @@ ...@@ -22,6 +22,19 @@
4. [模型量化](https://lightx2v-zhcn.readthedocs.io/zh-cn/latest/method_tutorials/quantization.html) 加速 Linear 层的推理。 4. [模型量化](https://lightx2v-zhcn.readthedocs.io/zh-cn/latest/method_tutorials/quantization.html) 加速 Linear 层的推理。
5. [变分辨率推理](https://lightx2v-zhcn.readthedocs.io/zh-cn/latest/method_tutorials/changing_resolution.html) 降低中间推理步的分辨率。 5. [变分辨率推理](https://lightx2v-zhcn.readthedocs.io/zh-cn/latest/method_tutorials/changing_resolution.html) 降低中间推理步的分辨率。
## 💡 使用Tiny VAE
在某些情况下,VAE部分耗时会比较大,可以使用轻量级VAE进行加速,同时也可以降低一部分显存。
```python
{
"use_tiny_vae": true,
  "tiny_vae_path": "/path/to/taew2_1.pth"
}
```
taew2_1.pth 权重可以从[这里](https://github.com/madebyollin/taehv/raw/refs/heads/main/taew2_1.pth)下载
## ⚠️ 注意 ## ⚠️ 注意
有一部分的加速方案之间目前无法结合使用,我们目前正在致力于解决这一问题。 有一部分的加速方案之间目前无法结合使用,我们目前正在致力于解决这一问题。
......
...@@ -158,12 +158,12 @@ use_tiling_vae = True # 启用VAE分块推理 ...@@ -158,12 +158,12 @@ use_tiling_vae = True # 启用VAE分块推理
#### 轻量级VAE #### 轻量级VAE
可以在这里下载:https://github.com/madebyollin/taehv/blob/main/taew2_1.pth
```python ```python
# VAE优化配置 # VAE优化配置
tiny_vae = True # 使用轻量级VAE use_tiny_vae = True
tiny_vae_path = "/path/to/taew2_1.pth"
``` ```
taew2_1.pth 权重可以从[这里](https://github.com/madebyollin/taehv/raw/refs/heads/main/taew2_1.pth)下载
**VAE优化效果**: **VAE优化效果**:
- 标准VAE: 基准性能,100%质量保持 - 标准VAE: 基准性能,100%质量保持
......
...@@ -142,7 +142,7 @@ class WanRunner(DefaultRunner): ...@@ -142,7 +142,7 @@ class WanRunner(DefaultRunner):
"parallel": self.config.parallel_vae, "parallel": self.config.parallel_vae,
"use_tiling": self.config.get("use_tiling_vae", False), "use_tiling": self.config.get("use_tiling_vae", False),
} }
if self.config.get("tiny_vae", False): if self.config.get("use_tiny_vae", False):
tiny_vae_path = self.config.get("tiny_vae_path", os.path.join(self.config.model_path, "taew2_1.pth")) tiny_vae_path = self.config.get("tiny_vae_path", os.path.join(self.config.model_path, "taew2_1.pth"))
vae_decoder = WanVAE_tiny( vae_decoder = WanVAE_tiny(
vae_pth=tiny_vae_path, vae_pth=tiny_vae_path,
...@@ -154,7 +154,7 @@ class WanRunner(DefaultRunner): ...@@ -154,7 +154,7 @@ class WanRunner(DefaultRunner):
def load_vae(self): def load_vae(self):
vae_encoder = self.load_vae_encoder() vae_encoder = self.load_vae_encoder()
if vae_encoder is None or self.config.get("tiny_vae", False): if vae_encoder is None or self.config.get("use_tiny_vae", False):
vae_decoder = self.load_vae_decoder() vae_decoder = self.load_vae_decoder()
else: else:
vae_decoder = vae_encoder vae_decoder = vae_encoder
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment