Commit b1a21412 authored by helloyongyang's avatar helloyongyang
Browse files

update configs, docs and readme

parent 8e941d39
{
"infer_steps": 50,
"target_video_length": 81,
"text_len": 512,
"target_height": 480,
"target_width": 832,
"self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3",
"seed": 42,
"sample_guide_scale": 6,
"sample_shift": 8,
"enable_cfg": true,
"cpu_offload": false,
"mm_config": {
"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl",
"weight_auto_quant": true
},
"feature_caching": "Tea",
"coefficients": [
[-3.03318725e05, 4.90537029e04, -2.65530556e03, 5.87365115e01, -3.15583525e-01],
[-5784.54975374, 5449.50911966, -1811.16591783, 256.27178429, -13.02252404]
],
"use_ret_steps": true,
"teacache_thresh": 0.26
}
......@@ -13,7 +13,6 @@
"cpu_offload": true,
"offload_granularity": "block",
"mm_config": {
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
"weight_auto_quant": true
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm"
}
}
......@@ -14,7 +14,6 @@
"cpu_offload": true,
"offload_granularity": "block",
"mm_config": {
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
"weight_auto_quant": true
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm"
}
}
......@@ -15,8 +15,7 @@
"t5_offload_granularity": "block",
"dit_quantized_ckpt": "/path/to/dit_quant_model",
"mm_config": {
"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Vllm",
"weight_auto_quant": false
"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Vllm"
},
"t5_quantized": true,
"t5_quantized_ckpt": "/path/to/models_t5_umt5-xxl-enc-fp8.pth",
......@@ -28,4 +27,4 @@
"tiny_vae": true,
"tiny_vae_path": "/path/to/taew2_1.pth",
"lazy_load": true
}
}
\ No newline at end of file
......@@ -15,8 +15,7 @@
"t5_offload_granularity": "block",
"dit_quantized_ckpt": "/path/to/dit_quant_model",
"mm_config": {
"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Vllm",
"weight_auto_quant": false
"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Vllm"
},
"t5_quantized": true,
"t5_quantized_ckpt": "/path/to/models_t5_umt5-xxl-enc-fp8.pth",
......@@ -30,4 +29,4 @@
"lazy_load": true,
"rotary_chunk": true,
"clean_cuda_cache": true
}
}
\ No newline at end of file
......@@ -15,8 +15,7 @@
"t5_offload_granularity": "block",
"dit_quantized_ckpt": "/path/to/dit_int8",
"mm_config": {
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Q8F",
"weight_auto_quant": false
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Q8F"
},
"use_tiling_vae": true
}
}
\ No newline at end of file
......@@ -15,10 +15,9 @@
"offload_granularity": "phase",
"dit_quantized_ckpt": "/path/to/dit_int8",
"mm_config": {
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Q8F",
"weight_auto_quant": false
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Q8F"
},
"tiny_vae": true,
"tiny_vae_path": "/mnt/afs_2/gushiqiao/x2v_models/taew2_1.pth",
"t5_offload_granularity": "block"
}
}
\ No newline at end of file
......@@ -8,7 +8,6 @@
"seed": 0,
"dit_quantized_ckpt": "/mtc/gushiqiao/llmc_workspace/x2v_models/hunyuan/hunyuan_i2v_int8.pth",
"mm_config": {
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
"weight_auto_quant": true
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm"
}
}
}
\ No newline at end of file
{
"infer_steps": 20,
"target_video_length": 33,
"i2v_resolution": "720p",
"self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3",
"seed": 0,
"dit_quantized_ckpt": "/mtc/gushiqiao/llmc_workspace/x2v_models/hunyuan/hunyuan_i2v_int8.pth",
"mm_config": {
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
"weight_auto_quant": false
}
}
......@@ -13,7 +13,6 @@
"cpu_offload": false,
"dit_quantized_ckpt": "/path/to/int8/model",
"mm_config": {
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
"weight_auto_quant": false
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm"
}
}
}
\ No newline at end of file
{
"infer_steps": 40,
"target_video_length": 81,
"target_height": 480,
"target_width": 832,
"self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2",
"seed": 42,
"sample_guide_scale": 5,
"sample_shift": 5,
"enable_cfg": true,
"cpu_offload": false,
"mm_config": {
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
"weight_auto_quant": true
}
}
# Model Quantization
lightx2v supports quantized inference for linear layers in **Dit**, enabling `w8a8-int8`, `w8a8-fp8`, `w8a8-fp8block`, `w8a8-mxfp8` and `w4a4-nvfp4` matrix multiplication.
LightX2V supports quantization inference for linear layers in `Dit`, supporting `w8a8-int8`, `w8a8-fp8`, `w8a8-fp8block`, `w8a8-mxfp8`, and `w4a4-nvfp4` matrix multiplication.
## Generating Quantized Models
## Producing Quantized Models
### Offline Quantization
Use LightX2V's convert tool to convert models into quantized models. Refer to the [documentation](https://github.com/ModelTC/lightx2v/tree/main/tools/convert/readme.md).
lightx2v also supports direct loading of pre-quantized weights. For offline model quantization, refer to the [documentation](https://github.com/ModelTC/lightx2v/tree/main/tools/convert/readme.md).
Configure the [quantization file](https://github.com/ModelTC/lightx2v/tree/main/configs/quantization/wan_i2v_quant_offline.json):
1. Set `dit_quantized_ckpt` to the converted weight path
2. Set `weight_auto_quant` to `false` in `mm_config`
## Loading Quantized Models for Inference
Write the path of the converted quantized weights to the `dit_quantized_ckpt` field in the [configuration file](https://github.com/ModelTC/lightx2v/blob/main/configs/quantization).
## Quantized Inference
By pointing `--config_json` to the desired config file, you can load the quantized model for inference.
### Automatic Quantization
```shell
bash scripts/run_wan_i2v_quant_auto.sh
```
### Offline Quantization
```shell
bash scripts/run_wan_i2v_quant_offline.sh
```
## Launching Quantization Service
After offline quantization, point `--config_json` to the offline quantization JSON file.
Example modification in `scripts/start_server.sh`:
```shell
export RUNNING_FLAG=infer
python -m lightx2v.api_server \
--model_cls wan2.1 \
--task t2v \
--model_path $model_path \
--config_json ${lightx2v_path}/configs/quantization/wan_i2v_quant_offline.json \
--port 8000
```
[Here](https://github.com/ModelTC/lightx2v/tree/main/scripts/quantization) are some running scripts for use.
## Advanced Quantization Features
Refer to the quantization tool [LLMC documentation](https://github.com/ModelTC/llmc/blob/main/docs/en/source/backend/lightx2v.md) for details.
For details, please refer to the documentation of the quantization tool [LLMC](https://github.com/ModelTC/llmc/blob/main/docs/en/source/backend/lightx2v.md)
......@@ -5,38 +5,15 @@ lightx2v支持对`Dit`中的线性层进行量化推理,支持`w8a8-int8`, `w8
## 生产量化模型
### 离线量化
使用LightX2V的convert工具,将模型转换成量化模型,参考[文档](https://github.com/ModelTC/lightx2v/tree/main/tools/convert/readme_zh.md)
lightx2v同时支持直接加载量化好的权重进行推理,对模型进行离线量化可参考[文档](https://github.com/ModelTC/lightx2v/tree/main/tools/convert/readme_zh.md)
将转换的权重路径,写到[配置文件](https://github.com/ModelTC/lightx2v/tree/main/configs/quantization/wan_i2v_quant_offline.json)中的`dit_quantized_ckpt`中,同时将`mm_config`中的`weight_auto_quant`置为`false`即可。
## 加载量化模型进行推理
## 量化推理
将转换后的量化权重的路径,写到[配置文件](https://github.com/ModelTC/lightx2v/blob/main/configs/quantization)中的`dit_quantized_ckpt`中。
### 自动量化
```shell
bash scripts/run_wan_i2v_quant_auto.sh
```
### 离线量化
```shell
bash scripts/run_wan_i2v_quant_offline.sh
```
通过将`--config_json`指定到具体的config文件,即可加载量化模型进行推理。
## 启动量化服务
建议离线转好量化权重之后,`--config_json`指向到离线量化的`json`文件
比如,将`scripts/start_server.sh`脚本进行如下改动:
```shell
export RUNNING_FLAG=infer
python -m lightx2v.api_server \
--model_cls wan2.1 \
--task t2v \
--model_path $model_path \
--config_json ${lightx2v_path}/configs/quantization/wan_i2v_quant_offline.json \
--port 8000
```
[这里](https://github.com/ModelTC/lightx2v/tree/main/scripts/quantization)有一些运行脚本供使用。
## 高阶量化功能
......
#!/bin/bash
# Launch lightx2v Wan2.1 image-to-video inference with a quantization config.
# Set lightx2v_path and model_path below before running.
lightx2v_path=
model_path=

# Sanity checks: fall back to GPU 0 if no device was chosen, and refuse to
# run until the two required paths above have been filled in.
if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
cuda_devices=0
echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
export CUDA_VISIBLE_DEVICES=${cuda_devices}
fi

if [ -z "${lightx2v_path}" ]; then
echo "Error: lightx2v_path is not set. Please set this variable first."
exit 1
fi

if [ -z "${model_path}" ]; then
echo "Error: model_path is not set. Please set this variable first."
exit 1
fi

export TOKENIZERS_PARALLELISM=false

export PYTHONPATH=${lightx2v_path}:$PYTHONPATH

export ENABLE_PROFILING_DEBUG=true
export ENABLE_GRAPH_MODE=false
export DTYPE=BF16 # Removing this line yields higher-quality video (runs without the BF16 downcast).

# Paths are quoted so the script still works when they contain spaces.
python -m lightx2v.infer \
--model_cls wan2.1 \
--task i2v \
--model_path "${model_path}" \
--config_json "${lightx2v_path}/configs/quantization/wan_i2v.json" \
--prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
--negative_prompt "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" \
--image_path "${lightx2v_path}/assets/inputs/imgs/img_0.jpg" \
--save_video_path "${lightx2v_path}/save_results/output_lightx2v_wan_i2v.mp4"
# 模型转换工具
# Model Conversion Tool
A powerful utility for converting model weights between different formats and performing quantization tasks.
This converter tool can convert model weights between different formats.
## Diffusers
Facilitates mutual conversion between diffusers architecture and lightx2v architecture
### Lightx2v->Diffusers
```bash
python converter.py \
--source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P \
--output /Path/To/Wan2.1-I2V-14B-480P-Diffusers \
--direction forward \
--save_by_block
```
### Diffusers->Lightx2v
```bash
python converter.py \
--source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P-Diffusers \
--output /Path/To/Wan2.1-I2V-14B-480P \
--direction backward \
--save_by_block
```
## Quantization
This tool supports converting fp32/fp16/bf16 model weights to INT8 and FP8 types.
## Feature 1: Convert Quantized Models
This tool supports converting **FP32/FP16/BF16** model weights to **INT8, FP8** types.
### Wan DIT
......@@ -147,3 +125,25 @@ python converter.py \
--model_type wan_clip \
--quantized
```
## Feature 2: Format Conversion Between Diffusers and Lightx2v
Supports mutual conversion between Diffusers architecture and LightX2V architecture
### Lightx2v->Diffusers
```bash
python converter.py \
--source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P \
--output /Path/To/Wan2.1-I2V-14B-480P-Diffusers \
--direction forward \
--save_by_block
```
### Diffusers->Lightx2v
```bash
python converter.py \
--source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P-Diffusers \
--output /Path/To/Wan2.1-I2V-14B-480P \
--direction backward \
--save_by_block
```
# 模型转换工具
一款功能强大的实用工具,可在不同格式之间转换模型权重并执行量化任务。
该converter工具可在不同格式之间转换模型权重。
## Diffusers
支持 Diffusers 架构与 LightX2V 架构之间的相互转换
### Lightx2v->Diffusers
```bash
python converter.py \
--source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P \
--output /Path/To/Wan2.1-I2V-14B-480P-Diffusers \
--direction forward \
--save_by_block
```
### Diffusers->Lightx2v
```bash
python converter.py \
--source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P-Diffusers \
--output /Path/To/Wan2.1-I2V-14B-480P \
--direction backward \
--save_by_block
```
## 量化
## 功能1:转换量化模型
该工具支持将 **FP32/FP16/BF16** 模型权重转换为 **INT8、FP8** 类型。
......@@ -147,3 +125,25 @@ python converter.py \
--model_type wan_clip \
--quantized
```
## 功能2:Diffusers和Lightx2v之间的格式转换
支持 Diffusers 架构与 LightX2V 架构之间的相互转换
### Lightx2v->Diffusers
```bash
python converter.py \
--source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P \
--output /Path/To/Wan2.1-I2V-14B-480P-Diffusers \
--direction forward \
--save_by_block
```
### Diffusers->Lightx2v
```bash
python converter.py \
--source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P-Diffusers \
--output /Path/To/Wan2.1-I2V-14B-480P \
--direction backward \
--save_by_block
```
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment