Commit b1a21412 authored by helloyongyang's avatar helloyongyang
Browse files

update configs, docs and readme

parent 8e941d39
{
"infer_steps": 50,
"target_video_length": 81,
"text_len": 512,
"target_height": 480,
"target_width": 832,
"self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3",
"seed": 42,
"sample_guide_scale": 6,
"sample_shift": 8,
"enable_cfg": true,
"cpu_offload": false,
"mm_config": {
"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl",
"weight_auto_quant": true
},
"feature_caching": "Tea",
"coefficients": [
[-3.03318725e05, 4.90537029e04, -2.65530556e03, 5.87365115e01, -3.15583525e-01],
[-5784.54975374, 5449.50911966, -1811.16591783, 256.27178429, -13.02252404]
],
"use_ret_steps": true,
"teacache_thresh": 0.26
}
......@@ -13,7 +13,6 @@
"cpu_offload": true,
"offload_granularity": "block",
"mm_config": {
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
"weight_auto_quant": true
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm"
}
}
......@@ -14,7 +14,6 @@
"cpu_offload": true,
"offload_granularity": "block",
"mm_config": {
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
"weight_auto_quant": true
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm"
}
}
......@@ -15,8 +15,7 @@
"t5_offload_granularity": "block",
"dit_quantized_ckpt": "/path/to/dit_quant_model",
"mm_config": {
"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Vllm",
"weight_auto_quant": false
"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Vllm"
},
"t5_quantized": true,
"t5_quantized_ckpt": "/path/to/models_t5_umt5-xxl-enc-fp8.pth",
......@@ -28,4 +27,4 @@
"tiny_vae": true,
"tiny_vae_path": "/path/to/taew2_1.pth",
"lazy_load": true
}
}
\ No newline at end of file
......@@ -15,8 +15,7 @@
"t5_offload_granularity": "block",
"dit_quantized_ckpt": "/path/to/dit_quant_model",
"mm_config": {
"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Vllm",
"weight_auto_quant": false
"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Vllm"
},
"t5_quantized": true,
"t5_quantized_ckpt": "/path/to/models_t5_umt5-xxl-enc-fp8.pth",
......@@ -30,4 +29,4 @@
"lazy_load": true,
"rotary_chunk": true,
"clean_cuda_cache": true
}
}
\ No newline at end of file
......@@ -15,8 +15,7 @@
"t5_offload_granularity": "block",
"dit_quantized_ckpt": "/path/to/dit_int8",
"mm_config": {
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Q8F",
"weight_auto_quant": false
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Q8F"
},
"use_tiling_vae": true
}
}
\ No newline at end of file
......@@ -15,10 +15,9 @@
"offload_granularity": "phase",
"dit_quantized_ckpt": "/path/to/dit_int8",
"mm_config": {
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Q8F",
"weight_auto_quant": false
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Q8F"
},
"tiny_vae": true,
"tiny_vae_path": "/mnt/afs_2/gushiqiao/x2v_models/taew2_1.pth",
"t5_offload_granularity": "block"
}
}
\ No newline at end of file
......@@ -8,7 +8,6 @@
"seed": 0,
"dit_quantized_ckpt": "/mtc/gushiqiao/llmc_workspace/x2v_models/hunyuan/hunyuan_i2v_int8.pth",
"mm_config": {
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
"weight_auto_quant": true
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm"
}
}
}
\ No newline at end of file
{
"infer_steps": 20,
"target_video_length": 33,
"i2v_resolution": "720p",
"self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3",
"seed": 0,
"dit_quantized_ckpt": "/mtc/gushiqiao/llmc_workspace/x2v_models/hunyuan/hunyuan_i2v_int8.pth",
"mm_config": {
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
"weight_auto_quant": false
}
}
......@@ -13,7 +13,6 @@
"cpu_offload": false,
"dit_quantized_ckpt": "/path/to/int8/model",
"mm_config": {
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
"weight_auto_quant": false
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm"
}
}
}
\ No newline at end of file
{
"infer_steps": 40,
"target_video_length": 81,
"target_height": 480,
"target_width": 832,
"self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2",
"seed": 42,
"sample_guide_scale": 5,
"sample_shift": 5,
"enable_cfg": true,
"cpu_offload": false,
"mm_config": {
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
"weight_auto_quant": true
}
}
# Model Quantization
lightx2v supports quantized inference for linear layers in **Dit**, enabling `w8a8-int8`, `w8a8-fp8`, `w8a8-fp8block`, `w8a8-mxfp8` and `w4a4-nvfp4` matrix multiplication.
LightX2V supports quantization inference for linear layers in `Dit`, supporting `w8a8-int8`, `w8a8-fp8`, `w8a8-fp8block`, `w8a8-mxfp8`, and `w4a4-nvfp4` matrix multiplication.
## Generating Quantized Models
## Producing Quantized Models
### Offline Quantization
Use LightX2V's convert tool to convert models into quantized models. Refer to the [documentation](https://github.com/ModelTC/lightx2v/tree/main/tools/convert/readme.md).
lightx2v also supports direct loading of pre-quantized weights. For offline model quantization, refer to the [documentation](https://github.com/ModelTC/lightx2v/tree/main/tools/convert/readme.md).
Configure the [quantization file](https://github.com/ModelTC/lightx2v/tree/main/configs/quantization/wan_i2v_quant_offline.json):
1. Set `dit_quantized_ckpt` to the converted weight path
2. Set `weight_auto_quant` to `false` in `mm_config`
## Loading Quantized Models for Inference
Write the path of the converted quantized weights to the `dit_quantized_ckpt` field in the [configuration file](https://github.com/ModelTC/lightx2v/blob/main/configs/quantization).
## Quantized Inference
By pointing `--config_json` to the desired config file, you can load the quantized model for inference.
### Automatic Quantization
```shell
bash scripts/run_wan_i2v_quant_auto.sh
```
### Offline Quantization
```shell
bash scripts/run_wan_i2v_quant_offline.sh
```
## Launching Quantization Service
After offline quantization, point `--config_json` to the offline quantization JSON file.
Example modification in `scripts/start_server.sh`:
```shell
export RUNNING_FLAG=infer
python -m lightx2v.api_server \
--model_cls wan2.1 \
--task t2v \
--model_path $model_path \
--config_json ${lightx2v_path}/configs/quantization/wan_i2v_quant_offline.json \
--port 8000
```
[Here](https://github.com/ModelTC/lightx2v/tree/main/scripts/quantization) are some running scripts for use.
## Advanced Quantization Features
Refer to the quantization tool [LLMC documentation](https://github.com/ModelTC/llmc/blob/main/docs/en/source/backend/lightx2v.md) for details.
For details, please refer to the documentation of the quantization tool [LLMC](https://github.com/ModelTC/llmc/blob/main/docs/en/source/backend/lightx2v.md)
......@@ -5,38 +5,15 @@ lightx2v支持对`Dit`中的线性层进行量化推理,支持`w8a8-int8`, `w8
## 生产量化模型
### 离线量化
使用LightX2V的convert工具,将模型转换成量化模型,参考[文档](https://github.com/ModelTC/lightx2v/tree/main/tools/convert/readme_zh.md)
lightx2v同时支持直接加载量化好的权重进行推理,对模型进行离线量化可参考[文档](https://github.com/ModelTC/lightx2v/tree/main/tools/convert/readme_zh.md)
将转换的权重路径,写到[配置文件](https://github.com/ModelTC/lightx2v/tree/main/configs/quantization/wan_i2v_quant_offline.json)中的`dit_quantized_ckpt`中,同时将`mm_config`中的`weight_auto_quant`置为`false`即可。
## 加载量化模型进行推理
## 量化推理
将转换后的量化权重的路径,写到[配置文件](https://github.com/ModelTC/lightx2v/blob/main/configs/quantization)中的`dit_quantized_ckpt`中。
### 自动量化
```shell
bash scripts/run_wan_i2v_quant_auto.sh
```
### 离线量化
```shell
bash scripts/run_wan_i2v_quant_offline.sh
```
通过将`--config_json`指定到具体的config文件,即可加载量化模型进行推理。
## 启动量化服务
建议离线转好量化权重之后,`--config_json`指向到离线量化的`json`文件
比如,将`scripts/start_server.sh`脚本进行如下改动:
```shell
export RUNNING_FLAG=infer
python -m lightx2v.api_server \
--model_cls wan2.1 \
--task t2v \
--model_path $model_path \
--config_json ${lightx2v_path}/configs/quantization/wan_i2v_quant_offline.json \
--port 8000
```
[这里](https://github.com/ModelTC/lightx2v/tree/main/scripts/quantization)有一些运行脚本供使用。
## 高阶量化功能
......
#!/bin/bash
# Launch lightx2v Wan2.1 image-to-video inference with a quantization config.
# Set lightx2v_path and model_path below before running.
lightx2v_path=
model_path=

# Sanity checks: fall back to GPU 0 if no device was chosen, and refuse to
# run until the two required paths above have been filled in.
if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
cuda_devices=0
echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
export CUDA_VISIBLE_DEVICES=${cuda_devices}
fi

if [ -z "${lightx2v_path}" ]; then
echo "Error: lightx2v_path is not set. Please set this variable first."
exit 1
fi

if [ -z "${model_path}" ]; then
echo "Error: model_path is not set. Please set this variable first."
exit 1
fi

export TOKENIZERS_PARALLELISM=false

export PYTHONPATH=${lightx2v_path}:$PYTHONPATH

export ENABLE_PROFILING_DEBUG=true
export ENABLE_GRAPH_MODE=false
export DTYPE=BF16 # Removing this line yields higher-quality video (runs without the BF16 downcast).

# Paths are quoted so the script still works when they contain spaces.
python -m lightx2v.infer \
--model_cls wan2.1 \
--task i2v \
--model_path "${model_path}" \
--config_json "${lightx2v_path}/configs/quantization/wan_i2v.json" \
--prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
--negative_prompt "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" \
--image_path "${lightx2v_path}/assets/inputs/imgs/img_0.jpg" \
--save_video_path "${lightx2v_path}/save_results/output_lightx2v_wan_i2v.mp4"
# 模型转换工具
# Model Conversion Tool
A powerful utility for converting model weights between different formats and performing quantization tasks.
This converter tool can convert model weights between different formats.
## Diffusers
Facilitates mutual conversion between diffusers architecture and lightx2v architecture
### Lightx2v->Diffusers
```bash
python converter.py \
--source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P \
--output /Path/To/Wan2.1-I2V-14B-480P-Diffusers \
--direction forward \
--save_by_block
```
### Diffusers->Lightx2v
```bash
python converter.py \
--source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P-Diffusers \
--output /Path/To/Wan2.1-I2V-14B-480P \
--direction backward \
--save_by_block
```
## Quantization
This tool supports converting fp32/fp16/bf16 model weights to INT8 and FP8 types.
## Feature 1: Convert Quantized Models
This tool supports converting **FP32/FP16/BF16** model weights to **INT8, FP8** types.
### Wan DIT
......@@ -147,3 +125,25 @@ python converter.py \
--model_type wan_clip \
--quantized
```
## Feature 2: Format Conversion Between Diffusers and Lightx2v
Supports mutual conversion between Diffusers architecture and LightX2V architecture
### Lightx2v->Diffusers
```bash
python converter.py \
--source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P \
--output /Path/To/Wan2.1-I2V-14B-480P-Diffusers \
--direction forward \
--save_by_block
```
### Diffusers->Lightx2v
```bash
python converter.py \
--source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P-Diffusers \
--output /Path/To/Wan2.1-I2V-14B-480P \
--direction backward \
--save_by_block
```
# 模型转换工具
一款功能强大的实用工具,可在不同格式之间转换模型权重并执行量化任务。
该converter工具可在不同格式之间转换模型权重。
## Diffusers
支持 Diffusers 架构与 LightX2V 架构之间的相互转换
### Lightx2v->Diffusers
```bash
python converter.py \
--source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P \
--output /Path/To/Wan2.1-I2V-14B-480P-Diffusers \
--direction forward \
--save_by_block
```
### Diffusers->Lightx2v
```bash
python converter.py \
--source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P-Diffusers \
--output /Path/To/Wan2.1-I2V-14B-480P \
--direction backward \
--save_by_block
```
## 量化
## 功能1:转换量化模型
该工具支持将 **FP32/FP16/BF16** 模型权重转换为 **INT8、FP8** 类型。
......@@ -147,3 +125,25 @@ python converter.py \
--model_type wan_clip \
--quantized
```
## 功能2:Diffusers和Lightx2v之间的格式转换
支持 Diffusers 架构与 LightX2V 架构之间的相互转换
### Lightx2v->Diffusers
```bash
python converter.py \
--source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P \
--output /Path/To/Wan2.1-I2V-14B-480P-Diffusers \
--direction forward \
--save_by_block
```
### Diffusers->Lightx2v
```bash
python converter.py \
--source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P-Diffusers \
--output /Path/To/Wan2.1-I2V-14B-480P \
--direction backward \
--save_by_block
```
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment