Commit 9b03e5c1 authored by helloyongyang

add some docs and update configs and readme

parent fe13f4db
@@ -21,52 +21,11 @@
 [Wan2.1-T2V-CausVid](https://huggingface.co/lightx2v/Wan2.1-T2V-14B-CausVid)
 [SkyReels-V2-DF](https://huggingface.co/Skywork/SkyReels-V2-DF-14B-540P)
-## Build Env With Conda
-```shell
-# clone repo and submodules
-git clone https://github.com/ModelTC/lightx2v.git lightx2v && cd lightx2v
-git submodule update --init --recursive
-# create conda env and install requirements
-conda create -n lightx2v python=3.11 && conda activate lightx2v
-pip install -r requirements.txt
-# Install again separately to bypass the version conflict check
-pip install transformers==4.45.2
-# install flash-attention 2
-cd lightx2v/3rd/flash-attention && pip install --no-cache-dir -v -e .
-# install flash-attention 3, only if hopper
-cd lightx2v/3rd/flash-attention/hopper && pip install --no-cache-dir -v -e .
-```
-## Build Env With Docker
-```shell
-docker pull lightx2v/lightx2v:latest
-docker run -it --rm --name lightx2v --gpus all --ipc=host lightx2v/lightx2v:latest
-```
-## Run
-Infer
-```shell
-# modify the parameters of the running script
-bash scripts/run_hunyuan_t2v.sh
-```
-Start A Server
-```shell
-# modify the parameters of the running script
-bash scripts/start_server.sh
-# modify the message of the post.py
-python post.py
-```
+## How to Run
+Please refer to the [documentation](https://github.com/ModelTC/lightx2v/tree/main/docs) in lightx2v
 ## Contributing Guidelines
@@ -14,5 +14,5 @@
         "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
         "quant_method": "smoothquant"
     },
-    "naive_quant_path": "/path/to/int8_model"
+    "quant_model_path": "/path/to/int8_model"
 }
@@ -5,9 +5,5 @@
     "target_width": 1280,
     "attention_type": "flash_attn3",
     "seed": 42,
-    "mm_config": {
-        "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
-        "weight_auto_quant": true
-    },
     "feature_caching": "TaylorSeer"
 }
@@ -5,9 +5,5 @@
     "target_width": 1280,
     "attention_type": "flash_attn3",
     "seed": 42,
-    "mm_config": {
-        "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
-        "weight_auto_quant": true
-    },
     "parallel_attn_type": "ring"
 }
@@ -5,9 +5,5 @@
     "target_width": 1280,
     "attention_type": "flash_attn3",
     "seed": 42,
-    "mm_config": {
-        "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
-        "weight_auto_quant": true
-    },
     "parallel_attn_type": "ulysses"
 }
@@ -3,9 +3,5 @@
     "target_video_length": 33,
     "i2v_resolution": "720p",
     "attention_type": "flash_attn3",
-    "seed": 0,
-    "mm_config": {
-        "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
-        "weight_auto_quant": true
-    }
+    "seed": 0
 }
@@ -7,5 +7,5 @@
     "mm_config": {
         "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm"
     },
-    "naive_quant_path": "./hy_i2v_quant_model"
+    "quant_model_path": "./hy_i2v_quant_model"
 }
@@ -4,9 +4,5 @@
     "target_height": 720,
     "target_width": 1280,
     "attention_type": "flash_attn3",
-    "seed": 42,
-    "mm_config": {
-        "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
-        "weight_auto_quant": true
-    }
+    "seed": 42
 }
@@ -8,5 +8,5 @@
     "mm_config": {
         "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm"
     },
-    "naive_quant_path": "./hy_t2v_quant_model"
+    "quant_model_path": "./hy_t2v_quant_model"
 }
@@ -8,9 +8,5 @@
     "sample_guide_scale": 5,
     "sample_shift": 5,
     "enable_cfg": true,
-    "cpu_offload": false,
-    "mm_config": {
-        "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl",
-        "weight_auto_quant": true
-    }
+    "cpu_offload": false
 }
@@ -12,5 +12,5 @@
     "mm_config": {
         "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl"
     },
-    "naive_quant_path": "./wan_i2v_quant_model"
+    "quant_model_path": "./wan_i2v_quant_model"
 }
@@ -12,9 +12,5 @@
     "attention_type": "flash_attn3",
     "seed": 42,
     "sample_guide_scale": 5,
-    "sample_shift": 3,
-    "mm_config": {
-        "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl",
-        "weight_auto_quant": true
-    }
+    "sample_shift": 3
 }
@@ -7,9 +7,5 @@
     "attention_type": "flash_attn3",
     "seed": 42,
     "sample_guide_scale": 5,
-    "sample_shift": 3,
-    "mm_config": {
-        "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl",
-        "weight_auto_quant": true
-    }
+    "sample_shift": 3
 }
@@ -7,9 +7,5 @@
     "attention_type": "flash_attn3",
     "seed": 42,
     "sample_guide_scale": 6,
-    "sample_shift": 8,
-    "mm_config": {
-        "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl",
-        "weight_auto_quant": true
-    }
+    "sample_shift": 8
 }
@@ -9,9 +9,5 @@
     "sample_guide_scale": 6,
     "sample_shift": 8,
     "enable_cfg": true,
-    "cpu_offload": false,
-    "mm_config": {
-        "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl",
-        "weight_auto_quant": true
-    }
+    "cpu_offload": false
 }
@@ -13,5 +13,5 @@
     "mm_config": {
         "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl"
     },
-    "naive_quant_path": "./wan_t2v_quant_model"
+    "quant_model_path": "./wan_t2v_quant_model"
 }
# Prepare Environment
We recommend using a Docker environment. Here is the [Docker Hub repository](https://hub.docker.com/r/lightx2v/lightx2v/tags) for lightx2v; please select the tag with the latest date, for example, 25042502.
```shell
docker pull lightx2v/lightx2v:25042502
docker run --gpus all -itd --ipc=host --name [container_name] -v [mount_settings] --entrypoint /bin/bash [image_id]
```
If you want to set up the environment yourself using conda, you can refer to the following steps:
```shell
# clone repo and submodules
git clone https://github.com/ModelTC/lightx2v.git lightx2v && cd lightx2v
git submodule update --init --recursive
conda create -n lightx2v python=3.11 && conda activate lightx2v
pip install -r requirements.txt
# Install again separately to bypass the version conflict check
# The Hunyuan model needs to run under this version of transformers. If you do not need to run the Hunyuan model, you can ignore this step.
pip install transformers==4.45.2
# install flash-attention 2
cd lightx2v/3rd/flash-attention && pip install --no-cache-dir -v -e .
# install flash-attention 3 (only needed on Hopper GPUs)
cd lightx2v/3rd/flash-attention/hopper && pip install --no-cache-dir -v -e .
```
# Infer
```shell
# Modify the path in the script
bash scripts/run_wan_t2v.sh
```
In addition to the existing input arguments in the script, there are also some necessary parameters in the `${lightx2v_path}/configs/wan_t2v.json` file specified by `--config_json`. You can modify them as needed.
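For orientation, the Wan t2v-style configs touched in this commit carry parameters along these lines (a sketch assembled from the config diffs above; the exact keys and values vary by model and task, so treat your local `wan_t2v.json` as the source of truth):
```json
{
    "target_height": 720,
    "target_width": 1280,
    "attention_type": "flash_attn3",
    "seed": 42,
    "sample_guide_scale": 5,
    "sample_shift": 5,
    "enable_cfg": true,
    "cpu_offload": false
}
```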
# How to Start the Service
lightx2v provides asynchronous service functionality; the code entry point is [here](https://github.com/ModelTC/lightx2v/blob/main/lightx2v/api_server.py).
### Start the Service
```shell
# Modify the paths in the script
bash scripts/start_server.sh
```
The `--port 8000` option means the service will bind to port `8000` on the local machine. You can change this as needed.
### Client Sends Request
```shell
python scripts/post.py
```
The service endpoint is: `/v1/local/video/generate`
The `message` parameter in `scripts/post.py` is as follows:
```python
message = {
"task_id": generate_task_id(),
"task_id_must_unique": True,
"prompt": "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage.",
"negative_prompt": "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走",
"image_path": "",
"save_video_path": "./output_lightx2v_wan_t2v_t02.mp4",
}
```
1. `prompt`, `negative_prompt`, and `image_path` are basic inputs for video generation. `image_path` can be an empty string, indicating no image input is needed.
2. `save_video_path` specifies where the generated video will be saved on the server. A relative path is resolved against the server's startup directory, so it is recommended to use an absolute path suited to your environment.
3. `task_id` is the ID of the task, as a string. You can supply your own string or use the `generate_task_id()` function to generate a random one; the task ID distinguishes different video generation tasks.
4. `task_id_must_unique` indicates whether each `task_id` must be unique. If set to `False`, duplicate `task_id`s are allowed; in that case, the server's task record for a given `task_id` is overwritten by the newer task. If you do not need to keep a record of all tasks for querying, you can set this to `False`. A hand-rolled version of this request is sketched below.
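For a quick test without `post.py`, the same request can be sent by hand. The sketch below assumes the server listens on `localhost:8000` and accepts the message dict directly as the JSON request body; `scripts/post.py` remains the authoritative client.
```shell
curl -X POST http://localhost:8000/v1/local/video/generate \
  -H "Content-Type: application/json" \
  -d '{
        "task_id": "task_demo_001",
        "task_id_must_unique": false,
        "prompt": "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage.",
        "negative_prompt": "",
        "image_path": "",
        "save_video_path": "/tmp/output_lightx2v_wan_t2v.mp4"
      }'
```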
### Client Checks Server Status
```shell
python scripts/check_status.py
```
The service endpoints include:
1. `/v1/local/video/generate/service_status` is used to check the status of the service. It returns whether the service is `busy` or `idle`. The service only accepts new requests when it is `idle`.
2. `/v1/local/video/generate/get_all_tasks` is used to get all tasks received and completed by the server.
3. `/v1/local/video/generate/task_status` is used to get the status of a specified `task_id`. It returns whether the task is `processing` or `completed`.
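A `curl` sketch of these endpoints (it assumes plain GET requests and that `task_status` takes the `task_id` as a query parameter; check `scripts/check_status.py` for the exact request format):
```shell
# 1. Is the service busy or idle?
curl http://localhost:8000/v1/local/video/generate/service_status
# 2. All tasks received and completed by the server
curl http://localhost:8000/v1/local/video/generate/get_all_tasks
# 3. Status of a single task: processing or completed (query-parameter form is an assumption)
curl "http://localhost:8000/v1/local/video/generate/task_status?task_id=task_demo_001"
```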
### Client Stops the Current Task on the Server at Any Time
```shell
python scripts/stop_running_task.py
```
The service endpoint is: `/v1/local/video/generate/stop_running_task`
After terminating the task, the server will not exit but will return to waiting for new requests.
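Done by hand, this amounts to a single request (a sketch; the HTTP method is an assumption, so see `scripts/stop_running_task.py` for the exact call):
```shell
curl -X POST http://localhost:8000/v1/local/video/generate/stop_running_task
```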
# Quantization
lightx2v supports quantized inference for linear layers, with both w8a8 and fp8 matrix multiplication.
### Run Quantized Inference
```shell
# Modify the path in the script
bash scripts/run_wan_t2v_save_quant.sh
```
There are two execution commands in the script:
#### Save Quantized Weights
Set the `RUNNING_FLAG` environment variable to `save_naive_quant`, and set `--config_json` to the corresponding `json` file: `${lightx2v_path}/configs/wan_t2v_save_quant.json`. In this file, `quant_model_path` specifies the path to save the quantized model.
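The relevant fragment, as it appears in the updated quantization configs in this commit (the Hunyuan configs use the int8 `mm_type` `W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm` instead):
```json
{
    "mm_config": {
        "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl"
    },
    "quant_model_path": "./wan_t2v_quant_model"
}
```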
#### Load Quantized Weights and Run Inference
Set the `RUNNING_FLAG` environment variable to `infer`, and set `--config_json` to the `json` file from the previous step.
### Start Quantization Service
After the quantized weights have been saved, proceed as in the loading step above: set the `RUNNING_FLAG` environment variable to `infer`, and point `--config_json` at the `json` file from the first step.
For example, modify the `scripts/start_server.sh` script as follows:
```shell
export RUNNING_FLAG=infer
python -m lightx2v.api_server \
--model_cls wan2.1 \
--task t2v \
--model_path $model_path \
--config_json ${lightx2v_path}/configs/wan_t2v_save_quant.json \
--port 8000
```