Commit 62f4464d authored by helloyongyang's avatar helloyongyang
Browse files

update docs

parent 1a798103
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
import logging
import os
import sys
from typing import List
from sphinx.ext import autodoc
import sphinxcontrib.redoc
logger = logging.getLogger(__name__)
sys.path.append(os.path.abspath("../.."))
# -- Project information -----------------------------------------------------
project = "Lightx2v"
copyright = "2024, Lightx2v Team"
author = "the Lightx2v Team"
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    # Docstring parsing, source links and cross-project references.
    "sphinx.ext.napoleon",
    "sphinx.ext.viewcode",
    "sphinx.ext.intersphinx",
    # "Copy" button on rendered code blocks.
    "sphinx_copybutton",
    # API documentation pulled from the Python sources.
    "sphinx.ext.autodoc",
    "sphinx.ext.autosummary",
    # Markdown sources and argparse-based CLI documentation.
    "myst_parser",
    "sphinxarg.ext",
    # OpenAPI / ReDoc rendering.
    "sphinxcontrib.redoc",
    "sphinxcontrib.openapi",
]
html_static_path = ["_static"]
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns: List[str] = ["**/*.template.rst"]
# Exclude the prompt "$" when copying code
copybutton_prompt_text = r"\$ "
copybutton_prompt_is_regexp = True
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_title = project
html_theme = "sphinx_book_theme"
# html_theme = 'sphinx_rtd_theme'
html_logo = "../../../assets/img_lightx2v.png"
# Theme options: where the docs live inside the repository, and the
# repository link/button shown by the theme.
html_theme_options = dict(
    path_to_docs="docs/ZH_CN/source",
    repository_url="https://github.com/ModelTC/lightx2v",
    use_repository_button=True,
)
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = ['_static']
# Generate additional rst documentation here.
def setup(app):
# from docs.source.generate_examples import generate_examples
# generate_examples()
pass
# Mock out external dependencies here.
# Modules that autodoc should mock instead of importing.
# NOTE: every entry needs its own trailing comma -- adjacent string literals
# are silently concatenated ("lightllm" "numpy" == "lightllmnumpy"), which
# would leave both modules unmocked.
autodoc_mock_imports = [
    "cpuinfo",
    "torch",
    "transformers",
    "psutil",
    "prometheus_client",
    "sentencepiece",
    "lightllm",
    "numpy",
    "tqdm",
    "tensorizer",
]
# Report every mock target that is already present in sys.modules at this
# point; the message below explains why that can be a problem.
for mock_target in autodoc_mock_imports:
    if mock_target in sys.modules:
        logger.info(
            "Potentially problematic mock target (%s) found; "
            "autodoc_mock_imports cannot mock modules that have already "
            "been loaded into sys.modules when the sphinx build starts.",
            mock_target,
        )
class MockedClassDocumenter(autodoc.ClassDocumenter):
    """Remove note about base class when a class is derived from object."""

    def add_line(self, line: str, source: str, *lineno: int) -> None:
        """Forward ``line`` to the parent documenter unless it is the
        ``Bases: object`` note.

        Args:
            line: The line of generated reST about to be emitted.
            source: Source name, passed through to the parent unchanged.
            *lineno: Optional line number(s), passed through unchanged.
        """
        # The "Bases:" line arrives with leading indentation from autodoc
        # (three spaces in the Sphinx releases checked -- TODO confirm for
        # the pinned version). Compare the stripped text so the filter does
        # not depend on the exact amount of whitespace.
        if line.strip() == "Bases: :py:class:`object`":
            return
        super().add_line(line, source, *lineno)


# Replace autodoc's class documenter with the filtered variant above.
autodoc.ClassDocumenter = MockedClassDocumenter
navigation_with_keys = False
# comfyui部署
xxx
\ No newline at end of file
# gradio部署
xxx
\ No newline at end of file
# 本地windows电脑部署
xxx
\ No newline at end of file
# 如何启动服务
lightx2v 提供异步服务功能。代码入口点在 [这里](https://github.com/ModelTC/lightx2v/blob/main/lightx2v/api_server.py)
### 启动服务
```shell
# 修改脚本中的路径
bash scripts/start_server.sh
```
`--port 8000` 选项表示服务将绑定到本地机器的 `8000` 端口。您可以根据需要更改此端口。
### 客户端发送请求
```shell
python scripts/post.py
```
服务端点:`/v1/tasks/`
`scripts/post.py` 中的 `message` 参数如下:
```python
message = {
"prompt": "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage.",
"negative_prompt": "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走",
"image_path": ""
}
```
1. `prompt`、`negative_prompt` 和 `image_path` 是视频生成的基本输入。`image_path` 可以是空字符串,表示不需要图像输入。
### 客户端检查服务器状态
```shell
python scripts/check_status.py
```
服务端点包括:
1. `/v1/service/status` 用于检查服务状态。返回服务是 `busy` 还是 `idle`。服务只有在 `idle` 时才接受新请求。
2. `/v1/tasks/` 用于获取服务器接收和完成的所有任务。
3. `/v1/tasks/{task_id}/status` 用于获取指定 `task_id` 的任务状态。返回任务是 `processing` 还是 `completed`
### 客户端随时停止服务器上的当前任务
```shell
python scripts/stop_running_task.py
```
服务端点:`/v1/tasks/running`
终止任务后,服务器不会退出,而是返回等待新请求的状态。
### 在单个节点上启动多个服务
在单个节点上,您可以使用 `scripts/start_server.sh` 启动多个服务(注意同一 IP 下的端口号必须不同),或者可以使用 `scripts/start_multi_servers.sh` 同时启动多个服务:
```shell
num_gpus=8 bash scripts/start_multi_servers.sh
```
其中 `num_gpus` 表示要启动的服务数量;服务将从 `--start_port` 开始在连续端口上运行。
### 多个服务之间的调度
```shell
python scripts/post_multi_servers.py
```
`post_multi_servers.py` 将根据服务的空闲状态调度多个客户端请求。
### API 端点总结
| 端点 | 方法 | 描述 |
|------|------|------|
| `/v1/tasks/` | POST | 创建视频生成任务 |
| `/v1/tasks/form` | POST | 通过表单创建视频生成任务 |
| `/v1/tasks/` | GET | 获取所有任务列表 |
| `/v1/tasks/{task_id}/status` | GET | 获取指定任务状态 |
| `/v1/tasks/{task_id}/result` | GET | 获取指定任务的结果视频文件 |
| `/v1/tasks/running` | DELETE | 停止当前运行的任务 |
| `/v1/files/download/{file_path}` | GET | 下载文件 |
| `/v1/service/status` | GET | 获取服务状态 |
# 低延迟场景部署
xxx
\ No newline at end of file
# 低资源场景部署
xxx
\ No newline at end of file
# Prepare Environment
We recommend using a docker environment. Here is the [dockerhub](https://hub.docker.com/r/lightx2v/lightx2v/tags) for lightx2v. Please select the tag with the latest date, for example, 25061301.
```shell
docker pull lightx2v/lightx2v:25061301
docker run --gpus all -itd --ipc=host --name [container_name] -v [mount_settings] --entrypoint /bin/bash [image_id]
```
If you want to set up the environment yourself using conda, you can refer to the following steps:
```shell
# clone repo and submodules
git clone https://github.com/ModelTC/lightx2v.git lightx2v && cd lightx2v
conda create -n lightx2v python=3.11 && conda activate lightx2v
pip install -r requirements.txt
# Install again separately to bypass the version conflict check
# The Hunyuan model needs to run under this version of transformers. If you do not need to run the Hunyuan model, you can ignore this step.
pip install transformers==4.45.2
# install flash-attention 2
git clone https://github.com/Dao-AILab/flash-attention.git --recursive
cd flash-attention && python setup.py install
# install flash-attention 3, only if hopper
cd flash-attention/hopper && python setup.py install
```
# Infer
```shell
# Modify the path in the script
bash scripts/run_wan_t2v.sh
```
In addition to the existing input arguments in the script, there are also some necessary parameters in the `${lightx2v_path}/configs/wan_t2v.json` file specified by `--config_json`. You can modify them as needed.

# 快速入门
## 准备环境
我们推荐使用docker环境,这是lightx2v的[dockerhub](https://hub.docker.com/r/lightx2v/lightx2v/tags),请选择一个最新日期的tag,比如25061301
```shell
docker pull lightx2v/lightx2v:25061301
docker run --gpus all -itd --ipc=host --name [容器名] -v [挂载设置] --entrypoint /bin/bash [镜像id]
```
对于中国大陆地区,若拉取镜像的时候,网络不稳定,可以从[渡渡鸟](https://docker.aityp.com/r/docker.io/lightx2v/lightx2v)上拉取
```shell
docker pull swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/lightx2v/lightx2v:25061301
```
如果你想使用conda自己搭建环境,可以参考如下步骤:
```shell
# 下载github代码
git clone https://github.com/ModelTC/lightx2v.git lightx2v && cd lightx2v
conda create -n lightx2v python=3.11 && conda activate lightx2v
pip install -r requirements.txt
# 单独重新安装transformers,避免pip的冲突检查
# 混元模型需要在4.45.2版本的transformers下运行,如果不需要跑混元模型,可以忽略
pip install transformers==4.45.2
# 安装 flash-attention 2
git clone https://github.com/Dao-AILab/flash-attention.git --recursive
cd flash-attention && python setup.py install
# 安装 flash-attention 3, 用于 hopper 显卡
cd flash-attention/hopper && python setup.py install
```
## 推理
```shell
# 修改脚本中的路径
bash scripts/run_wan_t2v.sh
```
除了脚本中已有的输入参数,`--config_json`指向的`${lightx2v_path}/configs/wan_t2v.json`中也会存在一些必要的参数,可以根据需要,自行修改。
欢迎了解 Lightx2v!
==================
.. figure:: ../../../assets/img_lightx2v.png
:width: 100%
:align: center
:alt: Lightx2v
:class: no-scaled-link
.. raw:: html

   <p style="text-align:center">
   <strong>一个轻量级的视频生成推理框架
   </strong>
   </p>
文档列表
-------------
.. toctree::
:maxdepth: 1
:caption: 快速入门
快速入门 <getting_started/quickstart.md>
.. toctree::
:maxdepth: 1
:caption: 方法教程
模型量化 <method_tutorials/quantization.md>
特征缓存 <method_tutorials/cache.md>
注意力机制 <method_tutorials/attention.md>
参数卸载 <method_tutorials/offload.md>
并行推理 <method_tutorials/parallel.md>
.. toctree::
:maxdepth: 1
:caption: 部署指南
低延迟场景部署 <deploy_guides/for_low_latency.md>
低资源场景部署 <deploy_guides/for_low_resource.md>
服务化部署 <deploy_guides/deploy_server.md>
gradio部署 <deploy_guides/deploy_gradio.md>
comfyui部署 <deploy_guides/deploy_comfyui.md>
本地windows电脑部署 <deploy_guides/deploy_local_windows.md>
.. Indices and tables
.. ==================
.. * :ref:`genindex`
.. * :ref:`modindex`
# 注意力机制
xxx
\ No newline at end of file
# 特征缓存
xxx
\ No newline at end of file
# 参数卸载
xxx
\ No newline at end of file
# 并行推理
xxx
\ No newline at end of file
# 模型量化
lightx2v支持对`Dit`中的线性层进行量化推理,支持`w8a8-int8`和`w8a8-fp8`的矩阵乘法。
## 生产量化模型
### 自动量化
lightx2v支持推理时自动对模型权重进行量化,具体可参考[配置文件](https://github.com/ModelTC/lightx2v/tree/main/configs/quantization/wan_i2v_quant_auto.json)
值得注意的是,需要将配置文件的**mm_config**进行设置:`"mm_config": {"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm", "weight_auto_quant": true}`。其中,**mm_type**代表希望使用的量化算子,**weight_auto_quant: true**代表自动转量化模型。
### 离线量化
lightx2v同时支持直接加载量化好的权重进行推理,对模型进行离线量化可参考[文档](https://github.com/ModelTC/lightx2v/tree/main/tools/convert/readme_zh.md)
将转换的权重路径,写到[配置文件](https://github.com/ModelTC/lightx2v/tree/main/configs/quantization/wan_i2v_quant_offline.json)中的`dit_quantized_ckpt`中,同时将`mm_config`中的`weight_auto_quant`置为`false`即可。
## 量化推理
### 自动量化
```shell
bash scripts/run_wan_i2v_quant_auto.sh
```
### 离线量化
```shell
bash scripts/run_wan_i2v_quant_offline.sh
```
## 启动量化服务
建议离线转好量化权重之后,`--config_json`指向到离线量化的`json`文件
比如,将`scripts/start_server.sh`脚本进行如下改动:
```shell
export RUNNING_FLAG=infer
python -m lightx2v.api_server \
--model_cls wan2.1 \
--task t2v \
--model_path $model_path \
--config_json ${lightx2v_path}/configs/quantization/wan_i2v_quant_offline.json \
--port 8000
```
## 高阶量化功能
具体可参考量化工具[LLMC的文档](https://github.com/ModelTC/llmc/blob/main/docs/zh_cn/source/backend/lightx2v.md)
# How to Start the Service
lightx2v provides asynchronous service functionality. The code entry point is [here](https://github.com/ModelTC/lightx2v/blob/main/lightx2v/api_server.py)
### Start the Service
```shell
# Modify the paths in the script
bash scripts/start_server.sh
```
The `--port 8000` option means the service will bind to port `8000` on the local machine. You can change this as needed.
### Client Sends Request
```shell
python scripts/post.py
```
The service endpoint is: `/v1/tasks/`
The `message` parameter in `scripts/post.py` is as follows:
```python
message = {
"prompt": "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage.",
"negative_prompt": "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走",
"image_path": "",
}
```
1. `prompt`, `negative_prompt`, and `image_path` are basic inputs for video generation. `image_path` can be an empty string, indicating no image input is needed.
### Client Checks Server Status
```shell
python scripts/check_status.py
```
The service endpoints include:
1. `/v1/service/status` is used to check the status of the service. It returns whether the service is `busy` or `idle`. The service only accepts new requests when it is `idle`.
2. `/v1/tasks/` is used to get all tasks received and completed by the server.
3. `/v1/tasks/{task_id}/status` is used to get the status of a specified `task_id`. It returns whether the task is `processing` or `completed`.
### Client Stops the Current Task on the Server at Any Time
```shell
python scripts/stop_running_task.py
```
The service endpoint is: `/v1/tasks/running`
After terminating the task, the server will not exit but will return to waiting for new requests.
### Starting Multiple Services on a Single Node
On a single node, you can start multiple services using `scripts/start_server.sh` (Note that the port numbers under the same IP must be different for each service), or you can start multiple services at once using `scripts/start_multi_servers.sh`:
```shell
num_gpus=8 bash scripts/start_multi_servers.sh
```
Where `num_gpus` indicates the number of services to start; the services will run on consecutive ports starting from `--start_port`.
### Scheduling Between Multiple Services
```shell
python scripts/post_multi_servers.py
```
`post_multi_servers.py` will schedule multiple client requests based on the idle status of the services.
### API Endpoints Summary
| Endpoint | Method | Description |
|----------|--------|-------------|
| `/v1/tasks/` | POST | Create video generation task |
| `/v1/tasks/form` | POST | Create video generation task via form |
| `/v1/tasks/` | GET | Get the list of all tasks |
| `/v1/tasks/{task_id}/status` | GET | Get status of specified task |
| `/v1/tasks/{task_id}/result` | GET | Get result video file of specified task |
| `/v1/tasks/running` | DELETE | Stop currently running task |
| `/v1/files/download/{file_path}` | GET | Download file |
| `/v1/service/status` | GET | Get service status |
# Quantization
lightx2v supports quantized inference for linear layers in **Dit**, enabling `w8a8-int8` and `w8a8-fp8` matrix multiplication.
## Generating Quantized Models
### Automatic Quantization
lightx2v supports automatic weight quantization during inference. Refer to the [configuration file](https://github.com/ModelTC/lightx2v/tree/main/configs/quantization/wan_i2v_quant_auto.json).
**Key configuration**:
Set `"mm_config": {"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm", "weight_auto_quant": true}`.
- `mm_type`: Specifies the quantized operator
- `weight_auto_quant: true`: Enables automatic model quantization
### Offline Quantization
lightx2v also supports direct loading of pre-quantized weights. For offline model quantization, refer to the [documentation](https://github.com/ModelTC/lightx2v/tree/main/tools/convert/readme.md).
Configure the [quantization file](https://github.com/ModelTC/lightx2v/tree/main/configs/quantization/wan_i2v_quant_offline.json):
1. Set `dit_quantized_ckpt` to the converted weight path
2. Set `weight_auto_quant` to `false` in `mm_config`
## Quantized Inference
### Automatic Quantization
```shell
bash scripts/run_wan_i2v_quant_auto.sh
```
### Offline Quantization
```shell
bash scripts/run_wan_i2v_quant_offline.sh
```
## Launching Quantization Service
After offline quantization, point `--config_json` to the offline quantization JSON file.
Example modification in `scripts/start_server.sh`:
```shell
export RUNNING_FLAG=infer
python -m lightx2v.api_server \
--model_cls wan2.1 \
--task t2v \
--model_path $model_path \
--config_json ${lightx2v_path}/configs/quantization/wan_i2v_quant_offline.json \
--port 8000
```
## Advanced Quantization Features
Refer to the quantization tool [LLMC documentation](https://github.com/ModelTC/llmc/blob/main/docs/en/source/backend/lightx2v.md) for details.
sphinx == 6.2.1
sphinx-book-theme == 1.0.1
sphinx-copybutton == 0.5.2
myst-parser == 2.0.0
sphinx-argparse
sphinxcontrib.redoc
sphinxcontrib.openapi
...@@ -103,7 +103,7 @@ for k, v in state_dict.items(): ...@@ -103,7 +103,7 @@ for k, v in state_dict.items():
else: else:
print(f"text_embedder 未知 LoRA 类型: {k}") print(f"text_embedder 未知 LoRA 类型: {k}")
continue continue
''' """
# === Time Embedding === # === Time Embedding ===
elif k.startswith("condition_embedder.time_embedder.linear_"): elif k.startswith("condition_embedder.time_embedder.linear_"):
layer_id = parts[2].split("_")[1] layer_id = parts[2].split("_")[1]
...@@ -128,7 +128,7 @@ for k, v in state_dict.items(): ...@@ -128,7 +128,7 @@ for k, v in state_dict.items():
else: else:
print(f"time_proj 未知 LoRA 类型: {k}") print(f"time_proj 未知 LoRA 类型: {k}")
continue continue
''' """
# fallback # fallback
print(f"未识别结构 key: {k}") print(f"未识别结构 key: {k}")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment