We recommend using a Docker environment. The [Docker Hub repository](https://hub.docker.com/r/lightx2v/lightx2v/tags) hosts lightx2v images; please select the tag with the latest date, for example, `25061301`.
cd flash-attention/hopper && python setup.py install
```
# Inference
```shell
# Modify the path in the script
bash scripts/run_wan_t2v.sh
```
In addition to the input arguments in the script, some required parameters live in the `${lightx2v_path}/configs/wan_t2v.json` file specified by `--config_json`. You can modify them as needed.
lightx2v provides asynchronous service functionality. The code entry point is [here](https://github.com/ModelTC/lightx2v/blob/main/lightx2v/api_server.py).
### Start the Service
```shell
# Modify the paths in the script
bash scripts/start_server.sh
```
The `--port 8000` option means the service will bind to port `8000` on the local machine. You can change this as needed.
### Client Sends Request
```shell
python scripts/post.py
```
The service endpoint is: `/v1/tasks/`
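As a quick alternative to `scripts/post.py`, here is a minimal client sketch for this endpoint. It assumes the server started by `scripts/start_server.sh` is listening on `localhost:8000`; the helper names are illustrative, not part of the repo.

```python
import json
import urllib.request

# Assumed server address; adjust to match the --port used in start_server.sh.
BASE_URL = "http://localhost:8000"

def build_message(prompt, negative_prompt="", image_path=""):
    # image_path may be an empty string when no image input is needed.
    return {"prompt": prompt, "negative_prompt": negative_prompt, "image_path": image_path}

def submit_task(message):
    # POST the message to /v1/tasks/ as JSON (requires a running server).
    req = urllib.request.Request(
        f"{BASE_URL}/v1/tasks/",
        data=json.dumps(message).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())
```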
The `message` parameter in `scripts/post.py` is as follows:
```python
message = {
    "prompt": "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage.",
    "negative_prompt": "",
    "image_path": "",
}
```
1. `prompt`, `negative_prompt`, and `image_path` are the basic inputs for video generation. `image_path` can be an empty string, indicating no image input is needed.
### Client Checks Server Status
```shell
python scripts/check_status.py
```
The service endpoints include:
1. `/v1/service/status` is used to check the status of the service. It returns whether the service is `busy` or `idle`. The service only accepts new requests when it is `idle`.
2. `/v1/tasks/` is used to get all tasks received and completed by the server.
3. `/v1/tasks/{task_id}/status` is used to get the status of a specified `task_id`. It returns whether the task is `processing` or `completed`.
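The status checks above can be sketched as follows. The helper names and the `localhost:8000` address are assumptions, and the exact response payload depends on the server.

```python
import json
import urllib.request

BASE_URL = "http://localhost:8000"  # assumed; match your --port

def status_url(task_id=None):
    # Service-level status when no task_id is given, per-task status otherwise.
    if task_id is None:
        return f"{BASE_URL}/v1/service/status"
    return f"{BASE_URL}/v1/tasks/{task_id}/status"

def fetch_status(url):
    # GET a status endpoint (requires a running server).
    with urllib.request.urlopen(url) as resp:
        return json.loads(resp.read())
```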
### Client Stops the Current Task on the Server at Any Time
```shell
python scripts/stop_running_task.py
```
The service endpoint is: `/v1/tasks/running`
After terminating the task, the server will not exit but will return to waiting for new requests.
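A minimal sketch of the stop request (a DELETE to `/v1/tasks/running`); the function names and the `localhost:8000` address are assumptions.

```python
import urllib.request

BASE_URL = "http://localhost:8000"  # assumed; match your --port

def stop_request():
    # Build a DELETE request for the currently running task.
    return urllib.request.Request(f"{BASE_URL}/v1/tasks/running", method="DELETE")

def stop_running_task():
    # Send the request (requires a running server); the server keeps running
    # afterwards and waits for new requests.
    with urllib.request.urlopen(stop_request()) as resp:
        return resp.status
```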
### Starting Multiple Services on a Single Node
On a single node, you can start multiple services with `scripts/start_server.sh` (note that each service under the same IP must use a different port), or start several services at once with `scripts/start_multi_servers.sh`:
```shell
num_gpus=8 bash scripts/start_multi_servers.sh
```
Where `num_gpus` indicates the number of services to start; the services will run on consecutive ports starting from `--start_port`.
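The resulting port layout can be sketched as follows; `server_urls` is an illustrative helper, with service `i` listening on `start_port + i`:

```python
def server_urls(start_port, num_gpus, host="localhost"):
    # One service per GPU, on consecutive ports starting at start_port.
    return [f"http://{host}:{start_port + i}" for i in range(num_gpus)]
```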
### Scheduling Between Multiple Services
```shell
python scripts/post_multi_servers.py
```
`post_multi_servers.py` will schedule multiple client requests based on the idle status of the services.
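The scheduling idea can be sketched as follows: poll each service's `/v1/service/status` and dispatch to the first idle one. This is an illustrative simplification, not the exact logic of `post_multi_servers.py`; the `fetch` parameter is injectable for testing and defaults to an HTTP GET.

```python
import json
import urllib.request

def http_get_status(url):
    # Default fetcher: GET the status endpoint (requires running servers).
    with urllib.request.urlopen(url) as resp:
        return json.loads(resp.read())

def pick_idle_server(base_urls, fetch=http_get_status):
    # Return the first server reporting "idle", or None if all are busy.
    for base in base_urls:
        if fetch(f"{base}/v1/service/status") == "idle":
            return base
    return None
```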
### API Endpoints Summary
| Endpoint | Method | Description |
|----------|--------|-------------|
| `/v1/tasks/` | POST | Create video generation task |
| `/v1/tasks/form` | POST | Create video generation task via form |
| `/v1/tasks/` | GET | Get all task list |
| `/v1/tasks/{task_id}/status` | GET | Get status of specified task |
| `/v1/tasks/{task_id}/result` | GET | Get result video file of specified task |
| `/v1/tasks/running` | DELETE | Stop currently running task |
| `/v1/files/download/{file_path}` | GET | Download file |
| `/v1/service/status` | GET | Get service status |
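For example, once a task reports `completed`, its video can be fetched via the result endpoint above. This sketch assumes `localhost:8000` and uses illustrative helper names.

```python
import urllib.request

BASE_URL = "http://localhost:8000"  # assumed; match your --port

def result_url(task_id):
    return f"{BASE_URL}/v1/tasks/{task_id}/result"

def download_result(task_id, out_path):
    # Stream the result video to a local file (requires a running server).
    with urllib.request.urlopen(result_url(task_id)) as resp, open(out_path, "wb") as f:
        f.write(resp.read())
```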
lightx2v supports quantized inference for the linear layers in **DiT**, enabling `w8a8-int8` and `w8a8-fp8` matrix multiplication.
## Generating Quantized Models
### Automatic Quantization
lightx2v supports automatic weight quantization during inference. Refer to the [configuration file](https://github.com/ModelTC/lightx2v/tree/main/configs/quantization/wan_i2v_quant_auto.json).
**Key configuration**:
Set `"mm_config": {"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm", "weight_auto_quant": true}`.
- `mm_type`: Specifies the quantized operator
- `"weight_auto_quant": true`: Enables automatic model quantization
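Putting the two settings together, the relevant fragment of the config file looks like this (field names as in the linked configuration file):

```json
{
  "mm_config": {
    "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
    "weight_auto_quant": true
  }
}
```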
### Offline Quantization
lightx2v also supports direct loading of pre-quantized weights. For offline model quantization, refer to the [documentation](https://github.com/ModelTC/lightx2v/tree/main/tools/convert/readme.md).
Configure the [quantization file](https://github.com/ModelTC/lightx2v/tree/main/configs/quantization/wan_i2v_quant_offline.json):
1. Set `dit_quantized_ckpt` to the path of the converted weights
2. Set `weight_auto_quant` to `false` in `mm_config`
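The two steps correspond to a config fragment like the following; the checkpoint path is a placeholder, and the field names follow the linked quantization file:

```json
{
  "dit_quantized_ckpt": "/path/to/converted/quantized/ckpt",
  "mm_config": {
    "mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Vllm",
    "weight_auto_quant": false
  }
}
```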
## Quantized Inference
### Automatic Quantization
```shell
bash scripts/run_wan_i2v_quant_auto.sh
```
### Offline Quantization
```shell
bash scripts/run_wan_i2v_quant_offline.sh
```
## Launching Quantization Service
After offline quantization, point `--config_json` to the offline quantization JSON file.
Example modification in `scripts/start_server.sh`: