# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
import logging
import os
import sys
from typing import List
import sphinxcontrib.redoc
from sphinx.ext import autodoc
logger = logging.getLogger(__name__)
sys.path.append(os.path.abspath("../.."))
# -- Project information -----------------------------------------------------
project = "Lightx2v"
copyright = "2025, Lightx2v Team"
author = "the Lightx2v Team"
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    "sphinx.ext.napoleon",
    "sphinx.ext.viewcode",
    "sphinx.ext.intersphinx",
    "sphinx_copybutton",
    "sphinx.ext.autodoc",
    "sphinx.ext.autosummary",
    "myst_parser",
    "sphinxarg.ext",
    "sphinxcontrib.redoc",
    "sphinxcontrib.openapi",
]
html_static_path = ["_static"]
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns: List[str] = ["**/*.template.rst"]
# Exclude the prompt "$" when copying code
copybutton_prompt_text = r"\$ "
copybutton_prompt_is_regexp = True
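# For example, a shell snippet rendered as "$ pip install -r requirements-docs.txt"
# is copied to the clipboard as "pip install -r requirements-docs.txt".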
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_title = project
html_theme = "sphinx_book_theme"
# html_theme = 'sphinx_rtd_theme'
html_logo = "../../../assets/img_lightx2v.png"
html_theme_options = {
    "path_to_docs": "docs/ZH_CN/source",
    "repository_url": "https://github.com/ModelTC/lightx2v",
    "use_repository_button": True,
}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = ['_static']
# Generate additional rst documentation here.
def setup(app):
    # from docs.source.generate_examples import generate_examples
    # generate_examples()
    pass
# Mock out external dependencies here.
autodoc_mock_imports = [
    "cpuinfo",
    "torch",
    "transformers",
    "psutil",
    "prometheus_client",
    "sentencepiece",
    "lightllm",
    "numpy",
    "tqdm",
    "tensorizer",
]
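# These heavy runtime dependencies are mocked so that autodoc can import the
# package on the documentation builder without having them installed.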
for mock_target in autodoc_mock_imports:
    if mock_target in sys.modules:
        logger.info(
            "Potentially problematic mock target (%s) found; "
            "autodoc_mock_imports cannot mock modules that have already "
            "been loaded into sys.modules when the sphinx build starts.",
            mock_target,
        )

class MockedClassDocumenter(autodoc.ClassDocumenter):
    """Remove note about base class when a class is derived from object."""

    def add_line(self, line: str, source: str, *lineno: int) -> None:
        if line == " Bases: :py:class:`object`":
            return
        super().add_line(line, source, *lineno)
autodoc.ClassDocumenter = MockedClassDocumenter
navigation_with_keys = False
Welcome to the Lightx2v Paper Collection!
=============================================

.. figure:: ../../../assets/img_lightx2v.png
   :width: 80%
   :align: center
   :alt: Lightx2v
   :class: no-scaled-link

.. raw:: html

   <div align="center" style="font-family: charter;">
      <a href="https://opensource.org/licenses/Apache-2.0"><img src="https://img.shields.io/badge/License-Apache_2.0-blue.svg" alt="License"></a>
      <a href="https://deepwiki.com/ModelTC/lightx2v"><img src="https://deepwiki.com/badge.svg" alt="Ask DeepWiki"></a>
      <a href="https://lightx2v-en.readthedocs.io/en/latest"><img src="https://img.shields.io/badge/docs-English-99cc2" alt="Doc"></a>
      <a href="https://lightx2v-zhcn.readthedocs.io/zh-cn/latest"><img src="https://img.shields.io/badge/文档-中文-99cc2" alt="Doc"></a>
      <a href="https://lightx2v-papers-zhcn.readthedocs.io/zh-cn/latest"><img src="https://img.shields.io/badge/论文集-中文-99cc2" alt="Papers"></a>
      <a href="https://hub.docker.com/r/lightx2v/lightx2v/tags"><img src="https://badgen.net/badge/icon/docker?icon=docker&label" alt="Docker"></a>
   </div>

   <div align="center" style="font-family: charter;">
      <strong>LightX2V: A Lightweight Video Generation Inference Framework</strong>
   </div>

LightX2V is a lightweight video generation inference framework. This is the paper collection we maintain on accelerating video generation inference, intended to help you quickly get familiar with both the classic methods and the latest progress in this area.

GitHub: https://github.com/ModelTC/lightx2v

HuggingFace: https://huggingface.co/lightx2v
Paper Collection
--------------------

.. toctree::
   :maxdepth: 1
   :caption: Paper Categories

   Image and Video Generation Basics <papers/generation_basics.md>
   Open-Source Models <papers/models.md>
   Model Quantization <papers/quantization.md>
   Feature Caching <papers/cache.md>
   Attention Mechanisms <papers/attention.md>
   Parameter Offloading <papers/offload.md>
   Parallel Inference <papers/parallel.md>
   Variable-Resolution Inference <papers/changing_resolution.md>
   Step Distillation <papers/step_distill.md>
   Autoregressive Models <papers/autoregressive.md>
   VAE Acceleration <papers/vae.md>
   Prompt Enhancement <papers/prompt_enhance.md>
   Reinforcement Learning <papers/RL.md>
# Attention Mechanisms
### Sparse VideoGen: Accelerating Video Diffusion Transformers with Spatial-Temporal Sparsity
[paper](https://arxiv.org/abs/2502.01776) | [code](https://github.com/svg-project/Sparse-VideoGen)
### Sparse VideoGen2: Accelerate Video Generation with Sparse Attention via Semantic-Aware Permutation
[paper](https://arxiv.org/abs/2505.18875)
### Training-free and Adaptive Sparse Attention for Efficient Long Video Generation
[paper](https://arxiv.org/abs/2502.21079)
### DSV: Exploiting Dynamic Sparsity to Accelerate Large-Scale Video DiT Training
[paper](https://arxiv.org/abs/2502.07590)
### MMInference: Accelerating Pre-filling for Long-Context VLMs via Modality-Aware Permutation Sparse Attention
[code](https://github.com/microsoft/MInference)
### FPSAttention: Training-Aware FP8 and Sparsity Co-Design for Fast Video Diffusion
[paper](https://arxiv.org/abs/2506.04648)
### VORTA: Efficient Video Diffusion via Routing Sparse Attention
[paper](https://arxiv.org/abs/2505.18809)
### Training-Free Efficient Video Generation via Dynamic Token Carving
[paper](https://arxiv.org/abs/2505.16864)
### RainFusion: Adaptive Video Generation Acceleration via Multi-Dimensional Visual Redundancy
[paper](https://arxiv.org/abs/2505.21036)
### Radial Attention: O(nlogn) Sparse Attention with Energy Decay for Long Video Generation
[paper](https://arxiv.org/abs/2506.19852)
### VMoBA: Mixture-of-Block Attention for Video Diffusion Models
[paper](https://arxiv.org/abs/2506.23858)
### SpargeAttention: Accurate and Training-free Sparse Attention Accelerating Any Model Inference
[paper](https://arxiv.org/abs/2502.18137) | [code](https://github.com/thu-ml/SpargeAttn)
### Fast Video Generation with Sliding Tile Attention
[paper](https://arxiv.org/abs/2502.04507) | [code](https://github.com/hao-ai-lab/FastVideo)
### PAROAttention: Pattern-Aware ReOrdering for Efficient Sparse and Quantized Attention in Visual Generation Models
[paper](https://arxiv.org/abs/2506.16054)
### Generalized Neighborhood Attention: Multi-dimensional Sparse Attention at the Speed of Light
[paper](https://arxiv.org/abs/2504.16922)
### Astraea: A GPU-Oriented Token-wise Acceleration Framework for Video Diffusion Transformers
[paper](https://arxiv.org/abs/2506.05096)
### ∇NABLA: Neighborhood Adaptive Block-Level Attention
[paper](https://arxiv.org/abs/2507.13546v1) | [code](https://github.com/gen-ai-team/Wan2.1-NABLA)
### Compact Attention: Exploiting Structured Spatio-Temporal Sparsity for Fast Video Generation
[paper](https://arxiv.org/abs/2508.12969)
### A Survey of Efficient Attention Methods: Hardware-efficient, Sparse, Compact, and Linear Attention
[paper](https://attention-survey.github.io/files/Attention_Survey.pdf)
### Bidirectional Sparse Attention for Faster Video Diffusion Training
[paper](https://arxiv.org/abs/2509.01085)
### Mixture of Contexts for Long Video Generation
[paper](https://arxiv.org/abs/2508.21058)
### LoViC: Efficient Long Video Generation with Context Compression
[paper](https://arxiv.org/abs/2507.12952)
### MagiAttention: A Distributed Attention Towards Linear Scalability for Ultra-Long Context, Heterogeneous Mask Training
[paper](https://sandai-org.github.io/MagiAttention/blog/) | [code](https://github.com/SandAI-org/MagiAttention)
### DraftAttention: Fast Video Diffusion via Low-Resolution Attention Guidance
[paper](https://arxiv.org/abs/2505.14708) | [code](https://github.com/shawnricecake/draft-attention)
### XAttention: Block Sparse Attention with Antidiagonal Scoring
[paper](https://arxiv.org/abs/2503.16428) | [code](https://github.com/mit-han-lab/x-attention)
### VSA: Faster Video Diffusion with Trainable Sparse Attention
[paper](https://arxiv.org/abs/2505.13389) | [code](https://github.com/hao-ai-lab/FastVideo)
### QuantSparse: Comprehensively Compressing Video Diffusion Transformer with Model Quantization and Attention Sparsification
[paper](https://arxiv.org/abs/2509.23681)
### SLA: Beyond Sparsity in Diffusion Transformers via Fine-Tunable Sparse-Linear Attention
[paper](https://arxiv.org/abs/2509.24006)
<div align="center">

# Open-Source Models

📢: A collection of awesome open-source model resources.

</div>
## 📚 <span id="head1"> *Contents* </span>
- Open-Source Models
  - [Foundation Models](#foundation-models)
  - [World Models](#world-models)
### Foundation Models:
- **Stable Video Diffusion: Scaling Latent Video Diffusion Models to Large Datasets**, Technical Report 2023.
*Andreas Blattmann, Tim Dockhorn, Sumith Kulal, Daniel Mendelevitch, Maciej Kilian, et al.*
[[Paper](https://arxiv.org/abs/2311.15127)] [[Code](https://github.com/Stability-AI/generative-models)] ![](https://img.shields.io/badge/T2V-blue) ![](https://img.shields.io/badge/I2V-green) ![](https://img.shields.io/badge/UNet-brown)
<details> <summary>BibTex</summary>
```text
@article{blattmann2023stable,
title={Stable video diffusion: Scaling latent video diffusion models to large datasets},
author={Blattmann, Andreas and Dockhorn, Tim and Kulal, Sumith and Mendelevitch, Daniel and Kilian, Maciej and Lorenz, Dominik and Levi, Yam and English, Zion and Voleti, Vikram and Letts, Adam and others},
journal={arXiv preprint arXiv:2311.15127},
year={2023}
}
```
</details>
- **Wan: Open and Advanced Large-Scale Video Generative Models**, Technical Report 2025.
*Team Wan, Ang Wang, Baole Ai, Bin Wen, Chaojie Mao, Chen-Wei Xie, et al.*
[[Paper](https://arxiv.org/abs/2503.20314)] [[Code](https://github.com/Wan-Video/Wan2.1)] ![](https://img.shields.io/badge/T2V-blue) ![](https://img.shields.io/badge/I2V-green) ![](https://img.shields.io/badge/DIT-brown)
<details> <summary>BibTex</summary>
```text
@article{wan2025wan,
title={Wan: Open and advanced large-scale video generative models},
author={Wan, Team and Wang, Ang and Ai, Baole and Wen, Bin and Mao, Chaojie and Xie, Chen-Wei and Chen, Di and Yu, Feiwu and Zhao, Haiming and Yang, Jianxiao and others},
journal={arXiv preprint arXiv:2503.20314},
year={2025}
}
```
</details>
- **HunyuanVideo: A Systematic Framework For Large Video Generation Model**, Technical Report 2024.
*Weijie Kong, Qi Tian, Zijian Zhang, Rox Min, Zuozhuo Dai, Jin Zhou, et al.*
[[Paper](https://arxiv.org/abs/2412.03603)] [[Code](https://github.com/Tencent-Hunyuan/HunyuanVideo)] ![](https://img.shields.io/badge/T2V-blue) ![](https://img.shields.io/badge/I2V-green) ![](https://img.shields.io/badge/DIT-brown)
<details> <summary>BibTex</summary>
```text
@article{kong2024hunyuanvideo,
title={Hunyuanvideo: A systematic framework for large video generative models},
author={Kong, Weijie and Tian, Qi and Zhang, Zijian and Min, Rox and Dai, Zuozhuo and Zhou, Jin and Xiong, Jiangfeng and Li, Xin and Wu, Bo and Zhang, Jianwei and others},
journal={arXiv preprint arXiv:2412.03603},
year={2024}
}
```
</details>
- **CogVideoX: Text-to-Video Diffusion Models with An Expert Transformer**, ICLR 2025.
*Zhuoyi Yang, Jiayan Teng, Wendi Zheng, Ming Ding, Shiyu Huang, Jiazheng Xu, et al.*
[[Paper](https://arxiv.org/abs/2408.06072)] [[Code](https://github.com/zai-org/CogVideo)] ![](https://img.shields.io/badge/T2V-blue) ![](https://img.shields.io/badge/I2V-green) ![](https://img.shields.io/badge/DIT-brown)
<details> <summary>BibTex</summary>
```text
@article{yang2024cogvideox,
title={Cogvideox: Text-to-video diffusion models with an expert transformer},
author={Yang, Zhuoyi and Teng, Jiayan and Zheng, Wendi and Ding, Ming and Huang, Shiyu and Xu, Jiazheng and Yang, Yuanming and Hong, Wenyi and Zhang, Xiaohan and Feng, Guanyu and others},
journal={arXiv preprint arXiv:2408.06072},
year={2024}
}
```
</details>
- **SkyReels V2: Infinite-Length Film Generative Model**, Technical Report 2025.
*Guibin Chen, Dixuan Lin, Jiangping Yang, Chunze Lin, Junchen Zhu, Mingyuan Fan, et al.*
[[Paper](https://arxiv.org/abs/2504.13074)] [[Code](https://github.com/SkyworkAI/SkyReels-V2)] ![](https://img.shields.io/badge/T2V-blue) ![](https://img.shields.io/badge/I2V-green) ![](https://img.shields.io/badge/DIT-brown)
<details> <summary>BibTex</summary>
```text
@misc{chen2025skyreelsv2infinitelengthfilmgenerative,
title={SkyReels-V2: Infinite-length Film Generative Model},
author={Guibin Chen and Dixuan Lin and Jiangping Yang and Chunze Lin and Junchen Zhu and Mingyuan Fan and Hao Zhang and Sheng Chen and Zheng Chen and Chengcheng Ma and Weiming Xiong and Wei Wang and Nuo Pang and Kang Kang and Zhiheng Xu and Yuzhe Jin and Yupeng Liang and Yubing Song and Peng Zhao and Boyuan Xu and Di Qiu and Debang Li and Zhengcong Fei and Yang Li and Yahui Zhou},
year={2025},
eprint={2504.13074},
archivePrefix={arXiv},
primaryClass={cs.CV},
url={https://arxiv.org/abs/2504.13074},
}
```
</details>
- **Open-Sora: Democratizing Efficient Video Production for All**, Technical Report 2025.
*Xiangyu Peng, Zangwei Zheng, Chenhui Shen, Tom Young, Xinying Guo, et al.*
[[Paper](https://arxiv.org/abs/2503.09642v2)] [[Code](https://github.com/hpcaitech/Open-Sora)] ![](https://img.shields.io/badge/T2V-blue) ![](https://img.shields.io/badge/I2V-green) ![](https://img.shields.io/badge/DIT-brown) ![](https://img.shields.io/badge/V2V-orange)
<details> <summary>BibTex</summary>
```text
@article{peng2025open,
title={Open-sora 2.0: Training a commercial-level video generation model in $200 k},
author={Peng, Xiangyu and Zheng, Zangwei and Shen, Chenhui and Young, Tom and Guo, Xinying and Wang, Binluo and Xu, Hang and Liu, Hongxin and Jiang, Mingyan and Li, Wenjun and others},
journal={arXiv preprint arXiv:2503.09642},
year={2025}
}
```
</details>
- **Pyramidal Flow Matching for Efficient Video Generative Modeling**, Technical Report 2024.
*Yang Jin, Zhicheng Sun, Ningyuan Li, Kun Xu, Kun Xu, et al.*
[[Paper](https://arxiv.org/abs/2410.05954)] [[Code](https://github.com/jy0205/Pyramid-Flow)] ![](https://img.shields.io/badge/T2V-blue) ![](https://img.shields.io/badge/I2V-green) ![](https://img.shields.io/badge/AR-brown)
<details> <summary>BibTex</summary>
```text
@article{jin2024pyramidal,
title={Pyramidal flow matching for efficient video generative modeling},
author={Jin, Yang and Sun, Zhicheng and Li, Ningyuan and Xu, Kun and Jiang, Hao and Zhuang, Nan and Huang, Quzhe and Song, Yang and Mu, Yadong and Lin, Zhouchen},
journal={arXiv preprint arXiv:2410.05954},
year={2024}
}
```
</details>
- **MAGI-1: Autoregressive Video Generation at Scale**, Technical Report 2025.
*Sand.ai, Hansi Teng, Hongyu Jia, Lei Sun, Lingzhi Li, Maolin Li, Mingqiu Tang, et al.*
[[Paper](https://arxiv.org/pdf/2505.13211)] [[Code](https://github.com/SandAI-org/Magi-1)] ![](https://img.shields.io/badge/T2V-blue) ![](https://img.shields.io/badge/I2V-green) ![](https://img.shields.io/badge/AR-brown) ![](https://img.shields.io/badge/V2V-orange)
<details> <summary>BibTex</summary>
```text
@article{teng2025magi,
title={MAGI-1: Autoregressive Video Generation at Scale},
author={Teng, Hansi and Jia, Hongyu and Sun, Lei and Li, Lingzhi and Li, Maolin and Tang, Mingqiu and Han, Shuai and Zhang, Tianning and Zhang, WQ and Luo, Weifeng and others},
journal={arXiv preprint arXiv:2505.13211},
year={2025}
}
```
</details>
- **From Slow Bidirectional to Fast Autoregressive Video Diffusion Models**, CVPR 2025.
*Tianwei Yin, Qiang Zhang, Richard Zhang, William T. Freeman, Fredo Durand, et al.*
[[Paper](http://arxiv.org/abs/2412.07772)] [[Code](https://github.com/tianweiy/CausVid)] ![](https://img.shields.io/badge/T2V-blue) ![](https://img.shields.io/badge/I2V-green) ![](https://img.shields.io/badge/AR-brown)
<details> <summary>BibTex</summary>
```text
@inproceedings{yin2025slow,
title={From slow bidirectional to fast autoregressive video diffusion models},
author={Yin, Tianwei and Zhang, Qiang and Zhang, Richard and Freeman, William T and Durand, Fredo and Shechtman, Eli and Huang, Xun},
booktitle={Proceedings of the Computer Vision and Pattern Recognition Conference},
pages={22963--22974},
year={2025}
}
```
</details>
- **Packing Input Frame Context in Next-Frame Prediction Models for Video Generation**, arXiv 2025.
*Lvmin Zhang, Maneesh Agrawala.*
[[Paper](https://arxiv.org/abs/2504.12626)] [[Code](https://github.com/lllyasviel/FramePack)] ![](https://img.shields.io/badge/T2V-blue) ![](https://img.shields.io/badge/I2V-green) ![](https://img.shields.io/badge/AR-brown)
<details> <summary>BibTex</summary>
```text
@article{zhang2025packing,
title={Packing input frame context in next-frame prediction models for video generation},
author={Zhang, Lvmin and Agrawala, Maneesh},
journal={arXiv preprint arXiv:2504.12626},
year={2025}
}
```
</details>
### World Models:
- **Matrix-Game 2.0: An Open-Source, Real-Time, and Streaming Interactive World Model**, Technical Report 2025.
*Xianglong He, Chunli Peng, Zexiang Liu, Boyang Wang, Yifan Zhang, et al.*
[[Paper](https://arxiv.org/abs/2508.13009)] [[Code](https://matrix-game-v2.github.io/)] ![](https://img.shields.io/badge/keyboard-blue) ![](https://img.shields.io/badge/mouse-green) ![](https://img.shields.io/badge/DIT-brown)
<details> <summary>BibTex</summary>
```text
@article{he2025matrix,
title={Matrix-Game 2.0: An Open-Source, Real-Time, and Streaming Interactive World Model},
author={He, Xianglong and Peng, Chunli and Liu, Zexiang and Wang, Boyang and Zhang, Yifan and Cui, Qi and Kang, Fei and Jiang, Biao and An, Mengyin and Ren, Yangyang and others},
journal={arXiv preprint arXiv:2508.13009},
year={2025}
}
```
</details>
- **HunyuanWorld 1.0: Generating Immersive, Explorable, and Interactive 3D Worlds from Words or Pixels**, Technical Report 2025.
*HunyuanWorld Team, Zhenwei Wang, Yuhao Liu, Junta Wu, Zixiao Gu, Haoyuan Wang, et al.*
[[Paper](https://arxiv.org/abs/2507.21809)] [[Code](https://github.com/Tencent-Hunyuan/HunyuanWorld-1.0)] ![](https://img.shields.io/badge/image-blue) ![](https://img.shields.io/badge/text-green) ![](https://img.shields.io/badge/DIT-brown)
<details> <summary>BibTex</summary>
```text
@article{team2025hunyuanworld,
title={HunyuanWorld 1.0: Generating Immersive, Explorable, and Interactive 3D Worlds from Words or Pixels},
author={Team, HunyuanWorld and Wang, Zhenwei and Liu, Yuhao and Wu, Junta and Gu, Zixiao and Wang, Haoyuan and Zuo, Xuhui and Huang, Tianyu and Li, Wenhuan and Zhang, Sheng and others},
journal={arXiv preprint arXiv:2507.21809},
year={2025}
}
```
</details>
- **Cosmos-Drive-Dreams: Scalable Synthetic Driving Data Generation with World Foundation Models**, Technical Report 2025.
*Xuanchi Ren, Yifan Lu, Tianshi Cao, Ruiyuan Gao, Shengyu Huang, Amirmojtaba Sabour, et al.*
[[Paper](https://arxiv.org/abs/2506.09042)] [[Code](https://research.nvidia.com/labs/toronto-ai/cosmos_drive_dreams)] ![](https://img.shields.io/badge/drive-blue) ![](https://img.shields.io/badge/DIT-brown)
<details> <summary>BibTex</summary>
```text
@article{ren2025cosmos,
title={Cosmos-Drive-Dreams: Scalable Synthetic Driving Data Generation with World Foundation Models},
author={Ren, Xuanchi and Lu, Yifan and Cao, Tianshi and Gao, Ruiyuan and Huang, Shengyu and Sabour, Amirmojtaba and Shen, Tianchang and Pfaff, Tobias and Wu, Jay Zhangjie and Chen, Runjian and others},
journal={arXiv preprint arXiv:2506.09042},
year={2025}
}
```
</details>
- **Genie 3: A new frontier for world models**, Blog 2025.
*Google DeepMind*
[[Blog](https://deepmind.google/discover/blog/genie-3-a-new-frontier-for-world-models/)] ![](https://img.shields.io/badge/event-blue) ![](https://img.shields.io/badge/DIT-brown)
- **GAIA-2: A Controllable Multi-View Generative World Model for Autonomous Driving**, Technical Report 2025.
*Lloyd Russell, Anthony Hu, Lorenzo Bertoni, George Fedoseev, Jamie Shotton, et al.*
[[Paper](https://arxiv.org/abs/2503.20523)] ![](https://img.shields.io/badge/drive-blue) ![](https://img.shields.io/badge/transformer-brown)
<details> <summary>BibTex</summary>
```text
@article{russell2025gaia,
title={Gaia-2: A controllable multi-view generative world model for autonomous driving},
author={Russell, Lloyd and Hu, Anthony and Bertoni, Lorenzo and Fedoseev, George and Shotton, Jamie and Arani, Elahe and Corrado, Gianluca},
journal={arXiv preprint arXiv:2503.20523},
year={2025}
}
```
</details>
version: 2
# Set the version of Python and other tools you might need
build:
  os: ubuntu-20.04
  tools:
    python: "3.10"

formats:
  - epub

sphinx:
  configuration: docs/ZH_CN/source/conf.py

python:
  install:
    - requirements: requirements-docs.txt
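# With this configuration, Read the Docs installs requirements-docs.txt into the
# Python 3.10 environment above and then runs Sphinx with docs/ZH_CN/source/conf.py
# to build the HTML (and EPUB) output.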
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
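# Example: "make html" is caught by the rule above and runs
# "sphinx-build -M html source build", writing the site to build/html.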
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.https://www.sphinx-doc.org/
	exit /b 1
)
if "%1" == "" goto help
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
import logging
import os
import sys
from typing import List
import sphinxcontrib.redoc
from sphinx.ext import autodoc
logger = logging.getLogger(__name__)
sys.path.append(os.path.abspath("../.."))
# -- Project information -----------------------------------------------------
project = "Lightx2v"
copyright = "2025, Lightx2v Team"
author = "the Lightx2v Team"
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    "sphinx.ext.napoleon",
    "sphinx.ext.viewcode",
    "sphinx.ext.intersphinx",
    "sphinx_copybutton",
    "sphinx.ext.autodoc",
    "sphinx.ext.autosummary",
    "sphinx.ext.mathjax",
    "myst_parser",
    "sphinxarg.ext",
    "sphinxcontrib.redoc",
    "sphinxcontrib.openapi",
]
myst_enable_extensions = [
    "dollarmath",
    "amsmath",
]
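# With "dollarmath" and "amsmath" enabled, MyST Markdown pages can use inline
# math such as $x_t$ and display math inside $$ ... $$ blocks (or amsmath
# environments); "sphinx.ext.mathjax" above renders them in the HTML output.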
html_static_path = ["_static"]
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns: List[str] = ["**/*.template.rst"]
# Exclude the prompt "$" when copying code
copybutton_prompt_text = r"\$ "
copybutton_prompt_is_regexp = True
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_title = project
html_theme = "sphinx_book_theme"
# html_theme = 'sphinx_rtd_theme'
html_logo = "../../../assets/img_lightx2v.png"
html_theme_options = {
    "path_to_docs": "docs/ZH_CN/source",
    "repository_url": "https://github.com/ModelTC/lightx2v",
    "use_repository_button": True,
}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = ['_static']
# Generate additional rst documentation here.
def setup(app):
    # from docs.source.generate_examples import generate_examples
    # generate_examples()
    pass
# Mock out external dependencies here.
autodoc_mock_imports = [
    "cpuinfo",
    "torch",
    "transformers",
    "psutil",
    "prometheus_client",
    "sentencepiece",
    "lightllm",
    "numpy",
    "tqdm",
    "tensorizer",
]
for mock_target in autodoc_mock_imports:
    if mock_target in sys.modules:
        logger.info(
            "Potentially problematic mock target (%s) found; "
            "autodoc_mock_imports cannot mock modules that have already "
            "been loaded into sys.modules when the sphinx build starts.",
            mock_target,
        )

class MockedClassDocumenter(autodoc.ClassDocumenter):
    """Remove note about base class when a class is derived from object."""

    def add_line(self, line: str, source: str, *lineno: int) -> None:
        if line == " Bases: :py:class:`object`":
            return
        super().add_line(line, source, *lineno)
autodoc.ClassDocumenter = MockedClassDocumenter
navigation_with_keys = False
# ComfyUI Deployment

## ComfyUI-Lightx2vWrapper

The official ComfyUI integration nodes for LightX2V are published in a separate repository and provide a fully modular configuration system along with optimization features.

### Project Repository

- GitHub: [https://github.com/ModelTC/ComfyUI-Lightx2vWrapper](https://github.com/ModelTC/ComfyUI-Lightx2vWrapper)

### Key Features

- Modular configuration system: independent nodes for each aspect of video generation
- Supports both text-to-video (T2V) and image-to-video (I2V) generation modes
- Advanced optimizations:
  - TeaCache acceleration (up to 3x speedup)
  - Quantization support (int8, fp8)
  - CPU offloading to reduce memory usage
  - Lightweight VAE option
- LoRA support: multiple LoRA models can be chained together
- Multi-model support: wan2.1, hunyuan, and other architectures

### Installation and Usage

Please refer to the GitHub repository above for detailed installation instructions, usage tutorials, and example workflows.