Commit a1ebc651 authored by xuwx1

update lightx2v

parent 5a4db490
{
"infer_steps": 4,
"target_video_length": 81,
"text_len": 512,
"target_height": 480,
"target_width": 832,
"self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3",
"sample_guide_scale": [
4.0,
3.0
],
"sample_shift": 5.0,
"enable_cfg": false,
"cpu_offload": true,
"offload_granularity": "model",
"t5_cpu_offload": false,
"vae_cpu_offload": false,
"boundary_step_index": 2,
"denoising_step_list": [
1000,
750,
500,
250
],
"lora_configs": [
{
"name": "low_noise_model",
"path": "Wan2.1-T2V-14B/loras/Wan21_T2V_14B_lightx2v_cfg_step_distill_lora_rank64.safetensors",
"strength": 1.0
}
]
}
{
"infer_steps": 50,
"target_video_length": 121,
"text_len": 512,
"target_height": 704,
"target_width": 1280,
"num_channels_latents": 48,
"vae_stride": [
4,
16,
16
],
"self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3",
"sample_guide_scale": 5.0,
"sample_shift": 5.0,
"enable_cfg": true,
"cpu_offload": false,
"offload_granularity": "model",
"t5_cpu_offload": false,
"vae_cpu_offload": false,
"fps": 24,
"use_image_encoder": false
}
{
"infer_steps": 50,
"target_video_length": 121,
"text_len": 512,
"target_height": 704,
"target_width": 1280,
"num_channels_latents": 48,
"vae_stride": [
4,
16,
16
],
"self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3",
"sample_guide_scale": 5.0,
"sample_shift": 5.0,
"enable_cfg": true,
"fps": 24,
"use_image_encoder": false,
"cpu_offload": true,
"offload_granularity": "model",
"t5_cpu_offload": false,
"vae_cpu_offload": false,
"vae_offload_cache": true
}
{
"infer_steps": 50,
"target_video_length": 121,
"text_len": 512,
"target_height": 704,
"target_width": 1280,
"num_channels_latents": 48,
"vae_stride": [
4,
16,
16
],
"self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3",
"sample_guide_scale": 5.0,
"sample_shift": 5.0,
"enable_cfg": true,
"cpu_offload": false,
"offload_granularity": "model",
"t5_cpu_offload": false,
"vae_cpu_offload": false,
"fps": 24
}
{
"infer_steps": 50,
"target_video_length": 121,
"text_len": 512,
"target_height": 704,
"target_width": 1280,
"num_channels_latents": 48,
"vae_stride": [
4,
16,
16
],
"self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3",
"sample_guide_scale": 5.0,
"sample_shift": 5.0,
"enable_cfg": true,
"fps": 24,
"cpu_offload": true,
"offload_granularity": "model",
"t5_cpu_offload": false,
"vae_cpu_offload": false,
"vae_offload_cache": true
}
FROM pytorch/pytorch:2.8.0-cuda12.8-cudnn9-devel AS base
WORKDIR /app
ENV DEBIAN_FRONTEND=noninteractive
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
ENV LD_LIBRARY_PATH=/usr/local/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
RUN apt-get update && apt-get install -y vim tmux zip unzip bzip2 wget git git-lfs build-essential libibverbs-dev ca-certificates \
curl iproute2 libsm6 libxext6 kmod ccache libnuma-dev libssl-dev flex bison libgtk-3-dev libpango1.0-dev \
libsoup2.4-dev libnice-dev libopus-dev libvpx-dev libx264-dev libsrtp2-dev libglib2.0-dev libdrm-dev libjpeg-dev libpng-dev \
&& apt-get clean && rm -rf /var/lib/apt/lists/* && git lfs install
RUN conda install conda-forge::ffmpeg=8.0.0 -y && conda clean --all -y
RUN pip install --no-cache-dir packaging ninja cmake scikit-build-core uv meson ruff pre-commit fastapi uvicorn requests -U
RUN git clone https://github.com/vllm-project/vllm.git && cd vllm \
&& python use_existing_torch.py && pip install --no-cache-dir -r requirements/build.txt \
&& pip install --no-cache-dir --no-build-isolation -v -e .
RUN git clone https://github.com/sgl-project/sglang.git && cd sglang/sgl-kernel \
&& make build && make clean
RUN pip install --no-cache-dir diffusers transformers tokenizers accelerate safetensors opencv-python numpy imageio \
imageio-ffmpeg einops loguru qtorch ftfy av decord matplotlib debugpy
RUN git clone https://github.com/Dao-AILab/flash-attention.git --recursive
RUN cd flash-attention && python setup.py install && rm -rf build
RUN cd flash-attention/hopper && python setup.py install && rm -rf build
RUN git clone https://github.com/ModelTC/SageAttention.git --depth 1
RUN cd SageAttention && CUDA_ARCHITECTURES="8.0,8.6,8.9,9.0,12.0" EXT_PARALLEL=4 NVCC_APPEND_FLAGS="--threads 8" MAX_JOBS=32 pip install --no-cache-dir -v -e .
RUN git clone https://github.com/ModelTC/SageAttention-1104.git --depth 1
RUN cd SageAttention-1104/sageattention3_blackwell && python setup.py install && rm -rf build
RUN git clone https://github.com/SandAI-org/MagiAttention.git --recursive
RUN cd MagiAttention && TORCH_CUDA_ARCH_LIST="9.0" pip install --no-cache-dir --no-build-isolation -v -e .
RUN git clone https://github.com/ModelTC/FlashVSR.git --depth 1
RUN cd FlashVSR && pip install --no-cache-dir -v -e .
COPY lightx2v_kernel /app/lightx2v_kernel
RUN git clone https://github.com/NVIDIA/cutlass.git --depth 1 && cd /app/lightx2v_kernel && MAX_JOBS=32 CMAKE_BUILD_PARALLEL_LEVEL=4 \
uv build --wheel \
-Cbuild-dir=build . \
-Ccmake.define.CUTLASS_PATH=/app/cutlass \
--verbose \
--color=always \
--no-build-isolation \
&& pip install dist/*whl --force-reinstall --no-deps \
&& rm -rf /app/lightx2v_kernel && rm -rf /app/cutlass
# cloud deploy
RUN pip install --no-cache-dir aio-pika "asyncpg>=0.27.0" "aioboto3>=12.0.0" PyJWT alibabacloud_dypnsapi20170525==1.2.2 redis==6.4.0 tos
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
ENV PATH=/root/.cargo/bin:$PATH
RUN cd /opt \
&& wget https://mirrors.tuna.tsinghua.edu.cn/gnu/libiconv/libiconv-1.15.tar.gz \
&& tar zxvf libiconv-1.15.tar.gz \
&& cd libiconv-1.15 \
&& ./configure \
&& make \
&& make install \
&& rm -rf /opt/libiconv-1.15
RUN cd /opt \
&& git clone https://github.com/GStreamer/gstreamer.git -b 1.27.2 --depth 1 \
&& cd gstreamer \
&& meson setup builddir \
&& meson compile -C builddir \
&& meson install -C builddir \
&& ldconfig \
&& rm -rf /opt/gstreamer
RUN cd /opt \
&& git clone https://github.com/GStreamer/gst-plugins-rs.git -b gstreamer-1.27.2 --depth 1 \
&& cd gst-plugins-rs \
&& cargo build --package gst-plugin-webrtchttp --release \
&& install -m 644 target/release/libgstwebrtchttp.so $(pkg-config --variable=pluginsdir gstreamer-1.0)/ \
&& rm -rf /opt/gst-plugins-rs
RUN ldconfig
# q8f for base docker
RUN git clone https://github.com/KONAKONA666/q8_kernels.git --depth 1
RUN cd q8_kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
# q8f for 5090 docker
# RUN git clone https://github.com/ModelTC/LTX-Video-Q8-Kernels.git --depth 1
# RUN cd LTX-Video-Q8-Kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
WORKDIR /workspace
FROM pytorch/pytorch:2.8.0-cuda12.8-cudnn9-devel AS base
WORKDIR /app
ENV DEBIAN_FRONTEND=noninteractive
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
ENV LD_LIBRARY_PATH=/usr/local/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
RUN apt-get update && apt-get install -y vim tmux zip unzip bzip2 wget git git-lfs build-essential libibverbs-dev ca-certificates \
curl iproute2 libsm6 libxext6 kmod ccache libnuma-dev libssl-dev flex bison libgtk-3-dev libpango1.0-dev \
libsoup2.4-dev libnice-dev libopus-dev libvpx-dev libx264-dev libsrtp2-dev libglib2.0-dev libdrm-dev libjpeg-dev libpng-dev \
&& apt-get clean && rm -rf /var/lib/apt/lists/* && git lfs install
RUN conda install conda-forge::ffmpeg=8.0.0 -y && conda clean --all -y
RUN pip install --no-cache-dir packaging ninja cmake scikit-build-core uv meson ruff pre-commit fastapi uvicorn requests -U
RUN git clone https://github.com/vllm-project/vllm.git && cd vllm \
&& python use_existing_torch.py && pip install --no-cache-dir -r requirements/build.txt \
&& pip install --no-cache-dir --no-build-isolation -v -e .
RUN git clone https://github.com/sgl-project/sglang.git && cd sglang/sgl-kernel \
&& make build && make clean
RUN pip install --no-cache-dir diffusers transformers tokenizers accelerate safetensors opencv-python numpy imageio \
imageio-ffmpeg einops loguru qtorch ftfy av decord matplotlib debugpy
RUN git clone https://github.com/Dao-AILab/flash-attention.git --recursive
RUN cd flash-attention && python setup.py install && rm -rf build
RUN cd flash-attention/hopper && python setup.py install && rm -rf build
RUN git clone https://github.com/ModelTC/SageAttention.git --depth 1
RUN cd SageAttention && CUDA_ARCHITECTURES="8.0,8.6,8.9,9.0,12.0" EXT_PARALLEL=4 NVCC_APPEND_FLAGS="--threads 8" MAX_JOBS=32 pip install --no-cache-dir -v -e .
RUN git clone https://github.com/ModelTC/SageAttention-1104.git --depth 1
RUN cd SageAttention-1104/sageattention3_blackwell && python setup.py install && rm -rf build
RUN git clone https://github.com/SandAI-org/MagiAttention.git --recursive
RUN cd MagiAttention && TORCH_CUDA_ARCH_LIST="9.0" pip install --no-cache-dir --no-build-isolation -v -e .
RUN git clone https://github.com/ModelTC/FlashVSR.git --depth 1
RUN cd FlashVSR && pip install --no-cache-dir -v -e .
COPY lightx2v_kernel /app/lightx2v_kernel
RUN git clone https://github.com/NVIDIA/cutlass.git --depth 1 && cd /app/lightx2v_kernel && MAX_JOBS=32 CMAKE_BUILD_PARALLEL_LEVEL=4 \
uv build --wheel \
-Cbuild-dir=build . \
-Ccmake.define.CUTLASS_PATH=/app/cutlass \
--verbose \
--color=always \
--no-build-isolation \
&& pip install dist/*whl --force-reinstall --no-deps \
&& rm -rf /app/lightx2v_kernel && rm -rf /app/cutlass
# cloud deploy
RUN pip install --no-cache-dir aio-pika "asyncpg>=0.27.0" "aioboto3>=12.0.0" PyJWT alibabacloud_dypnsapi20170525==1.2.2 redis==6.4.0 tos
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
ENV PATH=/root/.cargo/bin:$PATH
RUN cd /opt \
&& wget https://mirrors.tuna.tsinghua.edu.cn/gnu/libiconv/libiconv-1.15.tar.gz \
&& tar zxvf libiconv-1.15.tar.gz \
&& cd libiconv-1.15 \
&& ./configure \
&& make \
&& make install \
&& rm -rf /opt/libiconv-1.15
RUN cd /opt \
&& git clone https://github.com/GStreamer/gstreamer.git -b 1.27.2 --depth 1 \
&& cd gstreamer \
&& meson setup builddir \
&& meson compile -C builddir \
&& meson install -C builddir \
&& ldconfig \
&& rm -rf /opt/gstreamer
RUN cd /opt \
&& git clone https://github.com/GStreamer/gst-plugins-rs.git -b gstreamer-1.27.2 --depth 1 \
&& cd gst-plugins-rs \
&& cargo build --package gst-plugin-webrtchttp --release \
&& install -m 644 target/release/libgstwebrtchttp.so $(pkg-config --variable=pluginsdir gstreamer-1.0)/ \
&& rm -rf /opt/gst-plugins-rs
RUN ldconfig
# q8f for base docker
# RUN git clone https://github.com/KONAKONA666/q8_kernels.git --depth 1
# RUN cd q8_kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
# q8f for 5090 docker
RUN git clone https://github.com/ModelTC/LTX-Video-Q8-Kernels.git --depth 1
RUN cd LTX-Video-Q8-Kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
WORKDIR /workspace
FROM cambricon-base/pytorch:v25.10.0-torch2.8.0-torchmlu1.29.1-ubuntu22.04-py310 AS base
WORKDIR /workspace/LightX2V
# Set envs
ENV PYTHONPATH=/workspace/LightX2V
ENV LD_LIBRARY_PATH=/usr/local/neuware/lib64:${LD_LIBRARY_PATH}
# Install deps
RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg && \
pip install --no-cache-dir \
ftfy \
imageio \
imageio-ffmpeg \
loguru \
aiohttp \
gguf \
diffusers \
peft==0.17.0 \
transformers==4.57.1 && rm -rf /var/lib/apt/lists/*
# Copy files
COPY app app
COPY assets assets
COPY configs configs
COPY lightx2v lightx2v
COPY lightx2v_kernel lightx2v_kernel
COPY lightx2v_platform lightx2v_platform
COPY scripts scripts
COPY test_cases test_cases
COPY tools tools
FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel AS base
WORKDIR /app
ENV DEBIAN_FRONTEND=noninteractive
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
ENV LD_LIBRARY_PATH=/usr/local/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
RUN apt-get update && apt-get install -y vim tmux zip unzip wget git git-lfs build-essential libibverbs-dev ca-certificates \
curl iproute2 libsm6 libxext6 kmod ccache libnuma-dev libssl-dev flex bison libgtk-3-dev libpango1.0-dev \
libsoup2.4-dev libnice-dev libopus-dev libvpx-dev libx264-dev libsrtp2-dev libglib2.0-dev libdrm-dev \
&& apt-get clean && rm -rf /var/lib/apt/lists/* && git lfs install
RUN pip install --no-cache-dir packaging ninja cmake scikit-build-core uv meson ruff pre-commit fastapi uvicorn requests -U
RUN git clone https://github.com/vllm-project/vllm.git -b v0.10.0 && cd vllm \
&& python use_existing_torch.py && pip install -r requirements/build.txt \
&& pip install --no-cache-dir --no-build-isolation -v -e .
RUN git clone https://github.com/sgl-project/sglang.git -b v0.4.10 && cd sglang/sgl-kernel \
&& make build && make clean
RUN pip install --no-cache-dir diffusers transformers tokenizers accelerate safetensors opencv-python numpy imageio \
imageio-ffmpeg einops loguru qtorch ftfy av decord
RUN conda install conda-forge::ffmpeg=8.0.0 -y && ln -s /opt/conda/bin/ffmpeg /usr/bin/ffmpeg && conda clean --all -y
RUN git clone https://github.com/Dao-AILab/flash-attention.git -b v2.8.3 --recursive
RUN cd flash-attention && python setup.py install && rm -rf build
RUN cd flash-attention/hopper && python setup.py install && rm -rf build
RUN git clone https://github.com/ModelTC/SageAttention.git
RUN cd SageAttention && CUDA_ARCHITECTURES="8.0,8.6,8.9,9.0" EXT_PARALLEL=4 NVCC_APPEND_FLAGS="--threads 8" MAX_JOBS=32 pip install --no-cache-dir -v -e .
RUN git clone https://github.com/KONAKONA666/q8_kernels.git
RUN cd q8_kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
# cloud deploy
RUN pip install --no-cache-dir aio-pika "asyncpg>=0.27.0" "aioboto3>=12.0.0" PyJWT alibabacloud_dypnsapi20170525==1.2.2 redis==6.4.0 tos
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
ENV PATH=/root/.cargo/bin:$PATH
RUN cd /opt \
&& wget https://mirrors.tuna.tsinghua.edu.cn/gnu/libiconv/libiconv-1.15.tar.gz \
&& tar zxvf libiconv-1.15.tar.gz \
&& cd libiconv-1.15 \
&& ./configure \
&& make \
&& make install \
&& rm -rf /opt/libiconv-1.15
RUN cd /opt \
&& git clone https://github.com/GStreamer/gstreamer.git -b 1.24.12 --depth 1 \
&& cd gstreamer \
&& meson setup builddir \
&& meson compile -C builddir \
&& meson install -C builddir \
&& ldconfig \
&& rm -rf /opt/gstreamer
RUN cd /opt \
&& git clone https://github.com/GStreamer/gst-plugins-rs.git -b gstreamer-1.24.12 --depth 1 \
&& cd gst-plugins-rs \
&& cargo build --package gst-plugin-webrtchttp --release \
&& install -m 644 target/release/libgstwebrtchttp.so $(pkg-config --variable=pluginsdir gstreamer-1.0)/ \
&& rm -rf /opt/gst-plugins-rs
RUN ldconfig
WORKDIR /workspace
FROM node:alpine3.21 AS frontend_builder
COPY lightx2v /opt/lightx2v
RUN cd /opt/lightx2v/deploy/server/frontend \
&& npm install \
&& npm run build
FROM lightx2v/lightx2v:25111101-cu128 AS base
RUN mkdir /workspace/LightX2V
WORKDIR /workspace/LightX2V
ENV PYTHONPATH=/workspace/LightX2V
# for multi-person & animate
RUN pip install ultralytics moviepy pydub pyannote.audio onnxruntime decord peft pandas matplotlib loguru sentencepiece
RUN export COMMIT=0e78a118995e66bb27d78518c4bd9a3e95b4e266 \
&& export TORCH_CUDA_ARCH_LIST="9.0" \
&& git clone --depth 1 https://github.com/facebookresearch/sam2.git \
&& cd sam2 \
&& git fetch --depth 1 origin $COMMIT \
&& git checkout $COMMIT \
&& python setup.py install
COPY tools tools
COPY assets assets
COPY configs configs
COPY lightx2v lightx2v
COPY lightx2v_kernel lightx2v_kernel
COPY lightx2v_platform lightx2v_platform
COPY --from=frontend_builder /opt/lightx2v/deploy/server/frontend/dist lightx2v/deploy/server/frontend/dist
version: 2
# Set the version of Python and other tools you might need
build:
  os: ubuntu-20.04
  tools:
    python: "3.10"

formats:
  - epub

sphinx:
  configuration: docs/EN/source/conf.py

python:
  install:
    - requirements: requirements-docs.txt
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.https://www.sphinx-doc.org/
exit /b 1
)
if "%1" == "" goto help
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
import logging
import os
import sys
from typing import List
import sphinxcontrib.redoc
from sphinx.ext import autodoc
logger = logging.getLogger(__name__)
sys.path.append(os.path.abspath("../.."))
# -- Project information -----------------------------------------------------
project = "Lightx2v"
copyright = "2025, Lightx2v Team"
author = "the Lightx2v Team"
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
"sphinx.ext.napoleon",
"sphinx.ext.viewcode",
"sphinx.ext.intersphinx",
"sphinx_copybutton",
"sphinx.ext.autodoc",
"sphinx.ext.autosummary",
"sphinx.ext.mathjax",
"myst_parser",
"sphinxarg.ext",
"sphinxcontrib.redoc",
"sphinxcontrib.openapi",
]
myst_enable_extensions = [
"dollarmath",
"amsmath",
]
html_static_path = ["_static"]
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns: List[str] = ["**/*.template.rst"]
# Exclude the prompt "$" when copying code
copybutton_prompt_text = r"\$ "
copybutton_prompt_is_regexp = True
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_title = project
html_theme = "sphinx_book_theme"
# html_theme = 'sphinx_rtd_theme'
html_logo = "../../../assets/img_lightx2v.png"
html_theme_options = {
"path_to_docs": "docs/EN/source",
"repository_url": "https://github.com/ModelTC/lightx2v",
"use_repository_button": True,
}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = ['_static']
# Generate additional rst documentation here.
def setup(app):
    # from docs.source.generate_examples import generate_examples
    # generate_examples()
    pass
# Mock out external dependencies here.
autodoc_mock_imports = [
"cpuinfo",
"torch",
"transformers",
"psutil",
"prometheus_client",
"sentencepiece",
"lightllmnumpy",
"tqdm",
"tensorizer",
]
for mock_target in autodoc_mock_imports:
    if mock_target in sys.modules:
        logger.info(
            "Potentially problematic mock target (%s) found; autodoc_mock_imports cannot mock modules that have already been loaded into sys.modules when the sphinx build starts.",
            mock_target,
        )
class MockedClassDocumenter(autodoc.ClassDocumenter):
    """Remove note about base class when a class is derived from object."""

    def add_line(self, line: str, source: str, *lineno: int) -> None:
        if line == " Bases: :py:class:`object`":
            return
        super().add_line(line, source, *lineno)
autodoc.ClassDocumenter = MockedClassDocumenter
navigation_with_keys = False
# ComfyUI Deployment
## ComfyUI-Lightx2vWrapper
The official ComfyUI integration nodes for LightX2V are now available in a dedicated repository, providing a complete modular configuration system and optimization features.
### Project Repository
- GitHub: [https://github.com/ModelTC/ComfyUI-Lightx2vWrapper](https://github.com/ModelTC/ComfyUI-Lightx2vWrapper)
### Key Features
- Modular Configuration System: Separate nodes for each aspect of video generation
- Support for both Text-to-Video (T2V) and Image-to-Video (I2V) generation modes
- Advanced Optimizations:
- TeaCache acceleration (up to 3x speedup)
- Quantization support (int8, fp8)
- Memory optimization with CPU offloading
- Lightweight VAE options
- LoRA Support: Chain multiple LoRA models for customization
- Multiple Model Support: wan2.1, hunyuan architectures
### Installation and Usage
Please visit the GitHub repository above for detailed installation instructions, usage tutorials, and example workflows.
# Gradio Deployment Guide
## 📖 Overview
Lightx2v is a lightweight video inference and generation engine that provides a web interface based on Gradio, supporting both Image-to-Video and Text-to-Video generation modes.
For Windows systems, we provide a convenient one-click deployment solution with automatic environment configuration and intelligent parameter optimization. Please refer to the [One-Click Gradio Startup (Recommended)](./deploy_local_windows.md#one-click-gradio-startup-recommended) section for detailed instructions.
![Gradio English Interface](../../../../assets/figs/portabl_windows/pic_gradio_en.png)
## 📁 File Structure
```
LightX2V/app/
├── gradio_demo.py # English interface demo
├── gradio_demo_zh.py # Chinese interface demo
├── run_gradio.sh # Startup script
├── README.md # Documentation
├── outputs/ # Generated video save directory
└── inference_logs.log # Inference logs
```
This project contains two main demo files:
- `gradio_demo.py` - English interface version
- `gradio_demo_zh.py` - Chinese interface version
## 🚀 Quick Start
### Environment Requirements
Follow the [Quick Start Guide](../getting_started/quickstart.md) to install the environment
#### Recommended Optimization Library Configuration
- [Flash attention](https://github.com/Dao-AILab/flash-attention)
- [Sage attention](https://github.com/thu-ml/SageAttention)
- [vllm-kernel](https://github.com/vllm-project/vllm)
- [sglang-kernel](https://github.com/sgl-project/sglang/tree/main/sgl-kernel)
- [q8-kernel](https://github.com/KONAKONA666/q8_kernels) (only supports ADA architecture GPUs)
Install each operator as needed by following the instructions on its project homepage.
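As a rough example, the commands below mirror how this repository's Dockerfile builds two of these operators. Build flags and supported GPU architectures change over time, so treat this as a sketch and prefer each project's README:

```bash
# Flash Attention (requires a CUDA toolkit matching your PyTorch build)
pip install --no-cache-dir packaging ninja
pip install --no-cache-dir flash-attn --no-build-isolation

# Sage Attention, built from source
git clone https://github.com/thu-ml/SageAttention.git
cd SageAttention && pip install --no-cache-dir -v -e . && cd ..
```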
### 📥 Model Download
Refer to the [Model Structure Documentation](../getting_started/model_structure.md) to download complete models (including quantized and non-quantized versions) or download only quantized/non-quantized versions.
#### wan2.1 Model Directory Structure
```
models/
├── wan2.1_i2v_720p_lightx2v_4step.safetensors # Original precision
├── wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step.safetensors # FP8 quantization
├── wan2.1_i2v_720p_int8_lightx2v_4step.safetensors # INT8 quantization
├── wan2.1_i2v_720p_int8_lightx2v_4step_split # INT8 quantization block storage directory
├── wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step_split # FP8 quantization block storage directory
├── Other weights (e.g., t2v)
├── t5/clip/xlm-roberta-large/google # text and image encoder
├── vae/lightvae/lighttae # vae
└── config.json # Model configuration file
```
#### wan2.2 Model Directory Structure
```
models/
├── wan2.2_i2v_A14b_high_noise_lightx2v_4step_1030.safetensors # high noise original precision
├── wan2.2_i2v_A14b_high_noise_fp8_e4m3_lightx2v_4step_1030.safetensors # high noise FP8 quantization
├── wan2.2_i2v_A14b_high_noise_int8_lightx2v_4step_1030.safetensors # high noise INT8 quantization
├── wan2.2_i2v_A14b_high_noise_int8_lightx2v_4step_1030_split # high noise INT8 quantization block storage directory
├── wan2.2_i2v_A14b_low_noise_lightx2v_4step.safetensors # low noise original precision
├── wan2.2_i2v_A14b_low_noise_fp8_e4m3_lightx2v_4step.safetensors # low noise FP8 quantization
├── wan2.2_i2v_A14b_low_noise_int8_lightx2v_4step.safetensors # low noise INT8 quantization
├── wan2.2_i2v_A14b_low_noise_int8_lightx2v_4step_split # low noise INT8 quantization block storage directory
├── t5/clip/xlm-roberta-large/google # text and image encoder
├── vae/lightvae/lighttae # vae
└── config.json # Model configuration file
```
**📝 Download Instructions**:
- Model weights can be downloaded from HuggingFace:
- [Wan2.1-Distill-Models](https://huggingface.co/lightx2v/Wan2.1-Distill-Models)
- [Wan2.2-Distill-Models](https://huggingface.co/lightx2v/Wan2.2-Distill-Models)
- Text and Image Encoders can be downloaded from [Encoders](https://huggingface.co/lightx2v/Encoders)
- VAE can be downloaded from [Autoencoders](https://huggingface.co/lightx2v/Autoencoders)
- The `xxx_split` directories (e.g., `wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step_split`) store the weights as multiple safetensors files split by block, which suits devices with limited memory; devices with 16GB of RAM or less should download these according to their needs.
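For example, the weights can be fetched with `huggingface-cli`; the `--include` pattern below is only an illustration, so adjust it to the files and precision you actually need:

```bash
# Download only the INT8-quantized wan2.1 i2v weights into the local models/ directory
huggingface-cli download lightx2v/Wan2.1-Distill-Models \
    --include "wan2.1_i2v_720p_int8*" \
    --local-dir ./models

# Text/image encoders and VAE weights
huggingface-cli download lightx2v/Encoders --local-dir ./models
huggingface-cli download lightx2v/Autoencoders --local-dir ./models
```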
### Startup Methods
#### Method 1: Using Startup Script (Recommended)
**Linux Environment:**
```bash
# 1. Edit the startup script to configure relevant paths
cd app/
vim run_gradio.sh
# Configuration items that need to be modified:
# - lightx2v_path: Lightx2v project root directory path
# - model_path: Model root directory path (contains all model files)
# 💾 Important note: Recommend pointing model paths to SSD storage locations
# Example: /mnt/ssd/models/ or /data/ssd/models/
# 2. Run the startup script
bash run_gradio.sh
# 3. Or start with parameters
bash run_gradio.sh --lang en --port 8032
bash run_gradio.sh --lang zh --port 7862
```
**Windows Environment:**
```cmd
# 1. Edit the startup script to configure relevant paths
cd app\
notepad run_gradio_win.bat
# Configuration items that need to be modified:
# - lightx2v_path: Lightx2v project root directory path
# - model_path: Model root directory path (contains all model files)
# 💾 Important note: Recommend pointing model paths to SSD storage locations
# Example: D:\models\ or E:\models\
# 2. Run the startup script
run_gradio_win.bat
# 3. Or start with parameters
run_gradio_win.bat --lang en --port 8032
run_gradio_win.bat --lang zh --port 7862
```
#### Method 2: Direct Command Line Startup
```bash
pip install -v git+https://github.com/ModelTC/LightX2V.git
```
**Linux Environment:**
**English Interface Version:**
```bash
python gradio_demo.py \
--model_path /path/to/models \
--server_name 0.0.0.0 \
--server_port 7862
```
**Chinese Interface Version:**
```bash
python gradio_demo_zh.py \
--model_path /path/to/models \
--server_name 0.0.0.0 \
--server_port 7862
```
**Windows Environment:**
**English Interface Version:**
```cmd
python gradio_demo.py ^
--model_path D:\models ^
--server_name 127.0.0.1 ^
--server_port 7862
```
**Chinese Interface Version:**
```cmd
python gradio_demo_zh.py ^
--model_path D:\models ^
--server_name 127.0.0.1 ^
--server_port 7862
```
**💡 Tip**: Model type (wan2.1/wan2.2), task type (i2v/t2v), and specific model file selection are all configured in the Web interface.
## 📋 Command Line Parameters
| Parameter | Type | Required | Default | Description |
|-----------|------|----------|---------|-------------|
| `--model_path` | str | ✅ | - | Model root directory path (directory containing all model files) |
| `--server_port` | int | ❌ | 7862 | Server port |
| `--server_name` | str | ❌ | 0.0.0.0 | Server IP address |
| `--output_dir` | str | ❌ | ./outputs | Output video save directory |
**💡 Note**: Model type (wan2.1/wan2.2), task type (i2v/t2v), and specific model file selection are all configured in the Web interface.
## 🎯 Features
### Model Configuration
- **Model Type**: Supports wan2.1 and wan2.2 model architectures
- **Task Type**: Supports Image-to-Video (i2v) and Text-to-Video (t2v) generation modes
- **Model Selection**: Frontend automatically identifies and filters available model files, supports automatic quantization precision detection
- **Encoder Configuration**: Supports selection of T5 text encoder, CLIP image encoder, and VAE decoder
- **Operator Selection**: Supports multiple attention operators and quantization matrix multiplication operators, system automatically sorts by installation status
### Input Parameters
- **Prompt**: Describe the expected video content
- **Negative Prompt**: Specify elements you don't want to appear
- **Input Image**: Upload input image required in i2v mode
- **Resolution**: Supports multiple preset resolutions (480p/540p/720p)
- **Random Seed**: Controls the randomness of generation results
- **Inference Steps**: Affects the balance between generation quality and speed (defaults to 4 steps for distilled models)
### Video Parameters
- **FPS**: Frames per second
- **Total Frames**: Video length
- **CFG Scale Factor**: Controls prompt influence strength (1-10, defaults to 1 for distilled models)
- **Distribution Shift**: Controls generation style deviation degree (0-10)
## 🔧 Auto-Configuration Feature
The system automatically configures optimal inference options based on your hardware configuration (GPU VRAM and CPU memory) without manual adjustment. The best configuration is automatically applied on startup, including:
- **GPU Memory Optimization**: Automatically enables CPU offloading, VAE tiling inference, etc. based on VRAM size
- **CPU Memory Optimization**: Automatically enables lazy loading, module unloading, etc. based on system memory
- **Operator Selection**: Automatically selects the best installed operators (sorted by priority)
- **Quantization Configuration**: Automatically detects and applies quantization precision based on model file names
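The actual selection logic lives in the Gradio demos; the sketch below only illustrates the idea of deriving offload options from detected VRAM and RAM. The thresholds and option names here are assumptions for illustration, not the demo's exact values:

```python
import torch
import psutil


def suggest_offload_options():
    """Illustrative only: derive offload flags from detected GPU VRAM and system RAM."""
    if not torch.cuda.is_available():
        raise RuntimeError("An NVIDIA GPU with CUDA support is required.")
    vram_gb = torch.cuda.get_device_properties(0).total_memory / 1024**3
    ram_gb = psutil.virtual_memory().total / 1024**3
    return {
        "cpu_offload": vram_gb < 16,     # offload DiT weights to CPU on small-VRAM GPUs
        "use_tiling_vae": vram_gb < 12,  # tiled VAE decoding lowers peak VRAM
        "lazy_load": ram_gb < 32,        # lazy-load weights from disk when RAM is tight
    }


if __name__ == "__main__":
    print(suggest_offload_options())
```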
### Log Viewing
```bash
# View inference logs
tail -f inference_logs.log
# View GPU usage
nvidia-smi
# View system resources
htop
```
Welcome to submit Issues and Pull Requests to improve this project!
**Note**: Please comply with relevant laws and regulations when using videos generated by this tool, and do not use them for illegal purposes.
# Windows Local Deployment Guide
## 📖 Overview
This document provides detailed instructions for deploying LightX2V locally on Windows environments, including batch file inference, Gradio Web interface inference, and other usage methods.
## 🚀 Quick Start
### Environment Requirements
#### Hardware Requirements
- **GPU**: NVIDIA GPU, recommended 8GB+ VRAM
- **Memory**: Recommended 16GB+ RAM
- **Storage**: An SSD is strongly recommended; mechanical hard drives will cause slow model loading
## 🎯 Usage Methods
### Method 1: Using Batch File Inference
Refer to the [Quick Start Guide](../getting_started/quickstart.md) to install the environment, then run using the [batch files](https://github.com/ModelTC/LightX2V/tree/main/scripts/win).
### Method 2: Using Gradio Web Interface Inference
#### Manual Gradio Configuration
Refer to the [Quick Start Guide](../getting_started/quickstart.md) to install the environment, then follow the [Gradio Deployment Guide](./deploy_gradio.md).
#### One-Click Gradio Startup (Recommended)
**📦 Download Software Package**
- [Quark Cloud](https://pan.quark.cn/s/8af1162d7a15)
**📁 Directory Structure**
After extraction, ensure the directory structure is as follows:
```
├── env/ # LightX2V environment directory
├── LightX2V/ # LightX2V project directory
├── start_lightx2v.bat # One-click startup script
├── lightx2v_config.txt # Configuration file
├── LightX2V使用说明.txt # LightX2V usage instructions
├── outputs/ # Generated video save directory
└── models/ # Model storage directory
```
**📥 Model Download**:
Refer to [Model Structure Documentation](../getting_started/model_structure.md) or [Gradio Deployment Guide](./deploy_gradio.md) to download complete models (including quantized and non-quantized versions) or download only quantized/non-quantized versions.
**📋 Configuration Parameters**
Edit the `lightx2v_config.txt` file and modify the following parameters as needed:
```ini
# Interface language (zh: Chinese, en: English)
lang=en
# Server port
port=8032
# GPU device ID (0, 1, 2...)
gpu=0
# Model path
model_path=models/
```
**🚀 Start Service**
Double-click to run the `start_lightx2v.bat` file, the script will:
1. Automatically read configuration file
2. Verify model paths and file integrity
3. Start Gradio Web interface
4. Automatically open browser to access service
![Gradio English Interface](../../../../assets/figs/portabl_windows/pic_gradio_en.png)
**⚠️ Important Notes**:
- **Display Issues**: If the webpage opens blank or displays abnormally, please run `pip install --upgrade gradio` to upgrade the Gradio version.
### Method 3: Using ComfyUI Inference
This guide will instruct you on how to download and use the portable version of the Lightx2v-ComfyUI environment, so you can avoid manual environment configuration steps. This is suitable for users who want to quickly start experiencing accelerated video generation with Lightx2v on Windows systems.
#### Download the Windows Portable Environment:
- [Baidu Cloud Download](https://pan.baidu.com/s/1FVlicTXjmXJA1tAVvNCrBw?pwd=wfid), extraction code: wfid
The portable environment already packages all Python runtime dependencies, including the code and dependencies for ComfyUI and LightX2V. After downloading, simply extract to use.
After extraction, the directory structure is as follows:
```shell
lightx2v_env
├──📂 ComfyUI # ComfyUI code
├──📂 portable_python312_embed # Standalone Python environment
└── run_nvidia_gpu.bat # Windows startup script (double-click to start)
```
#### Start ComfyUI
Directly double-click the run_nvidia_gpu.bat file. The system will open a Command Prompt window and run the program. The first startup may take a while, please be patient. After startup is complete, the browser will automatically open and display the ComfyUI frontend interface.
![i2v example workflow](../../../../assets/figs/portabl_windows/pic1.png)
The plugin used by LightX2V-ComfyUI is [ComfyUI-Lightx2vWrapper](https://github.com/ModelTC/ComfyUI-Lightx2vWrapper). Example workflows can be obtained from this project.
#### Tested Graphics Cards (offload mode)
- Tested model: `Wan2.1-I2V-14B-480P`
| GPU Model | Task Type | VRAM Capacity | Actual Max VRAM Usage | Actual Max RAM Usage |
|:-----------|:------------|:--------------|:---------------------|:---------------------|
| 3090Ti | I2V | 24G | 6.1G | 7.1G |
| 3080Ti | I2V | 12G | 6.1G | 7.1G |
| 3060Ti | I2V | 8G | 6.1G | 7.1G |
| 4070Ti Super | I2V | 16G | 6.1G | 7.1G |
| 4070 | I2V | 12G | 6.1G | 7.1G |
| 4060 | I2V | 8G | 6.1G | 7.1G |
#### Environment Packaging and Usage Reference
- [ComfyUI](https://github.com/comfyanonymous/ComfyUI)
- [Portable-Windows-ComfyUI-Docs](https://docs.comfy.org/zh-CN/installation/comfyui_portable_windows#portable-%E5%8F%8A%E8%87%AA%E9%83%A8%E7%BD%B2)
# Service Deployment
lightx2v provides asynchronous service functionality. The code entry point is [here](https://github.com/ModelTC/lightx2v/blob/main/lightx2v/api_server.py)
### Start the Service
```shell
# Modify the paths in the script
bash scripts/start_server.sh
```
The `--port 8000` option means the service will bind to port `8000` on the local machine. You can change this as needed.
### Client Sends Request
```shell
python scripts/post.py
```
The service endpoint is: `/v1/tasks/`
The `message` parameter in `scripts/post.py` is as follows:
```python
message = {
"prompt": "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage.",
"negative_prompt": "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走",
"image_path": "",
}
```
1. `prompt`, `negative_prompt`, and `image_path` are basic inputs for video generation. `image_path` can be an empty string, indicating no image input is needed.
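Instead of running `scripts/post.py`, you can also send the request directly. The sketch below assumes the server is listening on `localhost:8000` and accepts the JSON body shown above (additional optional fields may be supported):

```python
import requests

message = {
    "prompt": "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage.",
    "negative_prompt": "",
    "image_path": "",  # empty string: no image input (t2v)
}

# Create a video generation task on the locally running server
resp = requests.post("http://localhost:8000/v1/tasks/", json=message)
resp.raise_for_status()
print(resp.json())  # typically includes the created task's ID and status
```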
### Client Checks Server Status
```shell
python scripts/check_status.py
```
The service endpoints include:
1. `/v1/service/status` is used to check the status of the service. It returns whether the service is `busy` or `idle`. The service only accepts new requests when it is `idle`.
2. `/v1/tasks/` is used to get all tasks received and completed by the server.
3. `/v1/tasks/{task_id}/status` is used to get the status of a specified `task_id`. It returns whether the task is `processing` or `completed`.
### Client Stops the Current Task on the Server at Any Time
```shell
python scripts/stop_running_task.py
```
The service endpoint is: `/v1/tasks/running`
After terminating the task, the server will not exit but will return to waiting for new requests.
### Starting Multiple Services on a Single Node
On a single node, you can start multiple services using `scripts/start_server.sh` (Note that the port numbers under the same IP must be different for each service), or you can start multiple services at once using `scripts/start_multi_servers.sh`:
```shell
num_gpus=8 bash scripts/start_multi_servers.sh
```
Where `num_gpus` indicates the number of services to start; the services will run on consecutive ports starting from `--start_port`.
### Scheduling Between Multiple Services
```shell
python scripts/post_multi_servers.py
```
`post_multi_servers.py` will schedule multiple client requests based on the idle status of the services.
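As a rough sketch of the same idea, the snippet below polls `/v1/service/status` on consecutive ports and submits a task to the first idle server. It assumes servers on `localhost` starting at port 8000 and that the status response contains the word `idle`; the exact response schema may differ:

```python
import requests

START_PORT = 8000   # should match --start_port used by start_multi_servers.sh
NUM_SERVERS = 8     # should match num_gpus


def find_idle_server():
    """Return the base URL of the first reachable server reporting itself as idle."""
    for port in range(START_PORT, START_PORT + NUM_SERVERS):
        base = f"http://localhost:{port}"
        try:
            status = requests.get(f"{base}/v1/service/status", timeout=2).json()
        except requests.RequestException:
            continue  # server not reachable, try the next one
        if "idle" in str(status).lower():
            return base
    return None


base_url = find_idle_server()
if base_url is not None:
    message = {"prompt": "a cat boxing on a stage", "negative_prompt": "", "image_path": ""}
    print(requests.post(f"{base_url}/v1/tasks/", json=message).json())
else:
    print("All services are busy or unreachable.")
```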
### API Endpoints Summary
| Endpoint | Method | Description |
|----------|--------|-------------|
| `/v1/tasks/` | POST | Create video generation task |
| `/v1/tasks/form` | POST | Create video generation task via form |
| `/v1/tasks/` | GET | Get all task list |
| `/v1/tasks/{task_id}/status` | GET | Get status of specified task |
| `/v1/tasks/{task_id}/result` | GET | Get result video file of specified task |
| `/v1/tasks/running` | DELETE | Stop currently running task |
| `/v1/files/download/{file_path}` | GET | Download file |
| `/v1/service/status` | GET | Get service status |
# Deployment for Low Latency Scenarios
In low-latency scenarios, we pursue the fastest possible speed, setting aside concerns such as GPU memory and RAM overhead. We provide two solutions:
## 💡 Solution 1: Inference with Step Distillation Model
This solution can refer to the [Step Distillation Documentation](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/step_distill.html)
🧠 **Step Distillation** is a very direct way to accelerate video generation model inference. By distilling from 50 steps down to 4 steps, the inference time drops to roughly 4/50 of the original. This solution can also be combined with the following techniques:
1. [Efficient Attention Mechanism Solution](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/attention.html)
2. [Model Quantization](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/quantization.html)
## 💡 Solution 2: Inference with Non-Step Distillation Model
Step distillation requires substantial training resources, and the distilled model may show a reduced video dynamic range.
For the original model without step distillation, the following solutions can be used individually or in combination for acceleration:
1. [Parallel Inference](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/parallel.html) for multi-GPU parallel acceleration.
2. [Feature Caching](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/cache.html) to reduce the actual inference steps.
3. [Efficient Attention Mechanism Solution](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/attention.html) to accelerate Attention inference.
4. [Model Quantization](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/quantization.html) to accelerate Linear layer inference.
5. [Variable Resolution Inference](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/changing_resolution.html) to reduce the resolution of intermediate inference steps.
## 💡 Using Tiny VAE
In some cases, the VAE component can be time-consuming. You can use a lightweight VAE for acceleration, which can also reduce some GPU memory usage.
```json
{
    "use_tae": true,
    "tae_path": "/path/to/taew2_1.pth"
}
```
The taew2_1.pth weights can be downloaded from [here](https://github.com/madebyollin/taehv/raw/refs/heads/main/taew2_1.pth)
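For example, assuming `wget` is available:

```bash
wget https://github.com/madebyollin/taehv/raw/refs/heads/main/taew2_1.pth -O taew2_1.pth
```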
## ⚠️ Note
Some acceleration solutions currently cannot be used together, and we are working to resolve this issue.
If you have any questions, feel free to report bugs or request features in [🐛 GitHub Issues](https://github.com/ModelTC/lightx2v/issues)
# Lightx2v Low-Resource Deployment Guide
## 📋 Overview
This guide is specifically designed for hardware resource-constrained environments, particularly configurations with **8GB VRAM + 16/32GB RAM**, providing detailed instructions on how to successfully run Lightx2v 14B models for 480p and 720p video generation.
Lightx2v is a powerful video generation model, but it requires careful optimization to run smoothly in resource-constrained environments. This guide provides a complete solution from hardware selection to software configuration, ensuring you can achieve the best video generation experience under limited hardware conditions.
## 🎯 Target Hardware Configuration
### Recommended Hardware Specifications
**GPU Requirements**:
- **VRAM**: 8GB (RTX 3060/3070/4060/4060Ti, etc.)
- **Architecture**: NVIDIA graphics cards with CUDA support
**System Memory**:
- **Minimum**: 16GB DDR4
- **Recommended**: 32GB DDR4/DDR5
- **Memory Speed**: 3200MHz or higher recommended
**Storage Requirements**:
- **Type**: NVMe SSD strongly recommended
- **Capacity**: At least 50GB available space
- **Speed**: Read speed of 3000MB/s or higher recommended
**CPU Requirements**:
- **Cores**: 8 cores or more recommended
- **Frequency**: 3.0GHz or higher recommended
- **Architecture**: Support for AVX2 instruction set
## ⚙️ Core Optimization Strategies
### 1. Environment Optimization
Before running Lightx2v, it's recommended to set the following environment variables to optimize performance:
```bash
# CUDA memory allocation optimization
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
# Enable CUDA Graph mode to improve inference performance
export ENABLE_GRAPH_MODE=true
# Use BF16 precision for inference to reduce VRAM usage (default FP32 precision)
export DTYPE=BF16
```
**Optimization Details**:
- `expandable_segments:True`: Allows dynamic expansion of CUDA memory segments, reducing memory fragmentation
- `ENABLE_GRAPH_MODE=true`: Enables CUDA Graph to reduce kernel launch overhead
- `DTYPE=BF16`: Uses BF16 precision to reduce VRAM usage while maintaining quality
### 2. Quantization Strategy
Quantization is a key optimization technique in low-resource environments, reducing memory usage by lowering model precision.
#### Quantization Scheme Comparison
**FP8 Quantization** (Recommended for RTX 40 series):
```python
# Suitable for GPUs supporting FP8, providing better precision
dit_quant_scheme = "fp8" # DIT model quantization
t5_quant_scheme = "fp8" # T5 text encoder quantization
clip_quant_scheme = "fp8" # CLIP visual encoder quantization
```
**INT8 Quantization** (Universal solution):
```python
# Suitable for all GPUs, minimal memory usage
dit_quant_scheme = "int8" # 8-bit integer quantization
t5_quant_scheme = "int8" # Text encoder quantization
clip_quant_scheme = "int8" # Visual encoder quantization
```
### 3. Efficient Operator Selection Guide
Choosing the right operators can significantly improve inference speed and reduce memory usage.
#### Attention Operator Selection
**Recommended Priority**:
1. **[Sage Attention](https://github.com/thu-ml/SageAttention)** (Highest priority)
2. **[Flash Attention](https://github.com/Dao-AILab/flash-attention)** (Universal solution)
#### Matrix Multiplication Operator Selection
**ADA Architecture GPUs** (RTX 40 series):
Recommended priority:
1. **[q8-kernel](https://github.com/KONAKONA666/q8_kernels)** (Highest performance, ADA architecture only)
2. **[sglang-kernel](https://github.com/sgl-project/sglang/tree/main/sgl-kernel)** (Balanced solution)
3. **[vllm-kernel](https://github.com/vllm-project/vllm)** (Universal solution)
**Other Architecture GPUs**:
1. **[sglang-kernel](https://github.com/sgl-project/sglang/tree/main/sgl-kernel)** (Recommended)
2. **[vllm-kernel](https://github.com/vllm-project/vllm)** (Alternative)
### 4. Parameter Offloading Strategy
Parameter offloading technology allows models to dynamically schedule parameters between CPU and disk, breaking through VRAM limitations.
#### Three-Level Offloading Architecture
```python
# Disk-CPU-GPU three-level offloading configuration
cpu_offload = True                  # Enable CPU offloading
t5_cpu_offload = True               # Enable T5 encoder CPU offloading
offload_granularity = "phase"       # DIT model fine-grained offloading
t5_offload_granularity = "block"    # T5 encoder fine-grained offloading
lazy_load = True                    # Enable lazy loading mechanism
num_disk_workers = 2                # Disk I/O worker threads
```
#### Offloading Strategy Details
**Lazy Loading Mechanism**:
- Model parameters are loaded from disk to CPU on demand
- Reduces runtime memory usage
- Supports large models running with limited memory
**Disk Storage Optimization**:
- Use high-speed SSD to store model parameters
- Store model files grouped by blocks
- Refer to conversion script [documentation](https://github.com/ModelTC/lightx2v/tree/main/tools/convert/readme.md), specify `--save_by_block` parameter during conversion
### 5. VRAM Optimization Techniques
VRAM optimization strategies for 720p video generation.
#### CUDA Memory Management
```python
# CUDA memory cleanup configuration
clean_cuda_cache = True # Timely cleanup of GPU cache
rotary_chunk = True # Rotary position encoding chunked computation
rotary_chunk_size = 100 # Chunk size, adjustable based on VRAM
```
#### Chunked Computation Strategy
**Rotary Position Encoding Chunking**:
- Process long sequences in small chunks
- Reduce peak VRAM usage
- Maintain computational precision
### 6. VAE Optimization
VAE (Variational Autoencoder) is a key component in video generation, and optimizing VAE can significantly improve performance.
#### VAE Chunked Inference
```python
# VAE optimization configuration
use_tiling_vae = True # Enable VAE chunked inference
```
#### Lightweight VAE
```python
# VAE optimization configuration
use_tae = True # Use lightweight VAE
tae_path = "/path/to/taew2_1.pth"
```
You can download taew2_1.pth [here](https://github.com/madebyollin/taehv/blob/main/taew2_1.pth)
**VAE Optimization Effects**:
- Standard VAE: Baseline performance, 100% quality retention
- Standard VAE chunked: Reduces VRAM usage, increases inference time, 100% quality retention
- Lightweight VAE: Extremely low VRAM usage, with some video quality loss
### 7. Model Selection Strategy
Choosing the right model version is crucial for low-resource environments.
#### Recommended Model Comparison
**Distilled Models** (Strongly recommended):
- **[Wan2.1-I2V-14B-480P-StepDistill-CfgDistill-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-I2V-14B-480P-StepDistill-CfgDistill-Lightx2v)**
- **[Wan2.1-I2V-14B-720P-StepDistill-CfgDistill-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-I2V-14B-720P-StepDistill-CfgDistill-Lightx2v)**
#### Performance Optimization Suggestions
When using the above distilled models, you can further optimize performance:
- Disable CFG: `"enable_cfg": false`
- Reduce inference steps: `infer_steps: 4`
- Reference configuration files: [config](https://github.com/ModelTC/LightX2V/tree/main/configs/distill)
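For illustration, the relevant excerpt of such a config looks like the snippet below; merge it into a full configuration from the linked directory rather than using it on its own:

```json
{
    "infer_steps": 4,
    "enable_cfg": false
}
```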
## 🚀 Complete Configuration Examples
### Pre-configured Templates
- **[14B Model 480p Video Generation Configuration](https://github.com/ModelTC/lightx2v/tree/main/configs/offload/disk/wan_i2v_phase_lazy_load_480p.json)**
- **[14B Model 720p Video Generation Configuration](https://github.com/ModelTC/lightx2v/tree/main/configs/offload/disk/wan_i2v_phase_lazy_load_720p.json)**
- **[1.3B Model 720p Video Generation Configuration](https://github.com/ModelTC/LightX2V/tree/main/configs/offload/block/wan_t2v_1_3b.json)**
- The inference bottleneck for 1.3B models is the T5 encoder, so the configuration file specifically optimizes for T5
**[Launch Script](https://github.com/ModelTC/LightX2V/tree/main/scripts/wan/run_wan_i2v_lazy_load.sh)**
## 📚 Reference Resources
- [Parameter Offloading Mechanism Documentation](../method_tutorials/offload.md) - In-depth understanding of offloading technology principles
- [Quantization Technology Guide](../method_tutorials/quantization.md) - Detailed explanation of quantization technology
- [Gradio Deployment Guide](deploy_gradio.md) - Detailed Gradio deployment instructions
## ⚠️ Important Notes
1. **Hardware Requirements**: Ensure your hardware meets minimum configuration requirements
2. **Driver Version**: Recommend using the latest NVIDIA drivers (535+)
3. **CUDA Version**: Ensure CUDA version is compatible with PyTorch (recommend CUDA 11.8+)
4. **Storage Space**: Reserve sufficient disk space for model caching (at least 50GB)
5. **Network Environment**: Stable network connection required for initial model download
6. **Environment Variables**: Be sure to set the recommended environment variables to optimize performance
**Technical Support**: If you encounter issues, please submit an Issue to the project repository.