Commit e2778d0d authored by litzh

Initial commit
{
"model_cls": "worldplay_distill",
"task": "i2v",
"infer_steps": 4,
"denoising_step_list": [0, 250, 500, 750],
"transformer_model_name": "480p_i2v",
"target_video_length": 125,
"aspect_ratio": "16:9",
"vae_stride": [4, 16, 16],
"sample_shift": 1.0,
"sample_guide_scale": -1.0,
"enable_cfg": false,
"attn_type": "flash_attn2",
"model_type": "ar",
"chunk_latent_frames": 4,
"use_prope": true,
"hidden_size": 2048,
"heads_num": 16,
"mm_double_blocks_depth": 54,
"patch_size": [1, 1, 1],
"rope_dim_list": [16, 56, 56],
"rope_theta": 256,
"in_channels": 32,
"out_channels": 32,
"feature_caching": "NoCaching",
"cpu_offload": true,
"offload_granularity": "block",
"use_prompt_enhancer": false,
"use_image_encoder": true,
"seq_parallel": false,
"parallel": {},
"is_sr_running": false
}
{
"aspect_ratio": "16:9",
"num_channels_latents": 16,
"infer_steps": 9,
"attn_type": "flash_attn3",
"enable_cfg": false,
"sample_guide_scale": 0.0,
"patch_size": 2
}
{
"aspect_ratio": "16:9",
"num_channels_latents": 16,
"infer_steps": 9,
"attn_type": "flash_attn3",
"enable_cfg": false,
"sample_guide_scale": 0.0,
"patch_size": 2,
"dit_quantized": true,
"dit_quant_scheme": "fp8-sgl",
"dit_quantized_ckpt": "/path/to/z_image_turbo_fp8.safetensors"
}
{
"aspect_ratio": "16:9",
"num_channels_latents": 16,
"infer_steps": 9,
"attn_type": "flash_attn3",
"enable_cfg": false,
"sample_guide_scale": 0.0,
"patch_size": 2,
"cpu_offload": true,
"offload_granularity": "model",
"qwen3_quantized": true,
"qwen3_quant_scheme": "int4",
"qwen3_quantized_ckpt": "JunHowie/Qwen3-4B-GPTQ-Int4",
"dit_quantized": true,
"dit_quant_scheme": "fp8-sgl",
"dit_quantized_ckpt": "lightx2v/Z-Image-Turbo-Quantized/z_image_turbo_scaled_fp8_e4m3fn.safetensors"
}
FROM pytorch/pytorch:2.8.0-cuda12.8-cudnn9-devel AS base
WORKDIR /app
ENV DEBIAN_FRONTEND=noninteractive
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
ENV LD_LIBRARY_PATH=/usr/local/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
RUN apt-get update && apt-get install -y vim tmux zip unzip bzip2 wget git git-lfs build-essential libibverbs-dev ca-certificates \
curl iproute2 libsm6 libxext6 kmod ccache libnuma-dev libssl-dev flex bison libgtk-3-dev libpango1.0-dev \
libsoup2.4-dev libnice-dev libopus-dev libvpx-dev libx264-dev libsrtp2-dev libglib2.0-dev libdrm-dev libjpeg-dev libpng-dev \
&& apt-get clean && rm -rf /var/lib/apt/lists/* && git lfs install
RUN conda install conda-forge::ffmpeg=8.0.0 -y && conda clean --all -y
RUN pip install --no-cache-dir packaging ninja cmake scikit-build-core uv meson ruff pre-commit fastapi uvicorn requests -U
RUN git clone https://github.com/vllm-project/vllm.git && cd vllm \
&& python use_existing_torch.py && pip install --no-cache-dir -r requirements/build.txt \
&& pip install --no-cache-dir --no-build-isolation -v -e .
RUN git clone https://github.com/sgl-project/sglang.git && cd sglang/sgl-kernel \
&& make build && make clean
RUN pip install --no-cache-dir diffusers transformers tokenizers accelerate safetensors opencv-python numpy imageio \
imageio-ffmpeg einops loguru qtorch ftfy av decord matplotlib debugpy soundfile jsonschema pymongo modelscope
RUN git clone https://github.com/Dao-AILab/flash-attention.git --recursive
RUN cd flash-attention && python setup.py install && rm -rf build
RUN cd flash-attention/hopper && python setup.py install && rm -rf build
RUN git clone https://github.com/ModelTC/SageAttention.git --depth 1
RUN cd SageAttention && CUDA_ARCHITECTURES="8.0,8.6,8.9,9.0,12.0" EXT_PARALLEL=4 NVCC_APPEND_FLAGS="--threads 8" MAX_JOBS=32 pip install --no-cache-dir -v -e .
RUN git clone https://github.com/ModelTC/SageAttention-1104.git --depth 1
RUN cd SageAttention-1104/sageattention3_blackwell && python setup.py install && rm -rf build
RUN git clone https://github.com/SandAI-org/MagiAttention.git --recursive
RUN cd MagiAttention && TORCH_CUDA_ARCH_LIST="9.0" pip install --no-cache-dir --no-build-isolation -v -e .
RUN git clone https://github.com/Tencent-Hunyuan/flex-block-attn.git --recursive
RUN cd flex-block-attn && python setup.py install && rm -rf build
RUN git clone https://github.com/ModelTC/FlashVSR.git --depth 1
RUN cd FlashVSR && pip install --no-cache-dir -v -e .
COPY lightx2v_kernel /app/lightx2v_kernel
RUN git clone https://github.com/NVIDIA/cutlass.git --depth 1 && cd /app/lightx2v_kernel && MAX_JOBS=32 CMAKE_BUILD_PARALLEL_LEVEL=4 \
uv build --wheel \
-Cbuild-dir=build . \
-Ccmake.define.CUTLASS_PATH=/app/cutlass \
--verbose \
--color=always \
--no-build-isolation \
&& pip install dist/*whl --force-reinstall --no-deps \
&& rm -rf /app/lightx2v_kernel && rm -rf /app/cutlass
# cloud deploy
RUN pip install --no-cache-dir aio-pika "asyncpg>=0.27.0" "aioboto3>=12.0.0" PyJWT alibabacloud_dypnsapi20170525==1.2.2 redis==6.4.0 tos
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
ENV PATH=/root/.cargo/bin:$PATH
RUN cd /opt \
&& wget https://mirrors.tuna.tsinghua.edu.cn/gnu/libiconv/libiconv-1.15.tar.gz \
&& tar zxvf libiconv-1.15.tar.gz \
&& cd libiconv-1.15 \
&& ./configure \
&& make \
&& make install \
&& rm -rf /opt/libiconv-1.15
RUN cd /opt \
&& git clone https://github.com/GStreamer/gstreamer.git -b 1.27.2 --depth 1 \
&& cd gstreamer \
&& meson setup builddir \
&& meson compile -C builddir \
&& meson install -C builddir \
&& ldconfig \
&& rm -rf /opt/gstreamer
RUN cd /opt \
&& git clone https://github.com/GStreamer/gst-plugins-rs.git -b gstreamer-1.27.2 --depth 1 \
&& cd gst-plugins-rs \
&& cargo build --package gst-plugin-webrtchttp --release \
&& install -m 644 target/release/libgstwebrtchttp.so $(pkg-config --variable=pluginsdir gstreamer-1.0)/ \
&& rm -rf /opt/gst-plugins-rs
RUN ldconfig
# for base docker
RUN git clone https://github.com/KONAKONA666/q8_kernels.git --depth 1
RUN cd q8_kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
RUN git clone https://github.com/thu-ml/SpargeAttn.git --depth 1
RUN cd SpargeAttn && TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9;9.0" pip install --no-cache-dir --no-build-isolation -v -e .
# for 5090 docker
# RUN git clone https://github.com/ModelTC/LTX-Video-Q8-Kernels.git --depth 1
# RUN cd LTX-Video-Q8-Kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
# RUN git clone https://github.com/ModelTC/SpargeAttn.git --depth 1
# RUN cd SpargeAttn && TORCH_CUDA_ARCH_LIST="12.0" pip install --no-cache-dir --no-build-isolation -v -e .
WORKDIR /workspace
FROM pytorch/pytorch:2.8.0-cuda12.8-cudnn9-devel AS base
WORKDIR /app
ENV DEBIAN_FRONTEND=noninteractive
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
ENV LD_LIBRARY_PATH=/usr/local/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
RUN apt-get update && apt-get install -y vim tmux zip unzip bzip2 wget git git-lfs build-essential libibverbs-dev ca-certificates \
curl iproute2 libsm6 libxext6 kmod ccache libnuma-dev libssl-dev flex bison libgtk-3-dev libpango1.0-dev \
libsoup2.4-dev libnice-dev libopus-dev libvpx-dev libx264-dev libsrtp2-dev libglib2.0-dev libdrm-dev libjpeg-dev libpng-dev \
&& apt-get clean && rm -rf /var/lib/apt/lists/* && git lfs install
RUN conda install conda-forge::ffmpeg=8.0.0 -y && conda clean --all -y
RUN pip install --no-cache-dir packaging ninja cmake scikit-build-core uv meson ruff pre-commit fastapi uvicorn requests -U
RUN git clone https://github.com/vllm-project/vllm.git && cd vllm \
&& python use_existing_torch.py && pip install --no-cache-dir -r requirements/build.txt \
&& pip install --no-cache-dir --no-build-isolation -v -e .
RUN git clone https://github.com/sgl-project/sglang.git && cd sglang/sgl-kernel \
&& make build && make clean
RUN pip install --no-cache-dir diffusers transformers tokenizers accelerate safetensors opencv-python numpy imageio \
imageio-ffmpeg einops loguru qtorch ftfy av decord matplotlib debugpy soundfile jsonschema pymongo modelscope
RUN git clone https://github.com/Dao-AILab/flash-attention.git --recursive
RUN cd flash-attention && python setup.py install && rm -rf build
RUN cd flash-attention/hopper && python setup.py install && rm -rf build
RUN git clone https://github.com/ModelTC/SageAttention.git --depth 1
RUN cd SageAttention && CUDA_ARCHITECTURES="8.0,8.6,8.9,9.0,12.0" EXT_PARALLEL=4 NVCC_APPEND_FLAGS="--threads 8" MAX_JOBS=32 pip install --no-cache-dir -v -e .
RUN git clone https://github.com/ModelTC/SageAttention-1104.git --depth 1
RUN cd SageAttention-1104/sageattention3_blackwell && python setup.py install && rm -rf build
RUN git clone https://github.com/SandAI-org/MagiAttention.git --recursive
RUN cd MagiAttention && TORCH_CUDA_ARCH_LIST="9.0" pip install --no-cache-dir --no-build-isolation -v -e .
RUN git clone https://github.com/Tencent-Hunyuan/flex-block-attn.git --recursive
RUN cd flex-block-attn && python setup.py install && rm -rf build
RUN git clone https://github.com/ModelTC/FlashVSR.git --depth 1
RUN cd FlashVSR && pip install --no-cache-dir -v -e .
COPY lightx2v_kernel /app/lightx2v_kernel
RUN git clone https://github.com/NVIDIA/cutlass.git --depth 1 && cd /app/lightx2v_kernel && MAX_JOBS=32 CMAKE_BUILD_PARALLEL_LEVEL=4 \
uv build --wheel \
-Cbuild-dir=build . \
-Ccmake.define.CUTLASS_PATH=/app/cutlass \
--verbose \
--color=always \
--no-build-isolation \
&& pip install dist/*whl --force-reinstall --no-deps \
&& rm -rf /app/lightx2v_kernel && rm -rf /app/cutlass
# cloud deploy
RUN pip install --no-cache-dir aio-pika "asyncpg>=0.27.0" "aioboto3>=12.0.0" PyJWT alibabacloud_dypnsapi20170525==1.2.2 redis==6.4.0 tos
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
ENV PATH=/root/.cargo/bin:$PATH
RUN cd /opt \
&& wget https://mirrors.tuna.tsinghua.edu.cn/gnu/libiconv/libiconv-1.15.tar.gz \
&& tar zxvf libiconv-1.15.tar.gz \
&& cd libiconv-1.15 \
&& ./configure \
&& make \
&& make install \
&& rm -rf /opt/libiconv-1.15
RUN cd /opt \
&& git clone https://github.com/GStreamer/gstreamer.git -b 1.27.2 --depth 1 \
&& cd gstreamer \
&& meson setup builddir \
&& meson compile -C builddir \
&& meson install -C builddir \
&& ldconfig \
&& rm -rf /opt/gstreamer
RUN cd /opt \
&& git clone https://github.com/GStreamer/gst-plugins-rs.git -b gstreamer-1.27.2 --depth 1 \
&& cd gst-plugins-rs \
&& cargo build --package gst-plugin-webrtchttp --release \
&& install -m 644 target/release/libgstwebrtchttp.so $(pkg-config --variable=pluginsdir gstreamer-1.0)/ \
&& rm -rf /opt/gst-plugins-rs
RUN ldconfig
# for base docker
# RUN git clone https://github.com/KONAKONA666/q8_kernels.git --depth 1
# RUN cd q8_kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
# RUN git clone https://github.com/thu-ml/SpargeAttn.git --depth 1
# RUN cd SpargeAttn && TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9;9.0" pip install --no-cache-dir --no-build-isolation -v -e .
# for 5090 docker
RUN git clone https://github.com/ModelTC/LTX-Video-Q8-Kernels.git --depth 1
RUN cd LTX-Video-Q8-Kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
RUN git clone https://github.com/ModelTC/SpargeAttn.git --depth 1
RUN cd SpargeAttn && TORCH_CUDA_ARCH_LIST="12.0" pip install --no-cache-dir --no-build-isolation -v -e .
WORKDIR /workspace
FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel AS base
WORKDIR /app
ENV DEBIAN_FRONTEND=noninteractive
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
ENV LD_LIBRARY_PATH=/usr/local/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
RUN apt-get update && apt-get install -y vim tmux zip unzip wget git git-lfs build-essential libibverbs-dev ca-certificates \
curl iproute2 libsm6 libxext6 kmod ccache libnuma-dev libssl-dev flex bison libgtk-3-dev libpango1.0-dev \
libsoup2.4-dev libnice-dev libopus-dev libvpx-dev libx264-dev libsrtp2-dev libglib2.0-dev libdrm-dev \
&& apt-get clean && rm -rf /var/lib/apt/lists/* && git lfs install
RUN pip install --no-cache-dir packaging ninja cmake scikit-build-core uv meson ruff pre-commit fastapi uvicorn requests -U
RUN git clone https://github.com/vllm-project/vllm.git -b v0.10.0 && cd vllm \
&& python use_existing_torch.py && pip install -r requirements/build.txt \
&& pip install --no-cache-dir --no-build-isolation -v -e .
RUN git clone https://github.com/sgl-project/sglang.git -b v0.4.10 && cd sglang/sgl-kernel \
&& make build && make clean
RUN pip install --no-cache-dir diffusers transformers tokenizers accelerate safetensors opencv-python numpy imageio \
imageio-ffmpeg einops loguru qtorch ftfy av decord
RUN conda install conda-forge::ffmpeg=8.0.0 -y && ln -s /opt/conda/bin/ffmpeg /usr/bin/ffmpeg && conda clean --all -y
RUN git clone https://github.com/Dao-AILab/flash-attention.git -b v2.8.3 --recursive
RUN cd flash-attention && python setup.py install && rm -rf build
RUN cd flash-attention/hopper && python setup.py install && rm -rf build
RUN git clone https://github.com/ModelTC/SageAttention.git
RUN cd SageAttention && CUDA_ARCHITECTURES="8.0,8.6,8.9,9.0" EXT_PARALLEL=4 NVCC_APPEND_FLAGS="--threads 8" MAX_JOBS=32 pip install --no-cache-dir -v -e .
RUN git clone https://github.com/KONAKONA666/q8_kernels.git
RUN cd q8_kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
# cloud deploy
RUN pip install --no-cache-dir aio-pika "asyncpg>=0.27.0" "aioboto3>=12.0.0" PyJWT alibabacloud_dypnsapi20170525==1.2.2 redis==6.4.0 tos
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
ENV PATH=/root/.cargo/bin:$PATH
RUN cd /opt \
&& wget https://mirrors.tuna.tsinghua.edu.cn/gnu//libiconv/libiconv-1.15.tar.gz \
&& tar zxvf libiconv-1.15.tar.gz \
&& cd libiconv-1.15 \
&& ./configure \
&& make \
&& make install \
&& rm -rf /opt/libiconv-1.15
RUN cd /opt \
&& git clone https://github.com/GStreamer/gstreamer.git -b 1.24.12 --depth 1 \
&& cd gstreamer \
&& meson setup builddir \
&& meson compile -C builddir \
&& meson install -C builddir \
&& ldconfig \
&& rm -rf /opt/gstreamer
RUN cd /opt \
&& git clone https://github.com/GStreamer/gst-plugins-rs.git -b gstreamer-1.24.12 --depth 1 \
&& cd gst-plugins-rs \
&& cargo build --package gst-plugin-webrtchttp --release \
&& install -m 644 target/release/libgstwebrtchttp.so $(pkg-config --variable=pluginsdir gstreamer-1.0)/ \
&& rm -rf /opt/gst-plugins-rs
RUN ldconfig
WORKDIR /workspace
FROM node:alpine3.21 AS frontend_builder
COPY lightx2v /opt/lightx2v
RUN cd /opt/lightx2v/deploy/server/frontend \
&& npm install \
&& npm run build
FROM lightx2v/lightx2v:25111101-cu128 AS base
RUN mkdir /workspace/LightX2V
WORKDIR /workspace/LightX2V
ENV PYTHONPATH=/workspace/LightX2V
# for multi-person & animate
RUN pip install ultralytics moviepy pydub pyannote.audio decord peft onnxruntime-gpu pandas matplotlib loguru sentencepiece
RUN export COMMIT=0e78a118995e66bb27d78518c4bd9a3e95b4e266 \
&& export TORCH_CUDA_ARCH_LIST="9.0" \
&& git clone --depth 1 https://github.com/facebookresearch/sam2.git \
&& cd sam2 \
&& git fetch --depth 1 origin $COMMIT \
&& git checkout $COMMIT \
&& python setup.py install
COPY tools tools
COPY assets assets
COPY configs configs
COPY lightx2v lightx2v
COPY lightx2v_kernel lightx2v_kernel
COPY lightx2v_platform lightx2v_platform
COPY --from=frontend_builder /opt/lightx2v/deploy/server/frontend/dist lightx2v/deploy/server/frontend/dist
FROM quay.io/ascend/vllm-ascend:v0.11.0rc3
# Set envs
ENV PYTHONPATH=/workspace/LightX2V
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/Ascend/ascend-toolkit/latest/lib64:/usr/local/Ascend/driver/lib64/driver/:/usr/local/python3.11.13/lib/python3.11/site-packages/torch_npu/lib/:/usr/local/python3.11.13/lib/python3.11/site-packages/torch/lib
ENV PATH=${PATH}:/usr/local/go/bin:/usr/local/python3.11.13/bin/
# Install deps
RUN apt-get update && apt-get install ffmpeg -y && \
pip install --no-cache-dir \
imageio \
imageio-ffmpeg \
ftfy \
aiohttp \
gguf \
loguru \
accelerate \
diffusers && \
pip install --no-cache-dir torchaudio==2.7.1 --index-url https://download.pytorch.org/whl/cpu
# Copy files
COPY app app
COPY assets assets
COPY configs configs
COPY lightx2v lightx2v
COPY lightx2v_kernel lightx2v_kernel
COPY lightx2v_platform lightx2v_platform
COPY scripts scripts
COPY test_cases test_cases
COPY tools tools
FROM cambricon-base/pytorch:v25.10.0-torch2.8.0-torchmlu1.29.1-ubuntu22.04-py310 AS base
WORKDIR /workspace/LightX2V
# Set envs
ENV PYTHONPATH=/workspace/LightX2V
ENV LD_LIBRARY_PATH=/usr/local/neuware/lib64:${LD_LIBRARY_PATH}
# Install deps
RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg && \
pip install --no-cache-dir \
ftfy \
imageio \
imageio-ffmpeg \
loguru \
aiohttp \
gguf \
diffusers \
peft==0.17.0 \
transformers==4.57.1
# Copy files
COPY app app
COPY assets assets
COPY configs configs
COPY lightx2v lightx2v
COPY lightx2v_kernel lightx2v_kernel
COPY lightx2v_platform lightx2v_platform
COPY scripts scripts
COPY test_cases test_cases
COPY tools tools
# Dockerfile for LightX2V on AMD ROCm platform
# Base image: SGLang with ROCm 7.0.0 for MI300X
FROM lmsysorg/sglang:v0.5.6.post2-rocm700-mi35x
LABEL maintainer="LightX2V Contributors"
LABEL description="LightX2V video generation framework with AMD ROCm support"
# Set working directory
WORKDIR /workspace
# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
git \
ffmpeg \
libsm6 \
libxext6 \
&& rm -rf /var/lib/apt/lists/*
# Install aiter (AMD ROCm optimized kernels)
# Commit: a7d3bf8cd47afbaf6a6133c1f12e3b01d2c27b0e
ARG AITER_COMMIT=a7d3bf8cd47afbaf6a6133c1f12e3b01d2c27b0e
RUN git clone https://github.com/ROCm/aiter.git /tmp/aiter && \
cd /tmp/aiter && \
git checkout ${AITER_COMMIT} && \
pip install --no-cache-dir -e . && \
rm -rf /tmp/aiter/.git
# Install flash-attn for ROCm
RUN pip install --no-cache-dir flash-attn --no-build-isolation
# Copy LightX2V source
COPY . /workspace/LightX2V
# Install LightX2V dependencies
WORKDIR /workspace/LightX2V
RUN pip install --no-cache-dir -r requirements.txt
# Install LightX2V
RUN pip install --no-cache-dir -e .
# Set environment variables for AMD ROCm
ENV HIP_VISIBLE_DEVICES=0
ENV ROCM_PATH=/opt/rocm
ENV HSA_FORCE_FINE_GRAIN_PCIE=1
# Default command
CMD ["python", "-c", "from lightx2v import LightX2VPipeline; print('LightX2V AMD ROCm ready!')"]
version: 2
# Set the version of Python and other tools you might need
build:
  os: ubuntu-20.04
  tools:
    python: "3.10"
formats:
  - epub
sphinx:
  configuration: docs/EN/source/conf.py
python:
  install:
    - requirements: requirements-docs.txt
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.https://www.sphinx-doc.org/
exit /b 1
)
if "%1" == "" goto help
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
import logging
import os
import sys
from typing import List
import sphinxcontrib.redoc
from sphinx.ext import autodoc
logger = logging.getLogger(__name__)
sys.path.append(os.path.abspath("../.."))
# -- Project information -----------------------------------------------------
project = "Lightx2v"
copyright = "2025, Lightx2v Team"
author = "the Lightx2v Team"
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    "sphinx.ext.napoleon",
    "sphinx.ext.viewcode",
    "sphinx.ext.intersphinx",
    "sphinx_copybutton",
    "sphinx.ext.autodoc",
    "sphinx.ext.autosummary",
    "sphinx.ext.mathjax",
    "myst_parser",
    "sphinxarg.ext",
    "sphinxcontrib.redoc",
    "sphinxcontrib.openapi",
]
myst_enable_extensions = [
    "dollarmath",
    "amsmath",
]
html_static_path = ["_static"]
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns: List[str] = ["**/*.template.rst"]
# Exclude the prompt "$" when copying code
copybutton_prompt_text = r"\$ "
copybutton_prompt_is_regexp = True
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_title = project
html_theme = "sphinx_book_theme"
# html_theme = 'sphinx_rtd_theme'
html_logo = "../../../assets/img_lightx2v.png"
html_theme_options = {
    "path_to_docs": "docs/EN/source",
    "repository_url": "https://github.com/ModelTC/lightx2v",
    "use_repository_button": True,
}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = ['_static']
# Generate additional rst documentation here.
def setup(app):
    # from docs.source.generate_examples import generate_examples
    # generate_examples()
    pass


# Mock out external dependencies here.
autodoc_mock_imports = [
    "cpuinfo",
    "torch",
    "transformers",
    "psutil",
    "prometheus_client",
    "sentencepiece",
    "lightllmnumpy",
    "tqdm",
    "tensorizer",
]

for mock_target in autodoc_mock_imports:
    if mock_target in sys.modules:
        logger.info(
            "Potentially problematic mock target (%s) found; autodoc_mock_imports cannot mock modules that have already been loaded into sys.modules when the sphinx build starts.",
            mock_target,
        )


class MockedClassDocumenter(autodoc.ClassDocumenter):
    """Remove note about base class when a class is derived from object."""

    def add_line(self, line: str, source: str, *lineno: int) -> None:
        if line == " Bases: :py:class:`object`":
            return
        super().add_line(line, source, *lineno)
autodoc.ClassDocumenter = MockedClassDocumenter
navigation_with_keys = False
# ComfyUI Deployment
## ComfyUI-Lightx2vWrapper
The official ComfyUI integration nodes for LightX2V are now available in a dedicated repository, providing a complete modular configuration system and optimization features.
### Project Repository
- GitHub: [https://github.com/ModelTC/ComfyUI-Lightx2vWrapper](https://github.com/ModelTC/ComfyUI-Lightx2vWrapper)
### Key Features
- Modular Configuration System: Separate nodes for each aspect of video generation
- Support for both Text-to-Video (T2V) and Image-to-Video (I2V) generation modes
- Advanced Optimizations:
  - TeaCache acceleration (up to 3x speedup)
  - Quantization support (int8, fp8)
  - Memory optimization with CPU offloading
  - Lightweight VAE options
- LoRA Support: Chain multiple LoRA models for customization
- Multiple Model Support: wan2.1, hunyuan architectures
### Installation and Usage
Please visit the GitHub repository above for detailed installation instructions, usage tutorials, and example workflows.
# Gradio Deployment Guide
## 📖 Overview
Lightx2v is a lightweight video inference and generation engine that provides a web interface based on Gradio, supporting both Image-to-Video and Text-to-Video generation modes.
For Windows systems, we provide a convenient one-click deployment solution with automatic environment configuration and intelligent parameter optimization. Please refer to the [One-Click Gradio Startup (Recommended)](./deploy_local_windows.md#one-click-gradio-startup-recommended) section for detailed instructions.
![Gradio English Interface](../../../../assets/figs/portabl_windows/pic_gradio_en.png)
## 📁 File Structure
```
LightX2V/app/
├── gradio_demo.py # English interface demo
├── gradio_demo_zh.py # Chinese interface demo
├── run_gradio.sh # Startup script
├── README.md # Documentation
├── outputs/ # Generated video save directory
└── inference_logs.log # Inference logs
```
This project contains two main demo files:
- `gradio_demo.py` - English interface version
- `gradio_demo_zh.py` - Chinese interface version
## 🚀 Quick Start
### Environment Requirements
Follow the [Quick Start Guide](../getting_started/quickstart.md) to install the environment
#### Recommended Optimization Library Configuration
- [Flash attention](https://github.com/Dao-AILab/flash-attention)
- [Sage attention](https://github.com/thu-ml/SageAttention)
- [vllm-kernel](https://github.com/vllm-project/vllm)
- [sglang-kernel](https://github.com/sgl-project/sglang/tree/main/sgl-kernel)
- [q8-kernel](https://github.com/KONAKONA666/q8_kernels) (only supports Ada architecture GPUs)
Install each operator as needed by following the tutorial on its project homepage.
### 📥 Model Download
Models can be downloaded with one click from the frontend interface, which offers two download sources: HuggingFace and ModelScope; choose whichever suits your network. Alternatively, refer to the [Model Structure Documentation](../getting_started/model_structure.md) to download complete models (including both quantized and non-quantized versions) or only the quantized or non-quantized variants.
#### wan2.1 Model Directory Structure
```
models/
├── wan2.1_i2v_720p_lightx2v_4step.safetensors # Original precision
├── wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step.safetensors # FP8 quantization
├── wan2.1_i2v_720p_int8_lightx2v_4step.safetensors # INT8 quantization
├── wan2.1_i2v_720p_int8_lightx2v_4step_split # INT8 quantization block storage directory
├── wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step_split # FP8 quantization block storage directory
├── Other weights (e.g., t2v)
├── t5/clip/xlm-roberta-large/google # text and image encoder
├── vae/lightvae/lighttae # vae
└── config.json # Model configuration file
```
#### wan2.2 Model Directory Structure
```
models/
├── wan2.2_i2v_A14b_high_noise_lightx2v_4step_1030.safetensors # high noise original precision
├── wan2.2_i2v_A14b_high_noise_fp8_e4m3_lightx2v_4step_1030.safetensors # high noise FP8 quantization
├── wan2.2_i2v_A14b_high_noise_int8_lightx2v_4step_1030.safetensors # high noise INT8 quantization
├── wan2.2_i2v_A14b_high_noise_int8_lightx2v_4step_1030_split # high noise INT8 quantization block storage directory
├── wan2.2_i2v_A14b_low_noise_lightx2v_4step.safetensors # low noise original precision
├── wan2.2_i2v_A14b_low_noise_fp8_e4m3_lightx2v_4step.safetensors # low noise FP8 quantization
├── wan2.2_i2v_A14b_low_noise_int8_lightx2v_4step.safetensors # low noise INT8 quantization
├── wan2.2_i2v_A14b_low_noise_int8_lightx2v_4step_split # low noise INT8 quantization block storage directory
├── t5/clip/xlm-roberta-large/google # text and image encoder
├── vae/lightvae/lighttae # vae
└── config.json # Model configuration file
```
**📝 Download Instructions**:
- Model weights can be downloaded from HuggingFace:
  - [Wan2.1-Distill-Models](https://huggingface.co/lightx2v/Wan2.1-Distill-Models)
  - [Wan2.2-Distill-Models](https://huggingface.co/lightx2v/Wan2.2-Distill-Models)
- Text and Image Encoders can be downloaded from [Encoders](https://huggingface.co/lightx2v/Encoders)
- VAE can be downloaded from [Autoencoders](https://huggingface.co/lightx2v/Autoencoders)
- `xxx_split` directories (e.g., `wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step_split`) store the weights as multiple per-block safetensors files and are suited to devices with limited memory; for example, devices with 16GB of memory or less can download the split version instead (a minimal download sketch is shown below).
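For reference, here is a minimal download sketch using `huggingface_hub`. The repository IDs come from the links above; the specific filename and the local directory are placeholder assumptions that you should adapt to the variant you actually need.

```python
# Minimal sketch (not part of the repo): fetch one DiT weight file plus the
# shared encoder/VAE repos into the models/ layout shown above.
# The filename below is one example from the listing; adjust it as needed.
from huggingface_hub import hf_hub_download, snapshot_download

model_root = "models"  # placeholder local model root

# A single DiT weight file (FP8-quantized 4-step wan2.1 i2v, as listed above)
hf_hub_download(
    repo_id="lightx2v/Wan2.1-Distill-Models",
    filename="wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step.safetensors",
    local_dir=model_root,
)

# Text/image encoders and VAE weights from the repos linked above
snapshot_download(repo_id="lightx2v/Encoders", local_dir=model_root)
snapshot_download(repo_id="lightx2v/Autoencoders", local_dir=model_root)
```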
### Startup Methods
#### Method 1: Using Startup Script (Recommended)
**Linux Environment:**
```bash
# 1. Edit the startup script to configure relevant paths
cd app/
vim run_gradio.sh
# Configuration items that need to be modified:
# - lightx2v_path: Lightx2v project root directory path
# - model_path: Model root directory path (contains all model files)
# 💾 Important note: Recommend pointing model paths to SSD storage locations
# Example: /mnt/ssd/models/ or /data/ssd/models/
# 2. Run the startup script
bash run_gradio.sh
# 3. Or start with parameters
bash run_gradio.sh --lang en --port 8032
bash run_gradio.sh --lang zh --port 7862
```
**Windows Environment:**
```cmd
# 1. Edit the startup script to configure relevant paths
cd app\
notepad run_gradio_win.bat
# Configuration items that need to be modified:
# - lightx2v_path: Lightx2v project root directory path
# - model_path: Model root directory path (contains all model files)
# 💾 Important note: Recommend pointing model paths to SSD storage locations
# Example: D:\models\ or E:\models\
# 2. Run the startup script
run_gradio_win.bat
# 3. Or start with parameters
run_gradio_win.bat --lang en --port 8032
run_gradio_win.bat --lang zh --port 7862
```
#### Method 2: Direct Command Line Startup
```bash
pip install -v git+https://github.com/ModelTC/LightX2V.git
```
**Linux Environment:**
**English Interface Version:**
```bash
python gradio_demo.py \
--model_path /path/to/models \
--server_name 0.0.0.0 \
--server_port 7862
```
**Chinese Interface Version:**
```bash
python gradio_demo_zh.py \
--model_path /path/to/models \
--server_name 0.0.0.0 \
--server_port 7862
```
**Windows Environment:**
**English Interface Version:**
```cmd
python gradio_demo.py ^
--model_path D:\models ^
--server_name 127.0.0.1 ^
--server_port 7862
```
**Chinese Interface Version:**
```cmd
python gradio_demo_zh.py ^
--model_path D:\models ^
--server_name 127.0.0.1 ^
--server_port 7862
```
**💡 Tip**: Model type (wan2.1/wan2.2), task type (i2v/t2v), and specific model file selection are all configured in the Web interface.
## 📋 Command Line Parameters
| Parameter | Type | Required | Default | Description |
|-----------|------|----------|---------|-------------|
| `--model_path` | str | ✅ | - | Model root directory path (directory containing all model files) |
| `--server_port` | int | ❌ | 7862 | Server port |
| `--server_name` | str | ❌ | 0.0.0.0 | Server IP address |
| `--output_dir` | str | ❌ | ./outputs | Output video save directory |
**💡 Note**: Model type (wan2.1/wan2.2), task type (i2v/t2v), and specific model file selection are all configured in the Web interface.
## 🎯 Features
### Model Configuration
- **Model Type**: Supports wan2.1 and wan2.2 model architectures
- **Task Type**: Supports Image-to-Video (i2v) and Text-to-Video (t2v) generation modes
- **Model Selection**: Frontend automatically identifies and filters available model files, supports automatic quantization precision detection
- **Encoder Configuration**: Supports selection of T5 text encoder, CLIP image encoder, and VAE decoder
- **Operator Selection**: Supports multiple attention operators and quantization matrix multiplication operators, system automatically sorts by installation status
### Input Parameters
- **Prompt**: Describe the expected video content
- **Negative Prompt**: Specify elements you don't want to appear
- **Input Image**: Upload input image required in i2v mode
- **Resolution**: Supports multiple preset resolutions (480p/540p/720p)
- **Random Seed**: Controls the randomness of generation results
- **Inference Steps**: Affects the balance between generation quality and speed (defaults to 4 steps for distilled models)
### Video Parameters
- **FPS**: Frames per second
- **Total Frames**: Video length
- **CFG Scale Factor**: Controls prompt influence strength (1-10, defaults to 1 for distilled models)
- **Distribution Shift**: Controls generation style deviation degree (0-10)
## 🔧 Auto-Configuration Feature
The system automatically selects the optimal inference options for your hardware (GPU VRAM and CPU memory), so no manual tuning is required. The best configuration is applied at startup, including:
- **GPU Memory Optimization**: Automatically enables CPU offloading, VAE tiling inference, etc. based on VRAM size
- **CPU Memory Optimization**: Automatically enables lazy loading, module unloading, etc. based on system memory
- **Operator Selection**: Automatically selects the best installed operators (sorted by priority)
- **Quantization Configuration**: Automatically detects and applies quantization precision based on model file names
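Conceptually, this boils down to probing the available GPU VRAM and system RAM and flipping the corresponding options. The sketch below is purely illustrative; the thresholds and option names are assumptions rather than the demo's actual logic.

```python
# Illustrative sketch only: hardware-based option selection.
# Thresholds and option names are assumptions, not the demo's actual code.
import psutil
import torch


def auto_config():
    gpu_gb = torch.cuda.get_device_properties(0).total_memory / 1024**3
    ram_gb = psutil.virtual_memory().total / 1024**3
    return {
        # GPU memory optimization: offload and tiled VAE decoding on small-VRAM cards
        "cpu_offload": gpu_gb < 24,
        "use_tiling_vae": gpu_gb < 16,
        # CPU memory optimization: lazy-load weights when system RAM is tight
        "lazy_load": ram_gb < 32,
    }


print(auto_config())
```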
### Log Viewing
```bash
# View inference logs
tail -f inference_logs.log
# View GPU usage
nvidia-smi
# View system resources
htop
```
Issues and pull requests to improve this project are welcome!
**Note**: Please comply with relevant laws and regulations when using videos generated by this tool, and do not use them for illegal purposes.
# Windows Local Deployment Guide
## 📖 Overview
This document provides detailed instructions for deploying LightX2V locally on Windows environments, including batch file inference, Gradio Web interface inference, and other usage methods.
## 🚀 Quick Start
### Environment Requirements
#### Hardware Requirements
- **GPU**: NVIDIA GPU, recommended 8GB+ VRAM
- **Memory**: Recommended 16GB+ RAM
- **Storage**: An SSD is strongly recommended; mechanical hard drives will make model loading slow
## 🎯 Usage Methods
### Method 1: Using Batch File Inference
Refer to the [Quick Start Guide](../getting_started/quickstart.md) to set up the environment, then run inference with the [batch files](https://github.com/ModelTC/LightX2V/tree/main/scripts/win).
### Method 2: Using Gradio Web Interface Inference
#### Manual Gradio Configuration
Refer to the [Quick Start Guide](../getting_started/quickstart.md) to set up the environment, then follow the [Gradio Deployment Guide](./deploy_gradio.md).
#### One-Click Gradio Startup (Recommended)
**📦 Download Software Package**
- [Quark Cloud](https://pan.quark.cn/s/f44023dcf8c8)
**📁 Directory Structure**
After extraction, ensure the directory structure is as follows:
```
├── env/ # LightX2V environment directory
├── LightX2V/ # LightX2V project directory
├── start_lightx2v.bat # One-click startup script
├── lightx2v_config.txt # Configuration file
├── LightX2V使用说明.txt # LightX2V usage instructions
├── outputs/ # Generated video save directory
└── models/ # Model storage directory
```
**⚠️ Note for RTX 50 Series GPU Users**: We provide a dedicated runtime environment. Please download it from [Quark Cloud](https://pan.quark.cn/s/52b9a8c8f07a), extract it, and replace the `env/` directory in the software package with it.
**📥 Model Download**:
Models can be downloaded directly from the Gradio frontend, which offers two download sources: HuggingFace and ModelScope. Choose whichever suits you, or refer to the [Model Structure Documentation](../getting_started/model_structure.md) to download manually.
**📋 Configuration Parameters**
Edit the `lightx2v_config.txt` file and modify the following parameters as needed:
```ini
# Interface language (zh: Chinese, en: English)
lang=en
# Server port
port=8032
# GPU device ID (0, 1, 2...)
gpu=0
# Model path
model_path=models/
```
**🚀 Start Service**
Double-click the `start_lightx2v.bat` file to run it; the script will:
1. Automatically read the configuration file
2. Verify model paths and file integrity
3. Start the Gradio Web interface
4. Automatically open a browser to access the service
![Gradio English Interface](../../../../assets/figs/portabl_windows/pic_gradio_en.png)
**⚠️ Important Notes**:
- **Display Issues**: If the webpage opens blank or displays abnormally, please run `pip install --upgrade gradio` to upgrade the Gradio version.
# Service Deployment
LightX2V provides asynchronous service functionality. The code entry point is [here](https://github.com/ModelTC/LightX2V/blob/main/lightx2v/server/main.py).
### Start the Service
```shell
# Modify the paths in the script
bash scripts/server/start_server.sh
```
The `--port 8000` option means the service will bind to port `8000` on the local machine. You can change this as needed.
### Client Sends Request
```shell
python scripts/server/post.py
```
The service endpoint is: `/v1/tasks/`
The `message` parameter in `scripts/server/post.py` is as follows:
```python
message = {
"prompt": "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage.",
"negative_prompt": "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走",
"image_path": "",
"target_shape": [720, 720],
}
```
`prompt`, `negative_prompt`, and `image_path` are the basic inputs for video generation. `image_path` may be an empty string, indicating that no image input is used. `target_shape` optionally sets the output video resolution and defaults to the configured resolution.
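As a minimal alternative to `post.py`, a task can be submitted directly with `requests`. The base URL below assumes the default `--port 8000`, and the shape of the JSON response (e.g. a task identifier field) is an assumption:

```python
# Minimal sketch: submit a generation task directly to the server
# (equivalent in spirit to scripts/server/post.py).
import requests

BASE_URL = "http://localhost:8000"  # assumes the default --port 8000

message = {
    "prompt": "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage.",
    "image_path": "",            # empty string: no image input
    "target_shape": [720, 720],  # optional output resolution
}

resp = requests.post(f"{BASE_URL}/v1/tasks/", json=message)
resp.raise_for_status()
print(resp.json())  # expected to contain a task identifier for later status checks
```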
### Client Checks Server Status
```shell
python scripts/server/check_status.py
```
The service endpoints include:
1. `/v1/service/status` is used to check the status of the service. It returns whether the service is `busy` or `idle`. The service only accepts new requests when it is `idle`.
2. `/v1/tasks/` is used to get all tasks received and completed by the server.
3. `/v1/tasks/{task_id}/status` is used to get the status of a specified `task_id`. It returns whether the task is `processing` or `completed`.
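A minimal status-polling sketch against these endpoints might look like the following; the exact field names in the JSON responses are assumptions:

```python
# Minimal sketch: poll a task until it completes.
# Field names in the response are assumptions based on the description above.
import time

import requests

BASE_URL = "http://localhost:8000"
task_id = "your-task-id"  # identifier returned when the task was created

while True:
    status = requests.get(f"{BASE_URL}/v1/tasks/{task_id}/status").json()
    print(status)
    if status.get("status") == "completed":
        break
    time.sleep(5)
```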
### Client Stops the Current Task on the Server at Any Time
```shell
python scripts/server/stop_running_task.py
```
The service endpoint is: `/v1/tasks/running`
After terminating the task, the server will not exit but will return to waiting for new requests.
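The same effect can be achieved by calling the endpoint directly with a DELETE request (see the API summary table below), for example:

```python
# Minimal sketch: stop the currently running task via the DELETE endpoint.
import requests

resp = requests.delete("http://localhost:8000/v1/tasks/running")
print(resp.status_code, resp.text)
```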
### Starting Multiple Services on a Single Node
On a single node, you can start multiple services with `scripts/server/start_server.sh` (note that each service on the same IP must use a different port), or start several services at once with `scripts/server/start_multi_servers.sh`:
```shell
num_gpus=8 bash scripts/server/start_multi_servers.sh
```
Where `num_gpus` indicates the number of services to start; the services will run on consecutive ports starting from `--start_port`.
### Scheduling Between Multiple Services
```shell
python scripts/server/post_multi_servers.py
```
`post_multi_servers.py` will schedule multiple client requests based on the idle status of the services.
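Conceptually, the scheduling loop probes `/v1/service/status` on each port and dispatches the next request to the first idle service. A minimal sketch follows; the port range and the exact status payload are assumptions:

```python
# Minimal sketch: dispatch a request to the first idle service among several
# servers on consecutive ports. Port range and payload format are assumptions.
import requests

START_PORT = 8000
NUM_SERVERS = 8


def find_idle_server():
    for port in range(START_PORT, START_PORT + NUM_SERVERS):
        try:
            status = requests.get(f"http://localhost:{port}/v1/service/status", timeout=2).json()
        except requests.RequestException:
            continue
        if "idle" in str(status).lower():  # exact field name is unknown
            return port
    return None


port = find_idle_server()
if port is not None:
    requests.post(f"http://localhost:{port}/v1/tasks/", json={"prompt": "..."})
```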
### API Endpoints Summary
| Endpoint | Method | Description |
|----------|--------|-------------|
| `/v1/tasks/` | POST | Create video generation task |
| `/v1/tasks/form` | POST | Create video generation task via form |
| `/v1/tasks/` | GET | Get all task list |
| `/v1/tasks/{task_id}/status` | GET | Get status of specified task |
| `/v1/tasks/{task_id}/result` | GET | Get result video file of specified task |
| `/v1/tasks/running` | DELETE | Stop currently running task |
| `/v1/files/download/{file_path}` | GET | Download file |
| `/v1/service/status` | GET | Get service status |
# Deployment for Low Latency Scenarios
In low-latency scenarios, we prioritize speed over concerns such as GPU memory and RAM overhead. We provide two solutions:
## 💡 Solution 1: Inference with Step Distillation Model
This solution can refer to the [Step Distillation Documentation](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/step_distill.html)
🧠 **Step Distillation** is a very direct way to accelerate inference for video generation models: distilling from 50 steps down to 4 cuts the denoising time to roughly 4/50 of the original. It can also be combined with the following solutions:
1. [Efficient Attention Mechanism Solution](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/attention.html)
2. [Model Quantization](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/quantization.html)
## 💡 Solution 2: Inference with Non-Step Distillation Model
Step distillation requires substantial training resources, and the distilled model may show a reduced dynamic range in the generated video.
For the original, non-distilled model, the following solutions can be used individually or in combination for acceleration:
1. [Parallel Inference](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/parallel.html) for multi-GPU parallel acceleration.
2. [Feature Caching](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/cache.html) to reduce the actual inference steps.
3. [Efficient Attention Mechanism Solution](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/attention.html) to accelerate Attention inference.
4. [Model Quantization](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/quantization.html) to accelerate Linear layer inference.
5. [Variable Resolution Inference](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/changing_resolution.html) to reduce the resolution of intermediate inference steps.
## 💡 Using Tiny VAE
In some cases, the VAE component can be time-consuming. You can use a lightweight VAE for acceleration, which can also reduce some GPU memory usage.
```json
{
    "use_tae": true,
    "tae_path": "/path/to/taew2_1.pth"
}
```
The taew2_1.pth weights can be downloaded from [here](https://github.com/madebyollin/taehv/raw/refs/heads/main/taew2_1.pth)
## ⚠️ Note
Some acceleration solutions currently cannot be used together, and we are working to resolve this issue.
If you have any questions, feel free to report bugs or request features in [🐛 GitHub Issues](https://github.com/ModelTC/lightx2v/issues)