Commit a1ebc651 authored by xuwx1

update lightx2v

parent 5a4db490
{
"infer_steps": 4,
"target_video_length": 81,
"text_len": 512,
"target_height": 480,
"target_width": 832,
"self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3",
"sample_guide_scale": [
4.0,
3.0
],
"sample_shift": 5.0,
"enable_cfg": false,
"cpu_offload": true,
"offload_granularity": "model",
"t5_cpu_offload": false,
"vae_cpu_offload": false,
"boundary_step_index": 2,
"denoising_step_list": [
1000,
750,
500,
250
],
"lora_configs": [
{
"name": "low_noise_model",
"path": "Wan2.1-T2V-14B/loras/Wan21_T2V_14B_lightx2v_cfg_step_distill_lora_rank64.safetensors",
"strength": 1.0
}
]
}
{
"infer_steps": 50,
"target_video_length": 121,
"text_len": 512,
"target_height": 704,
"target_width": 1280,
"num_channels_latents": 48,
"vae_stride": [
4,
16,
16
],
"self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3",
"sample_guide_scale": 5.0,
"sample_shift": 5.0,
"enable_cfg": true,
"cpu_offload": false,
"offload_granularity": "model",
"t5_cpu_offload": false,
"vae_cpu_offload": false,
"fps": 24,
"use_image_encoder": false
}
{
"infer_steps": 50,
"target_video_length": 121,
"text_len": 512,
"target_height": 704,
"target_width": 1280,
"num_channels_latents": 48,
"vae_stride": [
4,
16,
16
],
"self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3",
"sample_guide_scale": 5.0,
"sample_shift": 5.0,
"enable_cfg": true,
"fps": 24,
"use_image_encoder": false,
"cpu_offload": true,
"offload_granularity": "model",
"t5_cpu_offload": false,
"vae_cpu_offload": false,
"vae_offload_cache": true
}
{
"infer_steps": 50,
"target_video_length": 121,
"text_len": 512,
"target_height": 704,
"target_width": 1280,
"num_channels_latents": 48,
"vae_stride": [
4,
16,
16
],
"self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3",
"sample_guide_scale": 5.0,
"sample_shift": 5.0,
"enable_cfg": true,
"cpu_offload": false,
"offload_granularity": "model",
"t5_cpu_offload": false,
"vae_cpu_offload": false,
"fps": 24
}
{
"infer_steps": 50,
"target_video_length": 121,
"text_len": 512,
"target_height": 704,
"target_width": 1280,
"num_channels_latents": 48,
"vae_stride": [
4,
16,
16
],
"self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3",
"sample_guide_scale": 5.0,
"sample_shift": 5.0,
"enable_cfg": true,
"fps": 24,
"cpu_offload": true,
"offload_granularity": "model",
"t5_cpu_offload": false,
"vae_cpu_offload": false,
"vae_offload_cache": true
}
FROM pytorch/pytorch:2.8.0-cuda12.8-cudnn9-devel AS base
WORKDIR /app
ENV DEBIAN_FRONTEND=noninteractive
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
ENV LD_LIBRARY_PATH=/usr/local/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
RUN apt-get update && apt-get install -y vim tmux zip unzip bzip2 wget git git-lfs build-essential libibverbs-dev ca-certificates \
curl iproute2 libsm6 libxext6 kmod ccache libnuma-dev libssl-dev flex bison libgtk-3-dev libpango1.0-dev \
libsoup2.4-dev libnice-dev libopus-dev libvpx-dev libx264-dev libsrtp2-dev libglib2.0-dev libdrm-dev libjpeg-dev libpng-dev \
&& apt-get clean && rm -rf /var/lib/apt/lists/* && git lfs install
RUN conda install conda-forge::ffmpeg=8.0.0 -y && conda clean --all -y
RUN pip install --no-cache-dir packaging ninja cmake scikit-build-core uv meson ruff pre-commit fastapi uvicorn requests -U
RUN git clone https://github.com/vllm-project/vllm.git && cd vllm \
&& python use_existing_torch.py && pip install --no-cache-dir -r requirements/build.txt \
&& pip install --no-cache-dir --no-build-isolation -v -e .
RUN git clone https://github.com/sgl-project/sglang.git && cd sglang/sgl-kernel \
&& make build && make clean
RUN pip install --no-cache-dir diffusers transformers tokenizers accelerate safetensors opencv-python numpy imageio \
imageio-ffmpeg einops loguru qtorch ftfy av decord matplotlib debugpy
RUN git clone https://github.com/Dao-AILab/flash-attention.git --recursive
RUN cd flash-attention && python setup.py install && rm -rf build
RUN cd flash-attention/hopper && python setup.py install && rm -rf build
RUN git clone https://github.com/ModelTC/SageAttention.git --depth 1
RUN cd SageAttention && CUDA_ARCHITECTURES="8.0,8.6,8.9,9.0,12.0" EXT_PARALLEL=4 NVCC_APPEND_FLAGS="--threads 8" MAX_JOBS=32 pip install --no-cache-dir -v -e .
RUN git clone https://github.com/ModelTC/SageAttention-1104.git --depth 1
RUN cd SageAttention-1104/sageattention3_blackwell && python setup.py install && rm -rf build
RUN git clone https://github.com/SandAI-org/MagiAttention.git --recursive
RUN cd MagiAttention && TORCH_CUDA_ARCH_LIST="9.0" pip install --no-cache-dir --no-build-isolation -v -e .
RUN git clone https://github.com/ModelTC/FlashVSR.git --depth 1
RUN cd FlashVSR && pip install --no-cache-dir -v -e .
COPY lightx2v_kernel /app/lightx2v_kernel
RUN git clone https://github.com/NVIDIA/cutlass.git --depth 1 && cd /app/lightx2v_kernel && MAX_JOBS=32 CMAKE_BUILD_PARALLEL_LEVEL=4 \
uv build --wheel \
-Cbuild-dir=build . \
-Ccmake.define.CUTLASS_PATH=/app/cutlass \
--verbose \
--color=always \
--no-build-isolation \
&& pip install dist/*whl --force-reinstall --no-deps \
&& rm -rf /app/lightx2v_kernel && rm -rf /app/cutlass
# cloud deploy
RUN pip install --no-cache-dir aio-pika "asyncpg>=0.27.0" "aioboto3>=12.0.0" PyJWT alibabacloud_dypnsapi20170525==1.2.2 redis==6.4.0 tos
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
ENV PATH=/root/.cargo/bin:$PATH
RUN cd /opt \
&& wget https://mirrors.tuna.tsinghua.edu.cn/gnu/libiconv/libiconv-1.15.tar.gz \
&& tar zxvf libiconv-1.15.tar.gz \
&& cd libiconv-1.15 \
&& ./configure \
&& make \
&& make install \
&& rm -rf /opt/libiconv-1.15
RUN cd /opt \
&& git clone https://github.com/GStreamer/gstreamer.git -b 1.27.2 --depth 1 \
&& cd gstreamer \
&& meson setup builddir \
&& meson compile -C builddir \
&& meson install -C builddir \
&& ldconfig \
&& rm -rf /opt/gstreamer
RUN cd /opt \
&& git clone https://github.com/GStreamer/gst-plugins-rs.git -b gstreamer-1.27.2 --depth 1 \
&& cd gst-plugins-rs \
&& cargo build --package gst-plugin-webrtchttp --release \
&& install -m 644 target/release/libgstwebrtchttp.so $(pkg-config --variable=pluginsdir gstreamer-1.0)/ \
&& rm -rf /opt/gst-plugins-rs
RUN ldconfig
# q8f for base docker
RUN git clone https://github.com/KONAKONA666/q8_kernels.git --depth 1
RUN cd q8_kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
# q8f for 5090 docker
# RUN git clone https://github.com/ModelTC/LTX-Video-Q8-Kernels.git --depth 1
# RUN cd LTX-Video-Q8-Kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
WORKDIR /workspace
FROM pytorch/pytorch:2.8.0-cuda12.8-cudnn9-devel AS base
WORKDIR /app
ENV DEBIAN_FRONTEND=noninteractive
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
ENV LD_LIBRARY_PATH=/usr/local/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
RUN apt-get update && apt-get install -y vim tmux zip unzip bzip2 wget git git-lfs build-essential libibverbs-dev ca-certificates \
curl iproute2 libsm6 libxext6 kmod ccache libnuma-dev libssl-dev flex bison libgtk-3-dev libpango1.0-dev \
libsoup2.4-dev libnice-dev libopus-dev libvpx-dev libx264-dev libsrtp2-dev libglib2.0-dev libdrm-dev libjpeg-dev libpng-dev \
&& apt-get clean && rm -rf /var/lib/apt/lists/* && git lfs install
RUN conda install conda-forge::ffmpeg=8.0.0 -y && conda clean --all -y
RUN pip install --no-cache-dir packaging ninja cmake scikit-build-core uv meson ruff pre-commit fastapi uvicorn requests -U
RUN git clone https://github.com/vllm-project/vllm.git && cd vllm \
&& python use_existing_torch.py && pip install --no-cache-dir -r requirements/build.txt \
&& pip install --no-cache-dir --no-build-isolation -v -e .
RUN git clone https://github.com/sgl-project/sglang.git && cd sglang/sgl-kernel \
&& make build && make clean
RUN pip install --no-cache-dir diffusers transformers tokenizers accelerate safetensors opencv-python numpy imageio \
imageio-ffmpeg einops loguru qtorch ftfy av decord matplotlib debugpy
RUN git clone https://github.com/Dao-AILab/flash-attention.git --recursive
RUN cd flash-attention && python setup.py install && rm -rf build
RUN cd flash-attention/hopper && python setup.py install && rm -rf build
RUN git clone https://github.com/ModelTC/SageAttention.git --depth 1
RUN cd SageAttention && CUDA_ARCHITECTURES="8.0,8.6,8.9,9.0,12.0" EXT_PARALLEL=4 NVCC_APPEND_FLAGS="--threads 8" MAX_JOBS=32 pip install --no-cache-dir -v -e .
RUN git clone https://github.com/ModelTC/SageAttention-1104.git --depth 1
RUN cd SageAttention-1104/sageattention3_blackwell && python setup.py install && rm -rf build
RUN git clone https://github.com/SandAI-org/MagiAttention.git --recursive
RUN cd MagiAttention && TORCH_CUDA_ARCH_LIST="9.0" pip install --no-cache-dir --no-build-isolation -v -e .
RUN git clone https://github.com/ModelTC/FlashVSR.git --depth 1
RUN cd FlashVSR && pip install --no-cache-dir -v -e .
COPY lightx2v_kernel /app/lightx2v_kernel
RUN git clone https://github.com/NVIDIA/cutlass.git --depth 1 && cd /app/lightx2v_kernel && MAX_JOBS=32 CMAKE_BUILD_PARALLEL_LEVEL=4 \
uv build --wheel \
-Cbuild-dir=build . \
-Ccmake.define.CUTLASS_PATH=/app/cutlass \
--verbose \
--color=always \
--no-build-isolation \
&& pip install dist/*whl --force-reinstall --no-deps \
&& rm -rf /app/lightx2v_kernel && rm -rf /app/cutlass
# cloud deploy
RUN pip install --no-cache-dir aio-pika "asyncpg>=0.27.0" "aioboto3>=12.0.0" PyJWT alibabacloud_dypnsapi20170525==1.2.2 redis==6.4.0 tos
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
ENV PATH=/root/.cargo/bin:$PATH
RUN cd /opt \
&& wget https://mirrors.tuna.tsinghua.edu.cn/gnu/libiconv/libiconv-1.15.tar.gz \
&& tar zxvf libiconv-1.15.tar.gz \
&& cd libiconv-1.15 \
&& ./configure \
&& make \
&& make install \
&& rm -rf /opt/libiconv-1.15
RUN cd /opt \
&& git clone https://github.com/GStreamer/gstreamer.git -b 1.27.2 --depth 1 \
&& cd gstreamer \
&& meson setup builddir \
&& meson compile -C builddir \
&& meson install -C builddir \
&& ldconfig \
&& rm -rf /opt/gstreamer
RUN cd /opt \
&& git clone https://github.com/GStreamer/gst-plugins-rs.git -b gstreamer-1.27.2 --depth 1 \
&& cd gst-plugins-rs \
&& cargo build --package gst-plugin-webrtchttp --release \
&& install -m 644 target/release/libgstwebrtchttp.so $(pkg-config --variable=pluginsdir gstreamer-1.0)/ \
&& rm -rf /opt/gst-plugins-rs
RUN ldconfig
# q8f for base docker
# RUN git clone https://github.com/KONAKONA666/q8_kernels.git --depth 1
# RUN cd q8_kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
# q8f for 5090 docker
RUN git clone https://github.com/ModelTC/LTX-Video-Q8-Kernels.git --depth 1
RUN cd LTX-Video-Q8-Kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
WORKDIR /workspace
FROM cambricon-base/pytorch:v25.10.0-torch2.8.0-torchmlu1.29.1-ubuntu22.04-py310 AS base
WORKDIR /workspace/LightX2V
# Set envs
ENV PYTHONPATH=/workspace/LightX2V
ENV LD_LIBRARY_PATH=/usr/local/neuware/lib64:${LD_LIBRARY_PATH}
# Install deps
RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg && \
pip install --no-cache-dir \
ftfy \
imageio \
imageio-ffmpeg \
loguru \
aiohttp \
gguf \
diffusers \
peft==0.17.0 \
transformers==4.57.1 && rm -rf /var/lib/apt/lists/*
# Copy files
COPY app app
COPY assets assets
COPY configs configs
COPY lightx2v lightx2v
COPY lightx2v_kernel lightx2v_kernel
COPY lightx2v_platform lightx2v_platform
COPY scripts scripts
COPY test_cases test_cases
COPY tools tools
FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel AS base
WORKDIR /app
ENV DEBIAN_FRONTEND=noninteractive
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
ENV LD_LIBRARY_PATH=/usr/local/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
RUN apt-get update && apt-get install -y vim tmux zip unzip wget git git-lfs build-essential libibverbs-dev ca-certificates \
curl iproute2 libsm6 libxext6 kmod ccache libnuma-dev libssl-dev flex bison libgtk-3-dev libpango1.0-dev \
libsoup2.4-dev libnice-dev libopus-dev libvpx-dev libx264-dev libsrtp2-dev libglib2.0-dev libdrm-dev \
&& apt-get clean && rm -rf /var/lib/apt/lists/* && git lfs install
RUN pip install --no-cache-dir packaging ninja cmake scikit-build-core uv meson ruff pre-commit fastapi uvicorn requests -U
RUN git clone https://github.com/vllm-project/vllm.git -b v0.10.0 && cd vllm \
&& python use_existing_torch.py && pip install -r requirements/build.txt \
&& pip install --no-cache-dir --no-build-isolation -v -e .
RUN git clone https://github.com/sgl-project/sglang.git -b v0.4.10 && cd sglang/sgl-kernel \
&& make build && make clean
RUN pip install --no-cache-dir diffusers transformers tokenizers accelerate safetensors opencv-python numpy imageio \
imageio-ffmpeg einops loguru qtorch ftfy av decord
RUN conda install conda-forge::ffmpeg=8.0.0 -y && ln -s /opt/conda/bin/ffmpeg /usr/bin/ffmpeg && conda clean --all -y
RUN git clone https://github.com/Dao-AILab/flash-attention.git -b v2.8.3 --recursive
RUN cd flash-attention && python setup.py install && rm -rf build
RUN cd flash-attention/hopper && python setup.py install && rm -rf build
RUN git clone https://github.com/ModelTC/SageAttention.git
RUN cd SageAttention && CUDA_ARCHITECTURES="8.0,8.6,8.9,9.0" EXT_PARALLEL=4 NVCC_APPEND_FLAGS="--threads 8" MAX_JOBS=32 pip install --no-cache-dir -v -e .
RUN git clone https://github.com/KONAKONA666/q8_kernels.git
RUN cd q8_kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
# cloud deploy
RUN pip install --no-cache-dir aio-pika "asyncpg>=0.27.0" "aioboto3>=12.0.0" PyJWT alibabacloud_dypnsapi20170525==1.2.2 redis==6.4.0 tos
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
ENV PATH=/root/.cargo/bin:$PATH
RUN cd /opt \
&& wget https://mirrors.tuna.tsinghua.edu.cn/gnu/libiconv/libiconv-1.15.tar.gz \
&& tar zxvf libiconv-1.15.tar.gz \
&& cd libiconv-1.15 \
&& ./configure \
&& make \
&& make install \
&& rm -rf /opt/libiconv-1.15
RUN cd /opt \
&& git clone https://github.com/GStreamer/gstreamer.git -b 1.24.12 --depth 1 \
&& cd gstreamer \
&& meson setup builddir \
&& meson compile -C builddir \
&& meson install -C builddir \
&& ldconfig \
&& rm -rf /opt/gstreamer
RUN cd /opt \
&& git clone https://github.com/GStreamer/gst-plugins-rs.git -b gstreamer-1.24.12 --depth 1 \
&& cd gst-plugins-rs \
&& cargo build --package gst-plugin-webrtchttp --release \
&& install -m 644 target/release/libgstwebrtchttp.so $(pkg-config --variable=pluginsdir gstreamer-1.0)/ \
&& rm -rf /opt/gst-plugins-rs
RUN ldconfig
WORKDIR /workspace
FROM node:alpine3.21 AS frontend_builder
COPY lightx2v /opt/lightx2v
RUN cd /opt/lightx2v/deploy/server/frontend \
&& npm install \
&& npm run build
FROM lightx2v/lightx2v:25111101-cu128 AS base
RUN mkdir /workspace/LightX2V
WORKDIR /workspace/LightX2V
ENV PYTHONPATH=/workspace/LightX2V
# for multi-person & animate
RUN pip install ultralytics moviepy pydub pyannote.audio onnxruntime decord peft pandas matplotlib loguru sentencepiece
RUN export COMMIT=0e78a118995e66bb27d78518c4bd9a3e95b4e266 \
&& export TORCH_CUDA_ARCH_LIST="9.0" \
&& git clone --depth 1 https://github.com/facebookresearch/sam2.git \
&& cd sam2 \
&& git fetch --depth 1 origin $COMMIT \
&& git checkout $COMMIT \
&& python setup.py install
COPY tools tools
COPY assets assets
COPY configs configs
COPY lightx2v lightx2v
COPY lightx2v_kernel lightx2v_kernel
COPY lightx2v_platform lightx2v_platform
COPY --from=frontend_builder /opt/lightx2v/deploy/server/frontend/dist lightx2v/deploy/server/frontend/dist
version: 2
# Set the version of Python and other tools you might need
build:
  os: ubuntu-20.04
  tools:
    python: "3.10"

formats:
  - epub

sphinx:
  configuration: docs/EN/source/conf.py

python:
  install:
    - requirements: requirements-docs.txt
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.https://www.sphinx-doc.org/
exit /b 1
)
if "%1" == "" goto help
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
import logging
import os
import sys
from typing import List
import sphinxcontrib.redoc
from sphinx.ext import autodoc
logger = logging.getLogger(__name__)
sys.path.append(os.path.abspath("../.."))
# -- Project information -----------------------------------------------------
project = "Lightx2v"
copyright = "2025, Lightx2v Team"
author = "the Lightx2v Team"
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
"sphinx.ext.napoleon",
"sphinx.ext.viewcode",
"sphinx.ext.intersphinx",
"sphinx_copybutton",
"sphinx.ext.autodoc",
"sphinx.ext.autosummary",
"sphinx.ext.mathjax",
"myst_parser",
"sphinxarg.ext",
"sphinxcontrib.redoc",
"sphinxcontrib.openapi",
]
myst_enable_extensions = [
"dollarmath",
"amsmath",
]
html_static_path = ["_static"]
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns: List[str] = ["**/*.template.rst"]
# Exclude the prompt "$" when copying code
copybutton_prompt_text = r"\$ "
copybutton_prompt_is_regexp = True
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_title = project
html_theme = "sphinx_book_theme"
# html_theme = 'sphinx_rtd_theme'
html_logo = "../../../assets/img_lightx2v.png"
html_theme_options = {
"path_to_docs": "docs/EN/source",
"repository_url": "https://github.com/ModelTC/lightx2v",
"use_repository_button": True,
}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = ['_static']
# Generate additional rst documentation here.
def setup(app):
    # from docs.source.generate_examples import generate_examples
    # generate_examples()
    pass
# Mock out external dependencies here.
autodoc_mock_imports = [
"cpuinfo",
"torch",
"transformers",
"psutil",
"prometheus_client",
"sentencepiece",
"lightllmnumpy",
"tqdm",
"tensorizer",
]
for mock_target in autodoc_mock_imports:
    if mock_target in sys.modules:
        logger.info(
            "Potentially problematic mock target (%s) found; autodoc_mock_imports cannot mock modules that have already been loaded into sys.modules when the sphinx build starts.",
            mock_target,
        )
class MockedClassDocumenter(autodoc.ClassDocumenter):
    """Remove note about base class when a class is derived from object."""

    def add_line(self, line: str, source: str, *lineno: int) -> None:
        if line == " Bases: :py:class:`object`":
            return
        super().add_line(line, source, *lineno)
autodoc.ClassDocumenter = MockedClassDocumenter
navigation_with_keys = False
# ComfyUI Deployment
## ComfyUI-Lightx2vWrapper
The official ComfyUI integration nodes for LightX2V are now available in a dedicated repository, providing a complete modular configuration system and optimization features.
### Project Repository
- GitHub: [https://github.com/ModelTC/ComfyUI-Lightx2vWrapper](https://github.com/ModelTC/ComfyUI-Lightx2vWrapper)
### Key Features
- Modular Configuration System: Separate nodes for each aspect of video generation
- Support for both Text-to-Video (T2V) and Image-to-Video (I2V) generation modes
- Advanced Optimizations:
- TeaCache acceleration (up to 3x speedup)
- Quantization support (int8, fp8)
- Memory optimization with CPU offloading
- Lightweight VAE options
- LoRA Support: Chain multiple LoRA models for customization
- Multiple Model Support: wan2.1, hunyuan architectures
### Installation and Usage
Please visit the GitHub repository above for detailed installation instructions, usage tutorials, and example workflows.
# Gradio Deployment Guide
## 📖 Overview
Lightx2v is a lightweight video inference and generation engine that provides a web interface based on Gradio, supporting both Image-to-Video and Text-to-Video generation modes.
For Windows systems, we provide a convenient one-click deployment solution with automatic environment configuration and intelligent parameter optimization. Please refer to the [One-Click Gradio Startup (Recommended)](./deploy_local_windows.md#one-click-gradio-startup-recommended) section for detailed instructions.
![Gradio English Interface](../../../../assets/figs/portabl_windows/pic_gradio_en.png)
## 📁 File Structure
```
LightX2V/app/
├── gradio_demo.py # English interface demo
├── gradio_demo_zh.py # Chinese interface demo
├── run_gradio.sh # Startup script
├── README.md # Documentation
├── outputs/ # Generated video save directory
└── inference_logs.log # Inference logs
```
This project contains two main demo files:
- `gradio_demo.py` - English interface version
- `gradio_demo_zh.py` - Chinese interface version
## 🚀 Quick Start
### Environment Requirements
Follow the [Quick Start Guide](../getting_started/quickstart.md) to install the environment
#### Recommended Optimization Library Configuration
- [Flash attention](https://github.com/Dao-AILab/flash-attention)
- [Sage attention](https://github.com/thu-ml/SageAttention)
- [vllm-kernel](https://github.com/vllm-project/vllm)
- [sglang-kernel](https://github.com/sgl-project/sglang/tree/main/sgl-kernel)
- [q8-kernel](https://github.com/KONAKONA666/q8_kernels) (only supports ADA architecture GPUs)
Install each operator as needed by following the instructions on its project homepage.
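As a rough example, the commands below mirror how this repository's Dockerfile builds two of these operators. Build flags and supported GPU architectures change over time, so treat this as a sketch and prefer each project's README:

```bash
# Flash Attention (requires a CUDA toolkit matching your PyTorch build)
pip install --no-cache-dir packaging ninja
pip install --no-cache-dir flash-attn --no-build-isolation

# Sage Attention, built from source
git clone https://github.com/thu-ml/SageAttention.git
cd SageAttention && pip install --no-cache-dir -v -e . && cd ..
```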
### 📥 Model Download
Refer to the [Model Structure Documentation](../getting_started/model_structure.md) to download complete models (including quantized and non-quantized versions) or download only quantized/non-quantized versions.
#### wan2.1 Model Directory Structure
```
models/
├── wan2.1_i2v_720p_lightx2v_4step.safetensors # Original precision
├── wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step.safetensors # FP8 quantization
├── wan2.1_i2v_720p_int8_lightx2v_4step.safetensors # INT8 quantization
├── wan2.1_i2v_720p_int8_lightx2v_4step_split # INT8 quantization block storage directory
├── wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step_split # FP8 quantization block storage directory
├── Other weights (e.g., t2v)
├── t5/clip/xlm-roberta-large/google # text and image encoder
├── vae/lightvae/lighttae # vae
└── config.json # Model configuration file
```
#### wan2.2 Model Directory Structure
```
models/
├── wan2.2_i2v_A14b_high_noise_lightx2v_4step_1030.safetensors # high noise original precision
├── wan2.2_i2v_A14b_high_noise_fp8_e4m3_lightx2v_4step_1030.safetensors # high noise FP8 quantization
├── wan2.2_i2v_A14b_high_noise_int8_lightx2v_4step_1030.safetensors # high noise INT8 quantization
├── wan2.2_i2v_A14b_high_noise_int8_lightx2v_4step_1030_split # high noise INT8 quantization block storage directory
├── wan2.2_i2v_A14b_low_noise_lightx2v_4step.safetensors # low noise original precision
├── wan2.2_i2v_A14b_low_noise_fp8_e4m3_lightx2v_4step.safetensors # low noise FP8 quantization
├── wan2.2_i2v_A14b_low_noise_int8_lightx2v_4step.safetensors # low noise INT8 quantization
├── wan2.2_i2v_A14b_low_noise_int8_lightx2v_4step_split # low noise INT8 quantization block storage directory
├── t5/clip/xlm-roberta-large/google # text and image encoder
├── vae/lightvae/lighttae # vae
└── config.json # Model configuration file
```
**📝 Download Instructions**:
- Model weights can be downloaded from HuggingFace:
- [Wan2.1-Distill-Models](https://huggingface.co/lightx2v/Wan2.1-Distill-Models)
- [Wan2.2-Distill-Models](https://huggingface.co/lightx2v/Wan2.2-Distill-Models)
- Text and Image Encoders can be downloaded from [Encoders](https://huggingface.co/lightx2v/Encoders)
- VAE can be downloaded from [Autoencoders](https://huggingface.co/lightx2v/Autoencoders)
- The `xxx_split` directories (e.g., `wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step_split`) store the weights as multiple safetensors files split by block, which suits devices with limited memory; devices with 16GB of RAM or less should download these according to their needs.
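For example, the weights can be fetched with `huggingface-cli`; the `--include` pattern below is only an illustration, so adjust it to the files and precision you actually need:

```bash
# Download only the INT8-quantized wan2.1 i2v weights into the local models/ directory
huggingface-cli download lightx2v/Wan2.1-Distill-Models \
    --include "wan2.1_i2v_720p_int8*" \
    --local-dir ./models

# Text/image encoders and VAE weights
huggingface-cli download lightx2v/Encoders --local-dir ./models
huggingface-cli download lightx2v/Autoencoders --local-dir ./models
```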
### Startup Methods
#### Method 1: Using Startup Script (Recommended)
**Linux Environment:**
```bash
# 1. Edit the startup script to configure relevant paths
cd app/
vim run_gradio.sh
# Configuration items that need to be modified:
# - lightx2v_path: Lightx2v project root directory path
# - model_path: Model root directory path (contains all model files)
# 💾 Important note: Recommend pointing model paths to SSD storage locations
# Example: /mnt/ssd/models/ or /data/ssd/models/
# 2. Run the startup script
bash run_gradio.sh
# 3. Or start with parameters
bash run_gradio.sh --lang en --port 8032
bash run_gradio.sh --lang zh --port 7862
```
**Windows Environment:**
```cmd
# 1. Edit the startup script to configure relevant paths
cd app\
notepad run_gradio_win.bat
# Configuration items that need to be modified:
# - lightx2v_path: Lightx2v project root directory path
# - model_path: Model root directory path (contains all model files)
# 💾 Important note: Recommend pointing model paths to SSD storage locations
# Example: D:\models\ or E:\models\
# 2. Run the startup script
run_gradio_win.bat
# 3. Or start with parameters
run_gradio_win.bat --lang en --port 8032
run_gradio_win.bat --lang zh --port 7862
```
#### Method 2: Direct Command Line Startup
```bash
pip install -v git+https://github.com/ModelTC/LightX2V.git
```
**Linux Environment:**
**English Interface Version:**
```bash
python gradio_demo.py \
--model_path /path/to/models \
--server_name 0.0.0.0 \
--server_port 7862
```
**Chinese Interface Version:**
```bash
python gradio_demo_zh.py \
--model_path /path/to/models \
--server_name 0.0.0.0 \
--server_port 7862
```
**Windows Environment:**
**English Interface Version:**
```cmd
python gradio_demo.py ^
--model_path D:\models ^
--server_name 127.0.0.1 ^
--server_port 7862
```
**Chinese Interface Version:**
```cmd
python gradio_demo_zh.py ^
--model_path D:\models ^
--server_name 127.0.0.1 ^
--server_port 7862
```
**💡 Tip**: Model type (wan2.1/wan2.2), task type (i2v/t2v), and specific model file selection are all configured in the Web interface.
## 📋 Command Line Parameters
| Parameter | Type | Required | Default | Description |
|-----------|------|----------|---------|-------------|
| `--model_path` | str | ✅ | - | Model root directory path (directory containing all model files) |
| `--server_port` | int | ❌ | 7862 | Server port |
| `--server_name` | str | ❌ | 0.0.0.0 | Server IP address |
| `--output_dir` | str | ❌ | ./outputs | Output video save directory |
**💡 Note**: Model type (wan2.1/wan2.2), task type (i2v/t2v), and specific model file selection are all configured in the Web interface.
## 🎯 Features
### Model Configuration
- **Model Type**: Supports wan2.1 and wan2.2 model architectures
- **Task Type**: Supports Image-to-Video (i2v) and Text-to-Video (t2v) generation modes
- **Model Selection**: Frontend automatically identifies and filters available model files, supports automatic quantization precision detection
- **Encoder Configuration**: Supports selection of T5 text encoder, CLIP image encoder, and VAE decoder
- **Operator Selection**: Supports multiple attention operators and quantization matrix multiplication operators, system automatically sorts by installation status
### Input Parameters
- **Prompt**: Describe the expected video content
- **Negative Prompt**: Specify elements you don't want to appear
- **Input Image**: Upload input image required in i2v mode
- **Resolution**: Supports multiple preset resolutions (480p/540p/720p)
- **Random Seed**: Controls the randomness of generation results
- **Inference Steps**: Affects the balance between generation quality and speed (defaults to 4 steps for distilled models)
### Video Parameters
- **FPS**: Frames per second
- **Total Frames**: Video length
- **CFG Scale Factor**: Controls prompt influence strength (1-10, defaults to 1 for distilled models)
- **Distribution Shift**: Controls generation style deviation degree (0-10)
## 🔧 Auto-Configuration Feature
The system automatically configures optimal inference options based on your hardware configuration (GPU VRAM and CPU memory) without manual adjustment. The best configuration is automatically applied on startup, including:
- **GPU Memory Optimization**: Automatically enables CPU offloading, VAE tiling inference, etc. based on VRAM size
- **CPU Memory Optimization**: Automatically enables lazy loading, module unloading, etc. based on system memory
- **Operator Selection**: Automatically selects the best installed operators (sorted by priority)
- **Quantization Configuration**: Automatically detects and applies quantization precision based on model file names
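The actual selection logic lives in the Gradio demos; the sketch below only illustrates the idea of deriving offload options from detected VRAM and RAM. The thresholds and option names here are assumptions for illustration, not the demo's exact values:

```python
import torch
import psutil


def suggest_offload_options():
    """Illustrative only: derive offload flags from detected GPU VRAM and system RAM."""
    if not torch.cuda.is_available():
        raise RuntimeError("An NVIDIA GPU with CUDA support is required.")
    vram_gb = torch.cuda.get_device_properties(0).total_memory / 1024**3
    ram_gb = psutil.virtual_memory().total / 1024**3
    return {
        "cpu_offload": vram_gb < 16,     # offload DiT weights to CPU on small-VRAM GPUs
        "use_tiling_vae": vram_gb < 12,  # tiled VAE decoding lowers peak VRAM
        "lazy_load": ram_gb < 32,        # lazy-load weights from disk when RAM is tight
    }


if __name__ == "__main__":
    print(suggest_offload_options())
```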
### Log Viewing
```bash
# View inference logs
tail -f inference_logs.log
# View GPU usage
nvidia-smi
# View system resources
htop
```
Welcome to submit Issues and Pull Requests to improve this project!
**Note**: Please comply with relevant laws and regulations when using videos generated by this tool, and do not use them for illegal purposes.
# Windows Local Deployment Guide
## 📖 Overview
This document provides detailed instructions for deploying LightX2V locally on Windows environments, including batch file inference, Gradio Web interface inference, and other usage methods.
## 🚀 Quick Start
### Environment Requirements
#### Hardware Requirements
- **GPU**: NVIDIA GPU, recommended 8GB+ VRAM
- **Memory**: Recommended 16GB+ RAM
- **Storage**: An SSD is strongly recommended; mechanical hard drives will cause slow model loading
## 🎯 Usage Methods
### Method 1: Using Batch File Inference
Refer to the [Quick Start Guide](../getting_started/quickstart.md) to install the environment, then run using the [batch files](https://github.com/ModelTC/LightX2V/tree/main/scripts/win).
### Method 2: Using Gradio Web Interface Inference
#### Manual Gradio Configuration
Refer to the [Quick Start Guide](../getting_started/quickstart.md) to install the environment, then follow the [Gradio Deployment Guide](./deploy_gradio.md).
#### One-Click Gradio Startup (Recommended)
**📦 Download Software Package**
- [Quark Cloud](https://pan.quark.cn/s/8af1162d7a15)
**📁 Directory Structure**
After extraction, ensure the directory structure is as follows:
```
├── env/ # LightX2V environment directory
├── LightX2V/ # LightX2V project directory
├── start_lightx2v.bat # One-click startup script
├── lightx2v_config.txt # Configuration file
├── LightX2V使用说明.txt # LightX2V usage instructions
├── outputs/ # Generated video save directory
└── models/ # Model storage directory
```
**📥 Model Download**:
Refer to [Model Structure Documentation](../getting_started/model_structure.md) or [Gradio Deployment Guide](./deploy_gradio.md) to download complete models (including quantized and non-quantized versions) or download only quantized/non-quantized versions.
**📋 Configuration Parameters**
Edit the `lightx2v_config.txt` file and modify the following parameters as needed:
```ini
# Interface language (zh: Chinese, en: English)
lang=en
# Server port
port=8032
# GPU device ID (0, 1, 2...)
gpu=0
# Model path
model_path=models/
```
**🚀 Start Service**
Double-click to run the `start_lightx2v.bat` file, the script will:
1. Automatically read configuration file
2. Verify model paths and file integrity
3. Start Gradio Web interface
4. Automatically open browser to access service
![Gradio English Interface](../../../../assets/figs/portabl_windows/pic_gradio_en.png)
**⚠️ Important Notes**:
- **Display Issues**: If the webpage opens blank or displays abnormally, please run `pip install --upgrade gradio` to upgrade the Gradio version.
### Method 3: Using ComfyUI Inference
This guide will instruct you on how to download and use the portable version of the Lightx2v-ComfyUI environment, so you can avoid manual environment configuration steps. This is suitable for users who want to quickly start experiencing accelerated video generation with Lightx2v on Windows systems.
#### Download the Windows Portable Environment:
- [Baidu Cloud Download](https://pan.baidu.com/s/1FVlicTXjmXJA1tAVvNCrBw?pwd=wfid), extraction code: wfid
The portable environment already packages all Python runtime dependencies, including the code and dependencies for ComfyUI and LightX2V. After downloading, simply extract to use.
After extraction, the directory structure is as follows:
```shell
lightx2v_env
├──📂 ComfyUI # ComfyUI code
├──📂 portable_python312_embed # Standalone Python environment
└── run_nvidia_gpu.bat # Windows startup script (double-click to start)
```
#### Start ComfyUI
Directly double-click the run_nvidia_gpu.bat file. The system will open a Command Prompt window and run the program. The first startup may take a while, please be patient. After startup is complete, the browser will automatically open and display the ComfyUI frontend interface.
![i2v example workflow](../../../../assets/figs/portabl_windows/pic1.png)
The plugin used by LightX2V-ComfyUI is [ComfyUI-Lightx2vWrapper](https://github.com/ModelTC/ComfyUI-Lightx2vWrapper). Example workflows can be obtained from this project.
#### Tested Graphics Cards (offload mode)
- Tested model: `Wan2.1-I2V-14B-480P`
| GPU Model | Task Type | VRAM Capacity | Actual Max VRAM Usage | Actual Max RAM Usage |
|:-----------|:------------|:--------------|:---------------------|:---------------------|
| 3090Ti | I2V | 24G | 6.1G | 7.1G |
| 3080Ti | I2V | 12G | 6.1G | 7.1G |
| 3060Ti | I2V | 8G | 6.1G | 7.1G |
| 4070Ti Super | I2V | 16G | 6.1G | 7.1G |
| 4070 | I2V | 12G | 6.1G | 7.1G |
| 4060 | I2V | 8G | 6.1G | 7.1G |
#### Environment Packaging and Usage Reference
- [ComfyUI](https://github.com/comfyanonymous/ComfyUI)
- [Portable-Windows-ComfyUI-Docs](https://docs.comfy.org/zh-CN/installation/comfyui_portable_windows#portable-%E5%8F%8A%E8%87%AA%E9%83%A8%E7%BD%B2)
# Service Deployment
lightx2v provides asynchronous service functionality. The code entry point is [here](https://github.com/ModelTC/lightx2v/blob/main/lightx2v/api_server.py)
### Start the Service
```shell
# Modify the paths in the script
bash scripts/start_server.sh
```
The `--port 8000` option means the service will bind to port `8000` on the local machine. You can change this as needed.
### Client Sends Request
```shell
python scripts/post.py
```
The service endpoint is: `/v1/tasks/`
The `message` parameter in `scripts/post.py` is as follows:
```python
message = {
"prompt": "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage.",
"negative_prompt": "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走",
"image_path": "",
}
```
1. `prompt`, `negative_prompt`, and `image_path` are basic inputs for video generation. `image_path` can be an empty string, indicating no image input is needed.
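Instead of running `scripts/post.py`, you can also send the request directly. The sketch below assumes the server is listening on `localhost:8000` and accepts the JSON body shown above (additional optional fields may be supported):

```python
import requests

message = {
    "prompt": "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage.",
    "negative_prompt": "",
    "image_path": "",  # empty string: no image input (t2v)
}

# Create a video generation task on the locally running server
resp = requests.post("http://localhost:8000/v1/tasks/", json=message)
resp.raise_for_status()
print(resp.json())  # typically includes the created task's ID and status
```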
### Client Checks Server Status
```shell
python scripts/check_status.py
```
The service endpoints include:
1. `/v1/service/status` is used to check the status of the service. It returns whether the service is `busy` or `idle`. The service only accepts new requests when it is `idle`.
2. `/v1/tasks/` is used to get all tasks received and completed by the server.
3. `/v1/tasks/{task_id}/status` is used to get the status of a specified `task_id`. It returns whether the task is `processing` or `completed`.
### Client Stops the Current Task on the Server at Any Time
```shell
python scripts/stop_running_task.py
```
The service endpoint is: `/v1/tasks/running`
After terminating the task, the server will not exit but will return to waiting for new requests.
### Starting Multiple Services on a Single Node
On a single node, you can start multiple services using `scripts/start_server.sh` (Note that the port numbers under the same IP must be different for each service), or you can start multiple services at once using `scripts/start_multi_servers.sh`:
```shell
num_gpus=8 bash scripts/start_multi_servers.sh
```
Where `num_gpus` indicates the number of services to start; the services will run on consecutive ports starting from `--start_port`.
### Scheduling Between Multiple Services
```shell
python scripts/post_multi_servers.py
```
`post_multi_servers.py` will schedule multiple client requests based on the idle status of the services.
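As a rough sketch of the same idea, the snippet below polls `/v1/service/status` on consecutive ports and submits a task to the first idle server. It assumes servers on `localhost` starting at port 8000 and that the status response contains the word `idle`; the exact response schema may differ:

```python
import requests

START_PORT = 8000   # should match --start_port used by start_multi_servers.sh
NUM_SERVERS = 8     # should match num_gpus


def find_idle_server():
    """Return the base URL of the first reachable server reporting itself as idle."""
    for port in range(START_PORT, START_PORT + NUM_SERVERS):
        base = f"http://localhost:{port}"
        try:
            status = requests.get(f"{base}/v1/service/status", timeout=2).json()
        except requests.RequestException:
            continue  # server not reachable, try the next one
        if "idle" in str(status).lower():
            return base
    return None


base_url = find_idle_server()
if base_url is not None:
    message = {"prompt": "a cat boxing on a stage", "negative_prompt": "", "image_path": ""}
    print(requests.post(f"{base_url}/v1/tasks/", json=message).json())
else:
    print("All services are busy or unreachable.")
```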
### API Endpoints Summary
| Endpoint | Method | Description |
|----------|--------|-------------|
| `/v1/tasks/` | POST | Create video generation task |
| `/v1/tasks/form` | POST | Create video generation task via form |
| `/v1/tasks/` | GET | Get all task list |
| `/v1/tasks/{task_id}/status` | GET | Get status of specified task |
| `/v1/tasks/{task_id}/result` | GET | Get result video file of specified task |
| `/v1/tasks/running` | DELETE | Stop currently running task |
| `/v1/files/download/{file_path}` | GET | Download file |
| `/v1/service/status` | GET | Get service status |
# Deployment for Low Latency Scenarios
In low-latency scenarios, we pursue the fastest possible speed, setting aside concerns such as GPU memory and RAM overhead. We provide two solutions:
## 💡 Solution 1: Inference with Step Distillation Model
This solution can refer to the [Step Distillation Documentation](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/step_distill.html)
🧠 **Step Distillation** is a very direct way to accelerate video generation model inference. By distilling from 50 steps down to 4 steps, the inference time drops to roughly 4/50 of the original. This solution can also be combined with the following techniques:
1. [Efficient Attention Mechanism Solution](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/attention.html)
2. [Model Quantization](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/quantization.html)
## 💡 Solution 2: Inference with Non-Step Distillation Model
Step distillation requires substantial training resources, and the distilled model may show a reduced video dynamic range.
For the original model without step distillation, the following solutions can be used individually or in combination for acceleration:
1. [Parallel Inference](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/parallel.html) for multi-GPU parallel acceleration.
2. [Feature Caching](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/cache.html) to reduce the actual inference steps.
3. [Efficient Attention Mechanism Solution](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/attention.html) to accelerate Attention inference.
4. [Model Quantization](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/quantization.html) to accelerate Linear layer inference.
5. [Variable Resolution Inference](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/changing_resolution.html) to reduce the resolution of intermediate inference steps.
## 💡 Using Tiny VAE
In some cases, the VAE component can be time-consuming. You can use a lightweight VAE for acceleration, which can also reduce some GPU memory usage.
```json
{
    "use_tae": true,
    "tae_path": "/path/to/taew2_1.pth"
}
```
The taew2_1.pth weights can be downloaded from [here](https://github.com/madebyollin/taehv/raw/refs/heads/main/taew2_1.pth)
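For example, assuming `wget` is available:

```bash
wget https://github.com/madebyollin/taehv/raw/refs/heads/main/taew2_1.pth -O taew2_1.pth
```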
## ⚠️ Note
Some acceleration solutions currently cannot be used together, and we are working to resolve this issue.
If you have any questions, feel free to report bugs or request features in [🐛 GitHub Issues](https://github.com/ModelTC/lightx2v/issues)
# Lightx2v Low-Resource Deployment Guide
## 📋 Overview
This guide is specifically designed for hardware resource-constrained environments, particularly configurations with **8GB VRAM + 16/32GB RAM**, providing detailed instructions on how to successfully run Lightx2v 14B models for 480p and 720p video generation.
Lightx2v is a powerful video generation model, but it requires careful optimization to run smoothly in resource-constrained environments. This guide provides a complete solution from hardware selection to software configuration, ensuring you can achieve the best video generation experience under limited hardware conditions.
## 🎯 Target Hardware Configuration
### Recommended Hardware Specifications
**GPU Requirements**:
- **VRAM**: 8GB (RTX 3060/3070/4060/4060Ti, etc.)
- **Architecture**: NVIDIA graphics cards with CUDA support
**System Memory**:
- **Minimum**: 16GB DDR4
- **Recommended**: 32GB DDR4/DDR5
- **Memory Speed**: 3200MHz or higher recommended
**Storage Requirements**:
- **Type**: NVMe SSD strongly recommended
- **Capacity**: At least 50GB available space
- **Speed**: Read speed of 3000MB/s or higher recommended
**CPU Requirements**:
- **Cores**: 8 cores or more recommended
- **Frequency**: 3.0GHz or higher recommended
- **Architecture**: Support for AVX2 instruction set
## ⚙️ Core Optimization Strategies
### 1. Environment Optimization
Before running Lightx2v, it's recommended to set the following environment variables to optimize performance:
```bash
# CUDA memory allocation optimization
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
# Enable CUDA Graph mode to improve inference performance
export ENABLE_GRAPH_MODE=true
# Use BF16 precision for inference to reduce VRAM usage (default FP32 precision)
export DTYPE=BF16
```
**Optimization Details**:
- `expandable_segments:True`: Allows dynamic expansion of CUDA memory segments, reducing memory fragmentation
- `ENABLE_GRAPH_MODE=true`: Enables CUDA Graph to reduce kernel launch overhead
- `DTYPE=BF16`: Uses BF16 precision to reduce VRAM usage while maintaining quality
### 2. Quantization Strategy
Quantization is a key optimization technique in low-resource environments, reducing memory usage by lowering model precision.
#### Quantization Scheme Comparison
**FP8 Quantization** (Recommended for RTX 40 series):
```python
# Suitable for GPUs supporting FP8, providing better precision
dit_quant_scheme = "fp8" # DIT model quantization
t5_quant_scheme = "fp8" # T5 text encoder quantization
clip_quant_scheme = "fp8" # CLIP visual encoder quantization
```
**INT8 Quantization** (Universal solution):
```python
# Suitable for all GPUs, minimal memory usage
dit_quant_scheme = "int8" # 8-bit integer quantization
t5_quant_scheme = "int8" # Text encoder quantization
clip_quant_scheme = "int8" # Visual encoder quantization
```
### 3. Efficient Operator Selection Guide
Choosing the right operators can significantly improve inference speed and reduce memory usage.
#### Attention Operator Selection
**Recommended Priority**:
1. **[Sage Attention](https://github.com/thu-ml/SageAttention)** (Highest priority)
2. **[Flash Attention](https://github.com/Dao-AILab/flash-attention)** (Universal solution)
#### Matrix Multiplication Operator Selection
**ADA Architecture GPUs** (RTX 40 series):
Recommended priority:
1. **[q8-kernel](https://github.com/KONAKONA666/q8_kernels)** (Highest performance, ADA architecture only)
2. **[sglang-kernel](https://github.com/sgl-project/sglang/tree/main/sgl-kernel)** (Balanced solution)
3. **[vllm-kernel](https://github.com/vllm-project/vllm)** (Universal solution)
**Other Architecture GPUs**:
1. **[sglang-kernel](https://github.com/sgl-project/sglang/tree/main/sgl-kernel)** (Recommended)
2. **[vllm-kernel](https://github.com/vllm-project/vllm)** (Alternative)
### 4. Parameter Offloading Strategy
Parameter offloading technology allows models to dynamically schedule parameters between CPU and disk, breaking through VRAM limitations.
#### Three-Level Offloading Architecture
```python
# Disk-CPU-GPU three-level offloading configuration
cpu_offload = True                  # Enable CPU offloading
t5_cpu_offload = True               # Enable T5 encoder CPU offloading
offload_granularity = "phase"       # DIT model fine-grained offloading
t5_offload_granularity = "block"    # T5 encoder fine-grained offloading
lazy_load = True                    # Enable lazy loading mechanism
num_disk_workers = 2                # Disk I/O worker threads
```
#### Offloading Strategy Details
**Lazy Loading Mechanism**:
- Model parameters are loaded from disk to CPU on demand
- Reduces runtime memory usage
- Supports large models running with limited memory
**Disk Storage Optimization**:
- Use high-speed SSD to store model parameters
- Store model files grouped by blocks
- Refer to conversion script [documentation](https://github.com/ModelTC/lightx2v/tree/main/tools/convert/readme.md), specify `--save_by_block` parameter during conversion
### 5. VRAM Optimization Techniques
VRAM optimization strategies for 720p video generation.
#### CUDA Memory Management
```python
# CUDA memory cleanup configuration
clean_cuda_cache = True # Timely cleanup of GPU cache
rotary_chunk = True # Rotary position encoding chunked computation
rotary_chunk_size = 100 # Chunk size, adjustable based on VRAM
```
#### Chunked Computation Strategy
**Rotary Position Encoding Chunking**:
- Process long sequences in small chunks
- Reduce peak VRAM usage
- Maintain computational precision
### 6. VAE Optimization
VAE (Variational Autoencoder) is a key component in video generation, and optimizing VAE can significantly improve performance.
#### VAE Chunked Inference
```python
# VAE optimization configuration
use_tiling_vae = True # Enable VAE chunked inference
```
#### Lightweight VAE
```python
# VAE optimization configuration
use_tae = True # Use lightweight VAE
tae_path = "/path/to/taew2_1.pth"
```
You can download taew2_1.pth [here](https://github.com/madebyollin/taehv/blob/main/taew2_1.pth)
**VAE Optimization Effects**:
- Standard VAE: Baseline performance, 100% quality retention
- Standard VAE chunked: Reduces VRAM usage, increases inference time, 100% quality retention
- Lightweight VAE: Extremely low VRAM usage, with some video quality loss
### 7. Model Selection Strategy
Choosing the right model version is crucial for low-resource environments.
#### Recommended Model Comparison
**Distilled Models** (Strongly recommended):
- **[Wan2.1-I2V-14B-480P-StepDistill-CfgDistill-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-I2V-14B-480P-StepDistill-CfgDistill-Lightx2v)**
- **[Wan2.1-I2V-14B-720P-StepDistill-CfgDistill-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-I2V-14B-720P-StepDistill-CfgDistill-Lightx2v)**
#### Performance Optimization Suggestions
When using the above distilled models, you can further optimize performance:
- Disable CFG: `"enable_cfg": false`
- Reduce inference steps: `infer_steps: 4`
- Reference configuration files: [config](https://github.com/ModelTC/LightX2V/tree/main/configs/distill)
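For illustration, the relevant excerpt of such a config looks like the snippet below; merge it into a full configuration from the linked directory rather than using it on its own:

```json
{
    "infer_steps": 4,
    "enable_cfg": false
}
```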
## 🚀 Complete Configuration Examples
### Pre-configured Templates
- **[14B Model 480p Video Generation Configuration](https://github.com/ModelTC/lightx2v/tree/main/configs/offload/disk/wan_i2v_phase_lazy_load_480p.json)**
- **[14B Model 720p Video Generation Configuration](https://github.com/ModelTC/lightx2v/tree/main/configs/offload/disk/wan_i2v_phase_lazy_load_720p.json)**
- **[1.3B Model 720p Video Generation Configuration](https://github.com/ModelTC/LightX2V/tree/main/configs/offload/block/wan_t2v_1_3b.json)**
- The inference bottleneck for 1.3B models is the T5 encoder, so the configuration file specifically optimizes for T5
**[Launch Script](https://github.com/ModelTC/LightX2V/tree/main/scripts/wan/run_wan_i2v_lazy_load.sh)**
## 📚 Reference Resources
- [Parameter Offloading Mechanism Documentation](../method_tutorials/offload.md) - In-depth understanding of offloading technology principles
- [Quantization Technology Guide](../method_tutorials/quantization.md) - Detailed explanation of quantization technology
- [Gradio Deployment Guide](deploy_gradio.md) - Detailed Gradio deployment instructions
## ⚠️ Important Notes
1. **Hardware Requirements**: Ensure your hardware meets minimum configuration requirements
2. **Driver Version**: Recommend using the latest NVIDIA drivers (535+)
3. **CUDA Version**: Ensure CUDA version is compatible with PyTorch (recommend CUDA 11.8+)
4. **Storage Space**: Reserve sufficient disk space for model caching (at least 50GB)
5. **Network Environment**: Stable network connection required for initial model download
6. **Environment Variables**: Be sure to set the recommended environment variables to optimize performance
**Technical Support**: If you encounter issues, please submit an Issue to the project repository.