Commit e2778d0d authored by litzh

Initial commit
{
"model_cls": "worldplay_distill",
"task": "i2v",
"infer_steps": 4,
"denoising_step_list": [0, 250, 500, 750],
"transformer_model_name": "480p_i2v",
"target_video_length": 125,
"aspect_ratio": "16:9",
"vae_stride": [4, 16, 16],
"sample_shift": 1.0,
"sample_guide_scale": -1.0,
"enable_cfg": false,
"attn_type": "flash_attn2",
"model_type": "ar",
"chunk_latent_frames": 4,
"use_prope": true,
"hidden_size": 2048,
"heads_num": 16,
"mm_double_blocks_depth": 54,
"patch_size": [1, 1, 1],
"rope_dim_list": [16, 56, 56],
"rope_theta": 256,
"in_channels": 32,
"out_channels": 32,
"feature_caching": "NoCaching",
"cpu_offload": true,
"offload_granularity": "block",
"use_prompt_enhancer": false,
"use_image_encoder": true,
"seq_parallel": false,
"parallel": {},
"is_sr_running": false
}
{
"aspect_ratio": "16:9",
"num_channels_latents": 16,
"infer_steps": 9,
"attn_type": "flash_attn3",
"enable_cfg": false,
"sample_guide_scale": 0.0,
"patch_size": 2
}
{
"aspect_ratio": "16:9",
"num_channels_latents": 16,
"infer_steps": 9,
"attn_type": "flash_attn3",
"enable_cfg": false,
"sample_guide_scale": 0.0,
"patch_size": 2,
"dit_quantized": true,
"dit_quant_scheme": "fp8-sgl",
"dit_quantized_ckpt": "/path/to/z_image_turbo_fp8.safetensors"
}
{
"aspect_ratio": "16:9",
"num_channels_latents": 16,
"infer_steps": 9,
"attn_type": "flash_attn3",
"enable_cfg": false,
"sample_guide_scale": 0.0,
"patch_size": 2,
"cpu_offload": true,
"offload_granularity": "model",
"qwen3_quantized": true,
"qwen3_quant_scheme": "int4",
"qwen3_quantized_ckpt": "JunHowie/Qwen3-4B-GPTQ-Int4",
"dit_quantized": true,
"dit_quant_scheme": "fp8-sgl",
"dit_quantized_ckpt": "lightx2v/Z-Image-Turbo-Quantized/z_image_turbo_scaled_fp8_e4m3fn.safetensors"
}
FROM pytorch/pytorch:2.8.0-cuda12.8-cudnn9-devel AS base
WORKDIR /app
ENV DEBIAN_FRONTEND=noninteractive
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
ENV LD_LIBRARY_PATH=/usr/local/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
RUN apt-get update && apt-get install -y vim tmux zip unzip bzip2 wget git git-lfs build-essential libibverbs-dev ca-certificates \
curl iproute2 libsm6 libxext6 kmod ccache libnuma-dev libssl-dev flex bison libgtk-3-dev libpango1.0-dev \
libsoup2.4-dev libnice-dev libopus-dev libvpx-dev libx264-dev libsrtp2-dev libglib2.0-dev libdrm-dev libjpeg-dev libpng-dev \
&& apt-get clean && rm -rf /var/lib/apt/lists/* && git lfs install
RUN conda install conda-forge::ffmpeg=8.0.0 -y && conda clean --all -y
RUN pip install --no-cache-dir packaging ninja cmake scikit-build-core uv meson ruff pre-commit fastapi uvicorn requests -U
RUN git clone https://github.com/vllm-project/vllm.git && cd vllm \
&& python use_existing_torch.py && pip install --no-cache-dir -r requirements/build.txt \
&& pip install --no-cache-dir --no-build-isolation -v -e .
RUN git clone https://github.com/sgl-project/sglang.git && cd sglang/sgl-kernel \
&& make build && make clean
RUN pip install --no-cache-dir diffusers transformers tokenizers accelerate safetensors opencv-python numpy imageio \
imageio-ffmpeg einops loguru qtorch ftfy av decord matplotlib debugpy soundfile jsonschema pymongo modelscope
RUN git clone https://github.com/Dao-AILab/flash-attention.git --recursive
RUN cd flash-attention && python setup.py install && rm -rf build
RUN cd flash-attention/hopper && python setup.py install && rm -rf build
RUN git clone https://github.com/ModelTC/SageAttention.git --depth 1
RUN cd SageAttention && CUDA_ARCHITECTURES="8.0,8.6,8.9,9.0,12.0" EXT_PARALLEL=4 NVCC_APPEND_FLAGS="--threads 8" MAX_JOBS=32 pip install --no-cache-dir -v -e .
RUN git clone https://github.com/ModelTC/SageAttention-1104.git --depth 1
RUN cd SageAttention-1104/sageattention3_blackwell && python setup.py install && rm -rf build
RUN git clone https://github.com/SandAI-org/MagiAttention.git --recursive
RUN cd MagiAttention && TORCH_CUDA_ARCH_LIST="9.0" pip install --no-cache-dir --no-build-isolation -v -e .
RUN git clone https://github.com/Tencent-Hunyuan/flex-block-attn.git --recursive
RUN cd flex-block-attn && python setup.py install && rm -rf build
RUN git clone https://github.com/ModelTC/FlashVSR.git --depth 1
RUN cd FlashVSR && pip install --no-cache-dir -v -e .
COPY lightx2v_kernel /app/lightx2v_kernel
RUN git clone https://github.com/NVIDIA/cutlass.git --depth 1 && cd /app/lightx2v_kernel && MAX_JOBS=32 CMAKE_BUILD_PARALLEL_LEVEL=4 \
uv build --wheel \
-Cbuild-dir=build . \
-Ccmake.define.CUTLASS_PATH=/app/cutlass \
--verbose \
--color=always \
--no-build-isolation \
&& pip install dist/*whl --force-reinstall --no-deps \
&& rm -rf /app/lightx2v_kernel && rm -rf /app/cutlass
# cloud deploy
RUN pip install --no-cache-dir aio-pika "asyncpg>=0.27.0" "aioboto3>=12.0.0" PyJWT alibabacloud_dypnsapi20170525==1.2.2 redis==6.4.0 tos
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
ENV PATH=/root/.cargo/bin:$PATH
RUN cd /opt \
&& wget https://mirrors.tuna.tsinghua.edu.cn/gnu/libiconv/libiconv-1.15.tar.gz \
&& tar zxvf libiconv-1.15.tar.gz \
&& cd libiconv-1.15 \
&& ./configure \
&& make \
&& make install \
&& rm -rf /opt/libiconv-1.15
RUN cd /opt \
&& git clone https://github.com/GStreamer/gstreamer.git -b 1.27.2 --depth 1 \
&& cd gstreamer \
&& meson setup builddir \
&& meson compile -C builddir \
&& meson install -C builddir \
&& ldconfig \
&& rm -rf /opt/gstreamer
RUN cd /opt \
&& git clone https://github.com/GStreamer/gst-plugins-rs.git -b gstreamer-1.27.2 --depth 1 \
&& cd gst-plugins-rs \
&& cargo build --package gst-plugin-webrtchttp --release \
&& install -m 644 target/release/libgstwebrtchttp.so $(pkg-config --variable=pluginsdir gstreamer-1.0)/ \
&& rm -rf /opt/gst-plugins-rs
RUN ldconfig
# for base docker
RUN git clone https://github.com/KONAKONA666/q8_kernels.git --depth 1
RUN cd q8_kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
RUN git clone https://github.com/thu-ml/SpargeAttn.git --depth 1
RUN cd SpargeAttn && TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9;9.0" pip install --no-cache-dir --no-build-isolation -v -e .
# for 5090 docker
# RUN git clone https://github.com/ModelTC/LTX-Video-Q8-Kernels.git --depth 1
# RUN cd LTX-Video-Q8-Kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
# RUN git clone https://github.com/ModelTC/SpargeAttn.git --depth 1
# RUN cd SpargeAttn && TORCH_CUDA_ARCH_LIST="12.0" pip install --no-cache-dir --no-build-isolation -v -e .
WORKDIR /workspace
FROM pytorch/pytorch:2.8.0-cuda12.8-cudnn9-devel AS base
WORKDIR /app
ENV DEBIAN_FRONTEND=noninteractive
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
ENV LD_LIBRARY_PATH=/usr/local/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
RUN apt-get update && apt-get install -y vim tmux zip unzip bzip2 wget git git-lfs build-essential libibverbs-dev ca-certificates \
curl iproute2 libsm6 libxext6 kmod ccache libnuma-dev libssl-dev flex bison libgtk-3-dev libpango1.0-dev \
libsoup2.4-dev libnice-dev libopus-dev libvpx-dev libx264-dev libsrtp2-dev libglib2.0-dev libdrm-dev libjpeg-dev libpng-dev \
&& apt-get clean && rm -rf /var/lib/apt/lists/* && git lfs install
RUN conda install conda-forge::ffmpeg=8.0.0 -y && conda clean --all -y
RUN pip install --no-cache-dir packaging ninja cmake scikit-build-core uv meson ruff pre-commit fastapi uvicorn requests -U
RUN git clone https://github.com/vllm-project/vllm.git && cd vllm \
&& python use_existing_torch.py && pip install --no-cache-dir -r requirements/build.txt \
&& pip install --no-cache-dir --no-build-isolation -v -e .
RUN git clone https://github.com/sgl-project/sglang.git && cd sglang/sgl-kernel \
&& make build && make clean
RUN pip install --no-cache-dir diffusers transformers tokenizers accelerate safetensors opencv-python numpy imageio \
imageio-ffmpeg einops loguru qtorch ftfy av decord matplotlib debugpy soundfile jsonschema pymongo modelscope
RUN git clone https://github.com/Dao-AILab/flash-attention.git --recursive
RUN cd flash-attention && python setup.py install && rm -rf build
RUN cd flash-attention/hopper && python setup.py install && rm -rf build
RUN git clone https://github.com/ModelTC/SageAttention.git --depth 1
RUN cd SageAttention && CUDA_ARCHITECTURES="8.0,8.6,8.9,9.0,12.0" EXT_PARALLEL=4 NVCC_APPEND_FLAGS="--threads 8" MAX_JOBS=32 pip install --no-cache-dir -v -e .
RUN git clone https://github.com/ModelTC/SageAttention-1104.git --depth 1
RUN cd SageAttention-1104/sageattention3_blackwell && python setup.py install && rm -rf build
RUN git clone https://github.com/SandAI-org/MagiAttention.git --recursive
RUN cd MagiAttention && TORCH_CUDA_ARCH_LIST="9.0" pip install --no-cache-dir --no-build-isolation -v -e .
RUN git clone https://github.com/Tencent-Hunyuan/flex-block-attn.git --recursive
RUN cd flex-block-attn && python setup.py install && rm -rf build
RUN git clone https://github.com/ModelTC/FlashVSR.git --depth 1
RUN cd FlashVSR && pip install --no-cache-dir -v -e .
COPY lightx2v_kernel /app/lightx2v_kernel
RUN git clone https://github.com/NVIDIA/cutlass.git --depth 1 && cd /app/lightx2v_kernel && MAX_JOBS=32 CMAKE_BUILD_PARALLEL_LEVEL=4 \
uv build --wheel \
-Cbuild-dir=build . \
-Ccmake.define.CUTLASS_PATH=/app/cutlass \
--verbose \
--color=always \
--no-build-isolation \
&& pip install dist/*whl --force-reinstall --no-deps \
&& rm -rf /app/lightx2v_kernel && rm -rf /app/cutlass
# cloud deploy
RUN pip install --no-cache-dir aio-pika "asyncpg>=0.27.0" "aioboto3>=12.0.0" PyJWT alibabacloud_dypnsapi20170525==1.2.2 redis==6.4.0 tos
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
ENV PATH=/root/.cargo/bin:$PATH
RUN cd /opt \
&& wget https://mirrors.tuna.tsinghua.edu.cn/gnu/libiconv/libiconv-1.15.tar.gz \
&& tar zxvf libiconv-1.15.tar.gz \
&& cd libiconv-1.15 \
&& ./configure \
&& make \
&& make install \
&& rm -rf /opt/libiconv-1.15
RUN cd /opt \
&& git clone https://github.com/GStreamer/gstreamer.git -b 1.27.2 --depth 1 \
&& cd gstreamer \
&& meson setup builddir \
&& meson compile -C builddir \
&& meson install -C builddir \
&& ldconfig \
&& rm -rf /opt/gstreamer
RUN cd /opt \
&& git clone https://github.com/GStreamer/gst-plugins-rs.git -b gstreamer-1.27.2 --depth 1 \
&& cd gst-plugins-rs \
&& cargo build --package gst-plugin-webrtchttp --release \
&& install -m 644 target/release/libgstwebrtchttp.so $(pkg-config --variable=pluginsdir gstreamer-1.0)/ \
&& rm -rf /opt/gst-plugins-rs
RUN ldconfig
# for base docker
# RUN git clone https://github.com/KONAKONA666/q8_kernels.git --depth 1
# RUN cd q8_kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
# RUN git clone https://github.com/thu-ml/SpargeAttn.git --depth 1
# RUN cd SpargeAttn && TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9;9.0" pip install --no-cache-dir --no-build-isolation -v -e .
# for 5090 docker
RUN git clone https://github.com/ModelTC/LTX-Video-Q8-Kernels.git --depth 1
RUN cd LTX-Video-Q8-Kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
RUN git clone https://github.com/ModelTC/SpargeAttn.git --depth 1
RUN cd SpargeAttn && TORCH_CUDA_ARCH_LIST="12.0" pip install --no-cache-dir --no-build-isolation -v -e .
WORKDIR /workspace
FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel AS base
WORKDIR /app
ENV DEBIAN_FRONTEND=noninteractive
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
ENV LD_LIBRARY_PATH=/usr/local/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
RUN apt-get update && apt-get install -y vim tmux zip unzip wget git git-lfs build-essential libibverbs-dev ca-certificates \
curl iproute2 libsm6 libxext6 kmod ccache libnuma-dev libssl-dev flex bison libgtk-3-dev libpango1.0-dev \
libsoup2.4-dev libnice-dev libopus-dev libvpx-dev libx264-dev libsrtp2-dev libglib2.0-dev libdrm-dev \
&& apt-get clean && rm -rf /var/lib/apt/lists/* && git lfs install
RUN pip install --no-cache-dir packaging ninja cmake scikit-build-core uv meson ruff pre-commit fastapi uvicorn requests -U
RUN git clone https://github.com/vllm-project/vllm.git -b v0.10.0 && cd vllm \
&& python use_existing_torch.py && pip install -r requirements/build.txt \
&& pip install --no-cache-dir --no-build-isolation -v -e .
RUN git clone https://github.com/sgl-project/sglang.git -b v0.4.10 && cd sglang/sgl-kernel \
&& make build && make clean
RUN pip install --no-cache-dir diffusers transformers tokenizers accelerate safetensors opencv-python numpy imageio \
imageio-ffmpeg einops loguru qtorch ftfy av decord
RUN conda install conda-forge::ffmpeg=8.0.0 -y && ln -s /opt/conda/bin/ffmpeg /usr/bin/ffmpeg && conda clean --all -y
RUN git clone https://github.com/Dao-AILab/flash-attention.git -b v2.8.3 --recursive
RUN cd flash-attention && python setup.py install && rm -rf build
RUN cd flash-attention/hopper && python setup.py install && rm -rf build
RUN git clone https://github.com/ModelTC/SageAttention.git
RUN cd SageAttention && CUDA_ARCHITECTURES="8.0,8.6,8.9,9.0" EXT_PARALLEL=4 NVCC_APPEND_FLAGS="--threads 8" MAX_JOBS=32 pip install --no-cache-dir -v -e .
RUN git clone https://github.com/KONAKONA666/q8_kernels.git
RUN cd q8_kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
# cloud deploy
RUN pip install --no-cache-dir aio-pika "asyncpg>=0.27.0" "aioboto3>=12.0.0" PyJWT alibabacloud_dypnsapi20170525==1.2.2 redis==6.4.0 tos
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
ENV PATH=/root/.cargo/bin:$PATH
RUN cd /opt \
&& wget https://mirrors.tuna.tsinghua.edu.cn/gnu//libiconv/libiconv-1.15.tar.gz \
&& tar zxvf libiconv-1.15.tar.gz \
&& cd libiconv-1.15 \
&& ./configure \
&& make \
&& make install \
&& rm -rf /opt/libiconv-1.15
RUN cd /opt \
&& git clone https://github.com/GStreamer/gstreamer.git -b 1.24.12 --depth 1 \
&& cd gstreamer \
&& meson setup builddir \
&& meson compile -C builddir \
&& meson install -C builddir \
&& ldconfig \
&& rm -rf /opt/gstreamer
RUN cd /opt \
&& git clone https://github.com/GStreamer/gst-plugins-rs.git -b gstreamer-1.24.12 --depth 1 \
&& cd gst-plugins-rs \
&& cargo build --package gst-plugin-webrtchttp --release \
&& install -m 644 target/release/libgstwebrtchttp.so $(pkg-config --variable=pluginsdir gstreamer-1.0)/ \
&& rm -rf /opt/gst-plugins-rs
RUN ldconfig
WORKDIR /workspace
FROM node:alpine3.21 AS frontend_builder
COPY lightx2v /opt/lightx2v
RUN cd /opt/lightx2v/deploy/server/frontend \
&& npm install \
&& npm run build
FROM lightx2v/lightx2v:25111101-cu128 AS base
RUN mkdir /workspace/LightX2V
WORKDIR /workspace/LightX2V
ENV PYTHONPATH=/workspace/LightX2V
# for multi-person & animate
RUN pip install ultralytics moviepy pydub pyannote.audio decord peft onnxruntime-gpu pandas matplotlib loguru sentencepiece
RUN export COMMIT=0e78a118995e66bb27d78518c4bd9a3e95b4e266 \
&& export TORCH_CUDA_ARCH_LIST="9.0" \
&& git clone --depth 1 https://github.com/facebookresearch/sam2.git \
&& cd sam2 \
&& git fetch --depth 1 origin $COMMIT \
&& git checkout $COMMIT \
&& python setup.py install
COPY tools tools
COPY assets assets
COPY configs configs
COPY lightx2v lightx2v
COPY lightx2v_kernel lightx2v_kernel
COPY lightx2v_platform lightx2v_platform
COPY --from=frontend_builder /opt/lightx2v/deploy/server/frontend/dist lightx2v/deploy/server/frontend/dist
FROM quay.io/ascend/vllm-ascend:v0.11.0rc3
# Set envs
ENV PYTHONPATH=/workspace/LightX2V
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/Ascend/ascend-toolkit/latest/lib64:/usr/local/Ascend/driver/lib64/driver/:/usr/local/python3.11.13/lib/python3.11/site-packages/torch_npu/lib/:/usr/local/python3.11.13/lib/python3.11/site-packages/torch/lib
ENV PATH=${PATH}:/usr/local/go/bin:/usr/local/python3.11.13/bin/
# Install deps
RUN apt-get update && apt-get install ffmpeg -y && \
pip install --no-cache-dir \
imageio \
imageio-ffmpeg \
ftfy \
aiohttp \
gguf \
loguru \
accelerate \
diffusers && \
pip install --no-cache-dir torchaudio==2.7.1 --index-url https://download.pytorch.org/whl/cpu
# Copy files
COPY app app
COPY assets assets
COPY configs configs
COPY lightx2v lightx2v
COPY lightx2v_kernel lightx2v_kernel
COPY lightx2v_platform lightx2v_platform
COPY scripts scripts
COPY test_cases test_cases
COPY tools tools
FROM cambricon-base/pytorch:v25.10.0-torch2.8.0-torchmlu1.29.1-ubuntu22.04-py310 AS base
WORKDIR /workspace/LightX2V
# Set envs
ENV PYTHONPATH=/workspace/LightX2V
ENV LD_LIBRARY_PATH=/usr/local/neuware/lib64:${LD_LIBRARY_PATH}
# Install deps
RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg && \
pip install --no-cache-dir \
ftfy \
imageio \
imageio-ffmpeg \
loguru \
aiohttp \
gguf \
diffusers \
peft==0.17.0 \
transformers==4.57.1
# Copy files
COPY app app
COPY assets assets
COPY configs configs
COPY lightx2v lightx2v
COPY lightx2v_kernel lightx2v_kernel
COPY lightx2v_platform lightx2v_platform
COPY scripts scripts
COPY test_cases test_cases
COPY tools tools
# Dockerfile for LightX2V on AMD ROCm platform
# Base image: SGLang with ROCm 7.0.0 for MI300X
FROM lmsysorg/sglang:v0.5.6.post2-rocm700-mi35x
LABEL maintainer="LightX2V Contributors"
LABEL description="LightX2V video generation framework with AMD ROCm support"
# Set working directory
WORKDIR /workspace
# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
git \
ffmpeg \
libsm6 \
libxext6 \
&& rm -rf /var/lib/apt/lists/*
# Install aiter (AMD ROCm optimized kernels)
# Commit: a7d3bf8cd47afbaf6a6133c1f12e3b01d2c27b0e
ARG AITER_COMMIT=a7d3bf8cd47afbaf6a6133c1f12e3b01d2c27b0e
RUN git clone https://github.com/ROCm/aiter.git /tmp/aiter && \
cd /tmp/aiter && \
git checkout ${AITER_COMMIT} && \
pip install --no-cache-dir -e . && \
rm -rf /tmp/aiter/.git
# Install flash-attn for ROCm
RUN pip install --no-cache-dir flash-attn --no-build-isolation
# Copy LightX2V source
COPY . /workspace/LightX2V
# Install LightX2V dependencies
WORKDIR /workspace/LightX2V
RUN pip install --no-cache-dir -r requirements.txt
# Install LightX2V
RUN pip install --no-cache-dir -e .
# Set environment variables for AMD ROCm
ENV HIP_VISIBLE_DEVICES=0
ENV ROCM_PATH=/opt/rocm
ENV HSA_FORCE_FINE_GRAIN_PCIE=1
# Default command
CMD ["python", "-c", "from lightx2v import LightX2VPipeline; print('LightX2V AMD ROCm ready!')"]
version: 2
# Set the version of Python and other tools you might need
build:
  os: ubuntu-20.04
  tools:
    python: "3.10"
formats:
  - epub
sphinx:
  configuration: docs/EN/source/conf.py
python:
  install:
    - requirements: requirements-docs.txt
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.https://www.sphinx-doc.org/
exit /b 1
)
if "%1" == "" goto help
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
import logging
import os
import sys
from typing import List
import sphinxcontrib.redoc
from sphinx.ext import autodoc
logger = logging.getLogger(__name__)
sys.path.append(os.path.abspath("../.."))
# -- Project information -----------------------------------------------------
project = "Lightx2v"
copyright = "2025, Lightx2v Team"
author = "the Lightx2v Team"
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    "sphinx.ext.napoleon",
    "sphinx.ext.viewcode",
    "sphinx.ext.intersphinx",
    "sphinx_copybutton",
    "sphinx.ext.autodoc",
    "sphinx.ext.autosummary",
    "sphinx.ext.mathjax",
    "myst_parser",
    "sphinxarg.ext",
    "sphinxcontrib.redoc",
    "sphinxcontrib.openapi",
]
myst_enable_extensions = [
    "dollarmath",
    "amsmath",
]
html_static_path = ["_static"]
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns: List[str] = ["**/*.template.rst"]
# Exclude the prompt "$" when copying code
copybutton_prompt_text = r"\$ "
copybutton_prompt_is_regexp = True
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_title = project
html_theme = "sphinx_book_theme"
# html_theme = 'sphinx_rtd_theme'
html_logo = "../../../assets/img_lightx2v.png"
html_theme_options = {
    "path_to_docs": "docs/EN/source",
    "repository_url": "https://github.com/ModelTC/lightx2v",
    "use_repository_button": True,
}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = ['_static']
# Generate additional rst documentation here.
def setup(app):
    # from docs.source.generate_examples import generate_examples
    # generate_examples()
    pass


# Mock out external dependencies here.
autodoc_mock_imports = [
    "cpuinfo",
    "torch",
    "transformers",
    "psutil",
    "prometheus_client",
    "sentencepiece",
    "lightllmnumpy",
    "tqdm",
    "tensorizer",
]

for mock_target in autodoc_mock_imports:
    if mock_target in sys.modules:
        logger.info(
            "Potentially problematic mock target (%s) found; autodoc_mock_imports cannot mock modules that have already been loaded into sys.modules when the sphinx build starts.",
            mock_target,
        )


class MockedClassDocumenter(autodoc.ClassDocumenter):
    """Remove note about base class when a class is derived from object."""

    def add_line(self, line: str, source: str, *lineno: int) -> None:
        if line == " Bases: :py:class:`object`":
            return
        super().add_line(line, source, *lineno)
autodoc.ClassDocumenter = MockedClassDocumenter
navigation_with_keys = False
# ComfyUI Deployment
## ComfyUI-Lightx2vWrapper
The official ComfyUI integration nodes for LightX2V are now available in a dedicated repository, providing a complete modular configuration system and optimization features.
### Project Repository
- GitHub: [https://github.com/ModelTC/ComfyUI-Lightx2vWrapper](https://github.com/ModelTC/ComfyUI-Lightx2vWrapper)
### Key Features
- Modular Configuration System: Separate nodes for each aspect of video generation
- Support for both Text-to-Video (T2V) and Image-to-Video (I2V) generation modes
- Advanced Optimizations:
  - TeaCache acceleration (up to 3x speedup)
  - Quantization support (int8, fp8)
  - Memory optimization with CPU offloading
  - Lightweight VAE options
- LoRA Support: Chain multiple LoRA models for customization
- Multiple Model Support: wan2.1, hunyuan architectures
### Installation and Usage
Please visit the GitHub repository above for detailed installation instructions, usage tutorials, and example workflows.
# Gradio Deployment Guide
## 📖 Overview
Lightx2v is a lightweight video inference and generation engine that provides a web interface based on Gradio, supporting both Image-to-Video and Text-to-Video generation modes.
For Windows systems, we provide a convenient one-click deployment solution with automatic environment configuration and intelligent parameter optimization. Please refer to the [One-Click Gradio Startup (Recommended)](./deploy_local_windows.md#one-click-gradio-startup-recommended) section for detailed instructions.
![Gradio English Interface](../../../../assets/figs/portabl_windows/pic_gradio_en.png)
## 📁 File Structure
```
LightX2V/app/
├── gradio_demo.py # English interface demo
├── gradio_demo_zh.py # Chinese interface demo
├── run_gradio.sh # Startup script
├── README.md # Documentation
├── outputs/ # Generated video save directory
└── inference_logs.log # Inference logs
```
This project contains two main demo files:
- `gradio_demo.py` - English interface version
- `gradio_demo_zh.py` - Chinese interface version
## 🚀 Quick Start
### Environment Requirements
Follow the [Quick Start Guide](../getting_started/quickstart.md) to install the environment
#### Recommended Optimization Library Configuration
- [Flash attention](https://github.com/Dao-AILab/flash-attention)
- [Sage attention](https://github.com/thu-ml/SageAttention)
- [vllm-kernel](https://github.com/vllm-project/vllm)
- [sglang-kernel](https://github.com/sgl-project/sglang/tree/main/sgl-kernel)
- [q8-kernel](https://github.com/KONAKONA666/q8_kernels) (only supports Ada architecture GPUs)
Install each operator as needed by following the tutorial on its project homepage.
### 📥 Model Download
Models can be downloaded with one click from the frontend interface, which offers two download sources: HuggingFace and ModelScope; choose whichever suits your network. Alternatively, refer to the [Model Structure Documentation](../getting_started/model_structure.md) to download complete models (including both quantized and non-quantized versions) or only the quantized or non-quantized variants.
#### wan2.1 Model Directory Structure
```
models/
├── wan2.1_i2v_720p_lightx2v_4step.safetensors # Original precision
├── wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step.safetensors # FP8 quantization
├── wan2.1_i2v_720p_int8_lightx2v_4step.safetensors # INT8 quantization
├── wan2.1_i2v_720p_int8_lightx2v_4step_split # INT8 quantization block storage directory
├── wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step_split # FP8 quantization block storage directory
├── Other weights (e.g., t2v)
├── t5/clip/xlm-roberta-large/google # text and image encoder
├── vae/lightvae/lighttae # vae
└── config.json # Model configuration file
```
#### wan2.2 Model Directory Structure
```
models/
├── wan2.2_i2v_A14b_high_noise_lightx2v_4step_1030.safetensors # high noise original precision
├── wan2.2_i2v_A14b_high_noise_fp8_e4m3_lightx2v_4step_1030.safetensors # high noise FP8 quantization
├── wan2.2_i2v_A14b_high_noise_int8_lightx2v_4step_1030.safetensors # high noise INT8 quantization
├── wan2.2_i2v_A14b_high_noise_int8_lightx2v_4step_1030_split # high noise INT8 quantization block storage directory
├── wan2.2_i2v_A14b_low_noise_lightx2v_4step.safetensors # low noise original precision
├── wan2.2_i2v_A14b_low_noise_fp8_e4m3_lightx2v_4step.safetensors # low noise FP8 quantization
├── wan2.2_i2v_A14b_low_noise_int8_lightx2v_4step.safetensors # low noise INT8 quantization
├── wan2.2_i2v_A14b_low_noise_int8_lightx2v_4step_split # low noise INT8 quantization block storage directory
├── t5/clip/xlm-roberta-large/google # text and image encoder
├── vae/lightvae/lighttae # vae
└── config.json # Model configuration file
```
**📝 Download Instructions**:
- Model weights can be downloaded from HuggingFace:
  - [Wan2.1-Distill-Models](https://huggingface.co/lightx2v/Wan2.1-Distill-Models)
  - [Wan2.2-Distill-Models](https://huggingface.co/lightx2v/Wan2.2-Distill-Models)
- Text and Image Encoders can be downloaded from [Encoders](https://huggingface.co/lightx2v/Encoders)
- VAE can be downloaded from [Autoencoders](https://huggingface.co/lightx2v/Autoencoders)
- `xxx_split` directories (e.g., `wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step_split`) store the weights as multiple per-block safetensors files and are suited to devices with limited memory; for example, devices with 16GB of memory or less can download the split version instead (a minimal download sketch is shown below).
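For reference, here is a minimal download sketch using `huggingface_hub`. The repository IDs come from the links above; the specific filename and the local directory are placeholder assumptions that you should adapt to the variant you actually need.

```python
# Minimal sketch (not part of the repo): fetch one DiT weight file plus the
# shared encoder/VAE repos into the models/ layout shown above.
# The filename below is one example from the listing; adjust it as needed.
from huggingface_hub import hf_hub_download, snapshot_download

model_root = "models"  # placeholder local model root

# A single DiT weight file (FP8-quantized 4-step wan2.1 i2v, as listed above)
hf_hub_download(
    repo_id="lightx2v/Wan2.1-Distill-Models",
    filename="wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step.safetensors",
    local_dir=model_root,
)

# Text/image encoders and VAE weights from the repos linked above
snapshot_download(repo_id="lightx2v/Encoders", local_dir=model_root)
snapshot_download(repo_id="lightx2v/Autoencoders", local_dir=model_root)
```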
### Startup Methods
#### Method 1: Using Startup Script (Recommended)
**Linux Environment:**
```bash
# 1. Edit the startup script to configure relevant paths
cd app/
vim run_gradio.sh
# Configuration items that need to be modified:
# - lightx2v_path: Lightx2v project root directory path
# - model_path: Model root directory path (contains all model files)
# 💾 Important note: Recommend pointing model paths to SSD storage locations
# Example: /mnt/ssd/models/ or /data/ssd/models/
# 2. Run the startup script
bash run_gradio.sh
# 3. Or start with parameters
bash run_gradio.sh --lang en --port 8032
bash run_gradio.sh --lang zh --port 7862
```
**Windows Environment:**
```cmd
# 1. Edit the startup script to configure relevant paths
cd app\
notepad run_gradio_win.bat
# Configuration items that need to be modified:
# - lightx2v_path: Lightx2v project root directory path
# - model_path: Model root directory path (contains all model files)
# 💾 Important note: Recommend pointing model paths to SSD storage locations
# Example: D:\models\ or E:\models\
# 2. Run the startup script
run_gradio_win.bat
# 3. Or start with parameters
run_gradio_win.bat --lang en --port 8032
run_gradio_win.bat --lang zh --port 7862
```
#### Method 2: Direct Command Line Startup
```bash
pip install -v git+https://github.com/ModelTC/LightX2V.git
```
**Linux Environment:**
**English Interface Version:**
```bash
python gradio_demo.py \
--model_path /path/to/models \
--server_name 0.0.0.0 \
--server_port 7862
```
**Chinese Interface Version:**
```bash
python gradio_demo_zh.py \
--model_path /path/to/models \
--server_name 0.0.0.0 \
--server_port 7862
```
**Windows Environment:**
**English Interface Version:**
```cmd
python gradio_demo.py ^
--model_path D:\models ^
--server_name 127.0.0.1 ^
--server_port 7862
```
**Chinese Interface Version:**
```cmd
python gradio_demo_zh.py ^
--model_path D:\models ^
--server_name 127.0.0.1 ^
--server_port 7862
```
**💡 Tip**: Model type (wan2.1/wan2.2), task type (i2v/t2v), and specific model file selection are all configured in the Web interface.
## 📋 Command Line Parameters
| Parameter | Type | Required | Default | Description |
|-----------|------|----------|---------|-------------|
| `--model_path` | str | ✅ | - | Model root directory path (directory containing all model files) |
| `--server_port` | int | ❌ | 7862 | Server port |
| `--server_name` | str | ❌ | 0.0.0.0 | Server IP address |
| `--output_dir` | str | ❌ | ./outputs | Output video save directory |
**💡 Note**: Model type (wan2.1/wan2.2), task type (i2v/t2v), and specific model file selection are all configured in the Web interface.
## 🎯 Features
### Model Configuration
- **Model Type**: Supports wan2.1 and wan2.2 model architectures
- **Task Type**: Supports Image-to-Video (i2v) and Text-to-Video (t2v) generation modes
- **Model Selection**: Frontend automatically identifies and filters available model files, supports automatic quantization precision detection
- **Encoder Configuration**: Supports selection of T5 text encoder, CLIP image encoder, and VAE decoder
- **Operator Selection**: Supports multiple attention operators and quantization matrix multiplication operators, system automatically sorts by installation status
### Input Parameters
- **Prompt**: Describe the expected video content
- **Negative Prompt**: Specify elements you don't want to appear
- **Input Image**: Upload input image required in i2v mode
- **Resolution**: Supports multiple preset resolutions (480p/540p/720p)
- **Random Seed**: Controls the randomness of generation results
- **Inference Steps**: Affects the balance between generation quality and speed (defaults to 4 steps for distilled models)
### Video Parameters
- **FPS**: Frames per second
- **Total Frames**: Video length
- **CFG Scale Factor**: Controls prompt influence strength (1-10, defaults to 1 for distilled models)
- **Distribution Shift**: Controls generation style deviation degree (0-10)
## 🔧 Auto-Configuration Feature
The system automatically selects the optimal inference options for your hardware (GPU VRAM and CPU memory), so no manual tuning is required. The best configuration is applied at startup, including:
- **GPU Memory Optimization**: Automatically enables CPU offloading, VAE tiling inference, etc. based on VRAM size
- **CPU Memory Optimization**: Automatically enables lazy loading, module unloading, etc. based on system memory
- **Operator Selection**: Automatically selects the best installed operators (sorted by priority)
- **Quantization Configuration**: Automatically detects and applies quantization precision based on model file names
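Conceptually, this boils down to probing the available GPU VRAM and system RAM and flipping the corresponding options. The sketch below is purely illustrative; the thresholds and option names are assumptions rather than the demo's actual logic.

```python
# Illustrative sketch only: hardware-based option selection.
# Thresholds and option names are assumptions, not the demo's actual code.
import psutil
import torch


def auto_config():
    gpu_gb = torch.cuda.get_device_properties(0).total_memory / 1024**3
    ram_gb = psutil.virtual_memory().total / 1024**3
    return {
        # GPU memory optimization: offload and tiled VAE decoding on small-VRAM cards
        "cpu_offload": gpu_gb < 24,
        "use_tiling_vae": gpu_gb < 16,
        # CPU memory optimization: lazy-load weights when system RAM is tight
        "lazy_load": ram_gb < 32,
    }


print(auto_config())
```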
### Log Viewing
```bash
# View inference logs
tail -f inference_logs.log
# View GPU usage
nvidia-smi
# View system resources
htop
```
Issues and pull requests to improve this project are welcome!
**Note**: Please comply with relevant laws and regulations when using videos generated by this tool, and do not use them for illegal purposes.
# Windows Local Deployment Guide
## 📖 Overview
This document provides detailed instructions for deploying LightX2V locally on Windows environments, including batch file inference, Gradio Web interface inference, and other usage methods.
## 🚀 Quick Start
### Environment Requirements
#### Hardware Requirements
- **GPU**: NVIDIA GPU, recommended 8GB+ VRAM
- **Memory**: Recommended 16GB+ RAM
- **Storage**: An SSD is strongly recommended; mechanical hard drives will make model loading slow
## 🎯 Usage Methods
### Method 1: Using Batch File Inference
Refer to the [Quick Start Guide](../getting_started/quickstart.md) to set up the environment, then run inference with the [batch files](https://github.com/ModelTC/LightX2V/tree/main/scripts/win).
### Method 2: Using Gradio Web Interface Inference
#### Manual Gradio Configuration
Refer to the [Quick Start Guide](../getting_started/quickstart.md) to set up the environment, then follow the [Gradio Deployment Guide](./deploy_gradio.md).
#### One-Click Gradio Startup (Recommended)
**📦 Download Software Package**
- [Quark Cloud](https://pan.quark.cn/s/f44023dcf8c8)
**📁 Directory Structure**
After extraction, ensure the directory structure is as follows:
```
├── env/ # LightX2V environment directory
├── LightX2V/ # LightX2V project directory
├── start_lightx2v.bat # One-click startup script
├── lightx2v_config.txt # Configuration file
├── LightX2V使用说明.txt # LightX2V usage instructions
├── outputs/ # Generated video save directory
└── models/ # Model storage directory
```
**⚠️ Note for RTX 50 Series GPU Users**: We provide a dedicated runtime environment. Please download it from [Quark Cloud](https://pan.quark.cn/s/52b9a8c8f07a), extract it, and replace the `env/` directory in the software package with it.
**📥 Model Download**:
Models can be downloaded directly from the Gradio frontend, which offers two download sources: HuggingFace and ModelScope. Choose whichever suits you, or refer to the [Model Structure Documentation](../getting_started/model_structure.md) to download manually.
**📋 Configuration Parameters**
Edit the `lightx2v_config.txt` file and modify the following parameters as needed:
```ini
# Interface language (zh: Chinese, en: English)
lang=en
# Server port
port=8032
# GPU device ID (0, 1, 2...)
gpu=0
# Model path
model_path=models/
```
**🚀 Start Service**
Double-click the `start_lightx2v.bat` file to run it; the script will:
1. Automatically read the configuration file
2. Verify model paths and file integrity
3. Start the Gradio Web interface
4. Automatically open a browser to access the service
![Gradio English Interface](../../../../assets/figs/portabl_windows/pic_gradio_en.png)
**⚠️ Important Notes**:
- **Display Issues**: If the webpage opens blank or displays abnormally, please run `pip install --upgrade gradio` to upgrade the Gradio version.
# Service Deployment
LightX2V provides asynchronous service functionality. The code entry point is [here](https://github.com/ModelTC/LightX2V/blob/main/lightx2v/server/main.py).
### Start the Service
```shell
# Modify the paths in the script
bash scripts/server/start_server.sh
```
The `--port 8000` option means the service will bind to port `8000` on the local machine. You can change this as needed.
### Client Sends Request
```shell
python scripts/server/post.py
```
The service endpoint is: `/v1/tasks/`
The `message` parameter in `scripts/server/post.py` is as follows:
```python
message = {
"prompt": "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage.",
"negative_prompt": "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走",
"image_path": "",
"target_shape": [720, 720],
}
```
`prompt`, `negative_prompt`, and `image_path` are the basic inputs for video generation. `image_path` may be an empty string, indicating that no image input is used. `target_shape` optionally sets the output video resolution and defaults to the configured resolution.
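As a minimal alternative to `post.py`, a task can be submitted directly with `requests`. The base URL below assumes the default `--port 8000`, and the shape of the JSON response (e.g. a task identifier field) is an assumption:

```python
# Minimal sketch: submit a generation task directly to the server
# (equivalent in spirit to scripts/server/post.py).
import requests

BASE_URL = "http://localhost:8000"  # assumes the default --port 8000

message = {
    "prompt": "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage.",
    "image_path": "",            # empty string: no image input
    "target_shape": [720, 720],  # optional output resolution
}

resp = requests.post(f"{BASE_URL}/v1/tasks/", json=message)
resp.raise_for_status()
print(resp.json())  # expected to contain a task identifier for later status checks
```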
### Client Checks Server Status
```shell
python scripts/server/check_status.py
```
The service endpoints include:
1. `/v1/service/status` is used to check the status of the service. It returns whether the service is `busy` or `idle`. The service only accepts new requests when it is `idle`.
2. `/v1/tasks/` is used to get all tasks received and completed by the server.
3. `/v1/tasks/{task_id}/status` is used to get the status of a specified `task_id`. It returns whether the task is `processing` or `completed`.
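A minimal status-polling sketch against these endpoints might look like the following; the exact field names in the JSON responses are assumptions:

```python
# Minimal sketch: poll a task until it completes.
# Field names in the response are assumptions based on the description above.
import time

import requests

BASE_URL = "http://localhost:8000"
task_id = "your-task-id"  # identifier returned when the task was created

while True:
    status = requests.get(f"{BASE_URL}/v1/tasks/{task_id}/status").json()
    print(status)
    if status.get("status") == "completed":
        break
    time.sleep(5)
```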
### Client Stops the Current Task on the Server at Any Time
```shell
python scripts/server/stop_running_task.py
```
The service endpoint is: `/v1/tasks/running`
After terminating the task, the server will not exit but will return to waiting for new requests.
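The same effect can be achieved by calling the endpoint directly with a DELETE request (see the API summary table below), for example:

```python
# Minimal sketch: stop the currently running task via the DELETE endpoint.
import requests

resp = requests.delete("http://localhost:8000/v1/tasks/running")
print(resp.status_code, resp.text)
```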
### Starting Multiple Services on a Single Node
On a single node, you can start multiple services with `scripts/server/start_server.sh` (note that each service on the same IP must use a different port), or start several services at once with `scripts/server/start_multi_servers.sh`:
```shell
num_gpus=8 bash scripts/server/start_multi_servers.sh
```
Where `num_gpus` indicates the number of services to start; the services will run on consecutive ports starting from `--start_port`.
### Scheduling Between Multiple Services
```shell
python scripts/server/post_multi_servers.py
```
`post_multi_servers.py` will schedule multiple client requests based on the idle status of the services.
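Conceptually, the scheduling loop probes `/v1/service/status` on each port and dispatches the next request to the first idle service. A minimal sketch follows; the port range and the exact status payload are assumptions:

```python
# Minimal sketch: dispatch a request to the first idle service among several
# servers on consecutive ports. Port range and payload format are assumptions.
import requests

START_PORT = 8000
NUM_SERVERS = 8


def find_idle_server():
    for port in range(START_PORT, START_PORT + NUM_SERVERS):
        try:
            status = requests.get(f"http://localhost:{port}/v1/service/status", timeout=2).json()
        except requests.RequestException:
            continue
        if "idle" in str(status).lower():  # exact field name is unknown
            return port
    return None


port = find_idle_server()
if port is not None:
    requests.post(f"http://localhost:{port}/v1/tasks/", json={"prompt": "..."})
```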
### API Endpoints Summary
| Endpoint | Method | Description |
|----------|--------|-------------|
| `/v1/tasks/` | POST | Create video generation task |
| `/v1/tasks/form` | POST | Create video generation task via form |
| `/v1/tasks/` | GET | Get all task list |
| `/v1/tasks/{task_id}/status` | GET | Get status of specified task |
| `/v1/tasks/{task_id}/result` | GET | Get result video file of specified task |
| `/v1/tasks/running` | DELETE | Stop currently running task |
| `/v1/files/download/{file_path}` | GET | Download file |
| `/v1/service/status` | GET | Get service status |
# Deployment for Low Latency Scenarios
In low-latency scenarios, we prioritize speed over concerns such as GPU memory and RAM overhead. We provide two solutions:
## 💡 Solution 1: Inference with Step Distillation Model
This solution can refer to the [Step Distillation Documentation](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/step_distill.html)
🧠 **Step Distillation** is a very direct way to accelerate inference for video generation models: distilling from 50 steps down to 4 cuts the denoising time to roughly 4/50 of the original. It can also be combined with the following solutions:
1. [Efficient Attention Mechanism Solution](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/attention.html)
2. [Model Quantization](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/quantization.html)
## 💡 Solution 2: Inference with Non-Step Distillation Model
Step distillation requires substantial training resources, and the distilled model may show a reduced dynamic range in the generated video.
For the original, non-distilled model, the following solutions can be used individually or in combination for acceleration:
1. [Parallel Inference](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/parallel.html) for multi-GPU parallel acceleration.
2. [Feature Caching](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/cache.html) to reduce the actual inference steps.
3. [Efficient Attention Mechanism Solution](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/attention.html) to accelerate Attention inference.
4. [Model Quantization](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/quantization.html) to accelerate Linear layer inference.
5. [Variable Resolution Inference](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/changing_resolution.html) to reduce the resolution of intermediate inference steps.
## 💡 Using Tiny VAE
In some cases, the VAE component can be time-consuming. You can use a lightweight VAE for acceleration, which can also reduce some GPU memory usage.
```json
{
    "use_tae": true,
    "tae_path": "/path/to/taew2_1.pth"
}
```
The taew2_1.pth weights can be downloaded from [here](https://github.com/madebyollin/taehv/raw/refs/heads/main/taew2_1.pth)
## ⚠️ Note
Some acceleration solutions currently cannot be used together, and we are working to resolve this issue.
If you have any questions, feel free to report bugs or request features in [🐛 GitHub Issues](https://github.com/ModelTC/lightx2v/issues)