Commit dfc3b85e authored by gushiqiao

Update gradio and docs

parent dfe72c5d
@@ -403,6 +403,7 @@ def run_inference(
         "rotary_chunk": rotary_chunk,
         "rotary_chunk_size": rotary_chunk_size,
         "clean_cuda_cache": clean_cuda_cache,
+        "denoising_step_list": [1000, 750, 500, 250]
     }
     args = argparse.Namespace(
@@ -818,18 +819,22 @@ def main():
         randomize_btn.click(fn=generate_random_seed, inputs=None, outputs=seed)
         with gr.Column():
+            # Set default inference steps based on model class
+            default_infer_steps = 4 if model_cls == "wan2.1_distill" else 40
             infer_steps = gr.Slider(
                 label="Inference Steps",
                 minimum=1,
                 maximum=100,
                 step=1,
-                value=40,
+                value=default_infer_steps,
                 info="Number of inference steps for video generation. Increasing steps may improve quality but reduce speed.",
             )
+            # Set default CFG based on model class
+            default_enable_cfg = False if model_cls == "wan2.1_distill" else True
             enable_cfg = gr.Checkbox(
                 label="Enable Classifier-Free Guidance",
-                value=True,
+                value=default_enable_cfg,
                 info="Enable classifier-free guidance to control prompt strength",
             )
             cfg_scale = gr.Slider(
@@ -1149,7 +1154,7 @@ def main():
         outputs=output_video,
     )
-    demo.launch(share=True, server_port=args.server_port, server_name=args.server_name)
+    demo.launch(share=True, server_port=args.server_port, server_name=args.server_name, inbrowser=True)
 if __name__ == "__main__":
@@ -1158,9 +1163,9 @@ if __name__ == "__main__":
     parser.add_argument(
         "--model_cls",
         type=str,
-        choices=["wan2.1"],
+        choices=["wan2.1", "wan2.1_distill"],
         default="wan2.1",
-        help="Model class to use",
+        help="Model class to use (wan2.1: standard model, wan2.1_distill: distilled model for faster inference)",
     )
     parser.add_argument("--model_size", type=str, required=True, choices=["14b", "1.3b"], help="Model type to use")
     parser.add_argument("--task", type=str, required=True, choices=["i2v", "t2v"], help="Specify the task type. 'i2v' for image-to-video translation, 't2v' for text-to-video generation.")
...
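The hunk above is what gives the distilled model class its fast-path defaults in the English demo. As a self-contained sketch of that selection logic (the standalone function framing is illustrative; the values and key names mirror the diff):

```python
# Minimal sketch of the model-class-dependent defaults introduced in this commit.
def distill_defaults(model_cls: str) -> dict:
    is_distill = model_cls == "wan2.1_distill"
    return {
        # 4-step distilled checkpoints need far fewer inference steps
        "infer_steps": 4 if is_distill else 40,
        # CFG behaviour is distilled into the weights, so the checkbox defaults to off
        "enable_cfg": not is_distill,
        # fixed timestep schedule passed through to the 4-step distilled sampler
        "denoising_step_list": [1000, 750, 500, 250],
    }

print(distill_defaults("wan2.1_distill"))
# {'infer_steps': 4, 'enable_cfg': False, 'denoising_step_list': [1000, 750, 500, 250]}
```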
@@ -405,6 +405,7 @@ def run_inference(
         "rotary_chunk": rotary_chunk,
         "rotary_chunk_size": rotary_chunk_size,
         "clean_cuda_cache": clean_cuda_cache,
+        "denoising_step_list": [1000, 750, 500, 250]
     }
     args = argparse.Namespace(
@@ -818,18 +819,22 @@ def main():
         randomize_btn.click(fn=generate_random_seed, inputs=None, outputs=seed)
         with gr.Column():
+            # 根据模型类别设置默认推理步数
+            default_infer_steps = 4 if model_cls == "wan2.1_distill" else 40
             infer_steps = gr.Slider(
                 label="推理步数",
                 minimum=1,
                 maximum=100,
                 step=1,
-                value=40,
+                value=default_infer_steps,
                 info="视频生成的推理步数。增加步数可能提高质量但降低速度。",
             )
+            # 根据模型类别设置默认CFG
+            default_enable_cfg = False if model_cls == "wan2.1_distill" else True
             enable_cfg = gr.Checkbox(
                 label="启用无分类器引导",
-                value=True,
+                value=default_enable_cfg,
                 info="启用无分类器引导以控制提示词强度",
             )
             cfg_scale = gr.Slider(
@@ -1147,7 +1152,7 @@ def main():
         outputs=output_video,
     )
-    demo.launch(share=True, server_port=args.server_port, server_name=args.server_name)
+    demo.launch(share=True, server_port=args.server_port, server_name=args.server_name, inbrowser=True)
 if __name__ == "__main__":
@@ -1156,9 +1161,9 @@ if __name__ == "__main__":
     parser.add_argument(
         "--model_cls",
         type=str,
-        choices=["wan2.1"],
+        choices=["wan2.1", "wan2.1_distill"],
         default="wan2.1",
-        help="要使用的模型类别",
+        help="要使用的模型类别 (wan2.1: 标准模型, wan2.1_distill: 蒸馏模型,推理更快)",
     )
     parser.add_argument("--model_size", type=str, required=True, choices=["14b", "1.3b"], help="模型大小:14b 或 1.3b")
     parser.add_argument("--task", type=str, required=True, choices=["i2v", "t2v"], help="指定任务类型。'i2v'用于图像到视频转换,'t2v'用于文本到视频生成。")
...
@@ -28,6 +28,10 @@ t2v_model_path=/path/to/Wan2.1-T2V-1.3B
 # Default model size (14b, 1.3b)
 model_size="14b"
+# Model class configuration
+# Default model class (wan2.1, wan2.1_distill)
+model_cls="wan2.1"
 # Server configuration
 server_name="0.0.0.0"
 server_port=8032
@@ -72,6 +76,10 @@ while [[ $# -gt 0 ]]; do
             model_size="$2"
             shift 2
             ;;
+        --model_cls)
+            model_cls="$2"
+            shift 2
+            ;;
         --help)
             echo "🎬 Lightx2v Gradio Demo Startup Script"
             echo "=========================================="
@@ -90,6 +98,10 @@ while [[ $# -gt 0 ]]; do
             echo "      Model size (default: 14b)"
             echo "      14b: 14 billion parameters model"
             echo "      1.3b: 1.3 billion parameters model"
+            echo "  --model_cls MODEL_CLASS"
+            echo "      Model class (default: wan2.1)"
+            echo "      wan2.1: Standard model variant"
+            echo "      wan2.1_distill: Distilled model variant for faster inference"
             echo "  --help  Show this help message"
             echo ""
             echo "🚀 Usage examples:"
@@ -99,6 +111,7 @@ while [[ $# -gt 0 ]]; do
             echo "  $0 --task i2v --gpu 1 --port 8032          # Use GPU 1"
             echo "  $0 --task t2v --model_size 1.3b            # Use 1.3B model"
             echo "  $0 --task i2v --model_size 14b             # Use 14B model"
+            echo "  $0 --task i2v --model_cls wan2.1_distill   # Use distilled model"
             echo ""
             echo "📝 Notes:"
             echo "  - Edit script to configure model paths before first use"
@@ -132,6 +145,12 @@ if [[ "$model_size" != "14b" && "$model_size" != "1.3b" ]]; then
     exit 1
 fi
+# Validate model class
+if [[ "$model_cls" != "wan2.1" && "$model_cls" != "wan2.1_distill" ]]; then
+    echo "Error: Model class must be 'wan2.1' or 'wan2.1_distill'"
+    exit 1
+fi
 # Select model path based on task type
 if [[ "$task" == "i2v" ]]; then
     model_path=$i2v_model_path
@@ -181,6 +200,7 @@ echo "📁 Project path: $lightx2v_path"
 echo "🤖 Model path: $model_path"
 echo "🎯 Task type: $task"
 echo "🤖 Model size: $model_size"
+echo "🤖 Model class: $model_cls"
 echo "🌏 Interface language: $lang"
 echo "🖥️ GPU device: $gpu_id"
 echo "🌐 Server address: $server_name:$server_port"
@@ -208,6 +228,7 @@ echo "=========================================="
 # Start Python demo
 python $demo_file \
     --model_path "$model_path" \
+    --model_cls "$model_cls" \
     --task "$task" \
     --server_name "$server_name" \
     --server_port "$server_port" \
...
# Gradio Deployment Guide

## 📖 Overview

Lightx2v is a lightweight video inference and generation engine that provides a web interface based on Gradio, supporting both Image-to-Video and Text-to-Video generation modes.

## 📁 File Structure

```
LightX2V/app/
├── gradio_demo.py          # English interface demo
├── gradio_demo_zh.py       # Chinese interface demo
├── run_gradio.sh           # Startup script
├── README.md               # Documentation
├── saved_videos/           # Generated video save directory
└── inference_logs.log      # Inference logs
```

This project contains two main demo files:
- `gradio_demo.py` - English interface version
- `gradio_demo_zh.py` - Chinese interface version

## 🚀 Quick Start

### Environment Requirements

Follow the [Quick Start Guide](../getting_started/quickstart.md) to install the environment.

#### Recommended Optimization Library Configuration

@@ -34,6 +34,8 @@ pip install gradio

- [sglang-kernel](https://github.com/sgl-project/sglang/tree/main/sgl-kernel)
- [q8-kernel](https://github.com/KONAKONA666/q8_kernels) (only supports ADA architecture GPUs)

Install each operator as needed by following the tutorial on its project homepage.

### 🤖 Supported Models

#### 🎬 Image-to-Video Models

@@ -54,15 +56,21 @@ pip install gradio

| ✅ [Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v) | 14B | Distilled optimized version | High quality + fast inference |

**💡 Model Selection Recommendations**:
- **First-time use**: Recommend choosing distilled versions (`wan2.1_distill`)
- **Pursuing quality**: Choose 720p resolution or 14B parameter models
- **Pursuing speed**: Choose 480p resolution or 1.3B parameter models, and prioritize distilled versions
- **Resource-constrained**: Prioritize distilled versions and lower resolutions
- **Real-time applications**: Strongly recommend using distilled models (`wan2.1_distill`)

**🎯 Model Category Description**:
- **`wan2.1`**: Standard model; provides the best video generation quality and suits scenarios with extremely high quality requirements
- **`wan2.1_distill`**: Distilled model; optimized through knowledge distillation, it significantly improves inference speed and greatly reduces computation time while maintaining good quality, making it suitable for most application scenarios
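In practice, the category follows the checkpoint name: models whose names contain `StepDistill` are distilled checkpoints and should be launched with `--model_cls wan2.1_distill`, while everything else uses the default `wan2.1`. For example (the path is a placeholder; server options fall back to their defaults):

```bash
# Distilled image-to-video checkpoint -> distilled model class
python gradio_demo.py \
    --model_path /path/to/Wan2.1-I2V-14B-480P-StepDistill-CfgDistil-Lightx2v \
    --model_cls wan2.1_distill \
    --model_size 14b \
    --task i2v
```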
### Startup Methods

#### Method 1: Using Startup Script (Recommended)

**Linux Environment:**

```bash
# 1. Edit the startup script to configure relevant paths
cd app/
```

@@ -79,41 +87,84 @@ vim run_gradio.sh

```bash
# 2. Run the startup script
bash run_gradio.sh

# 3. Or start with parameters (distilled models recommended)
bash run_gradio.sh --task i2v --lang en --model_cls wan2.1 --model_size 14b --port 8032
bash run_gradio.sh --task t2v --lang en --model_cls wan2.1 --model_size 1.3b --port 8032
bash run_gradio.sh --task i2v --lang en --model_cls wan2.1_distill --model_size 14b --port 8032
bash run_gradio.sh --task t2v --lang en --model_cls wan2.1_distill --model_size 1.3b --port 8032
```

**Windows Environment:**

```cmd
# 1. Edit the startup script to configure relevant paths
cd app\
notepad run_gradio_win.bat

# Configuration items that need to be modified:
# - lightx2v_path: Lightx2v project root directory path
# - i2v_model_path: Image-to-video model path
# - t2v_model_path: Text-to-video model path
# 💾 Important note: point model paths to SSD storage locations
# Example: D:\models\ or E:\models\

# 2. Run the startup script
run_gradio_win.bat

# 3. Or start with parameters (distilled models recommended)
run_gradio_win.bat --task i2v --lang en --model_cls wan2.1 --model_size 14b --port 8032
run_gradio_win.bat --task t2v --lang en --model_cls wan2.1 --model_size 1.3b --port 8032
run_gradio_win.bat --task i2v --lang en --model_cls wan2.1_distill --model_size 14b --port 8032
run_gradio_win.bat --task t2v --lang en --model_cls wan2.1_distill --model_size 1.3b --port 8032
```

#### Method 2: Direct Command Line Startup

**Linux Environment:**

**Image-to-Video Mode:**
```bash
python gradio_demo.py \
    --model_path /path/to/Wan2.1-I2V-14B-480P-Lightx2v \
    --model_cls wan2.1 \
    --model_size 14b \
    --task i2v \
    --server_name 0.0.0.0 \
    --server_port 7862
```

**Text-to-Video Mode:**
```bash
python gradio_demo.py \
    --model_path /path/to/Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v \
    --model_cls wan2.1_distill \
    --model_size 14b \
    --task t2v \
    --server_name 0.0.0.0 \
    --server_port 7862
```

**Windows Environment:**

**Image-to-Video Mode:**
```cmd
python gradio_demo.py ^
    --model_path D:\models\Wan2.1-I2V-14B-480P-Lightx2v ^
    --model_cls wan2.1 ^
    --model_size 14b ^
    --task i2v ^
    --server_name 127.0.0.1 ^
    --server_port 7862
```

**Text-to-Video Mode:**
```cmd
python gradio_demo.py ^
    --model_path D:\models\Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v ^
    --model_cls wan2.1_distill ^
    --model_size 14b ^
    --task t2v ^
    --server_name 127.0.0.1 ^
    --server_port 7862
```
@@ -122,8 +173,8 @@ python gradio_demo_zh.py \

| Parameter | Type | Required | Default | Description |
|-----------|------|----------|---------|-------------|
| `--model_path` | str | ✅ | - | Model folder path |
| `--model_cls` | str | ❌ | wan2.1 | Model class: `wan2.1` (standard model) or `wan2.1_distill` (distilled model, faster inference) |
| `--model_size` | str | ✅ | - | Model size: `14b` (image-to-video or text-to-video) or `1.3b` (text-to-video) |
| `--task` | str | ✅ | - | Task type: `i2v` (image-to-video) or `t2v` (text-to-video) |
| `--server_port` | int | ❌ | 7862 | Server port |
| `--server_name` | str | ❌ | 0.0.0.0 | Server IP address |

@@ -197,23 +248,11 @@ After enabling "Auto-configure Inference Options", the system will automatically

**💡 For devices with insufficient VRAM or performance constraints**:

- **🎯 Model Selection**: Prioritize using distilled version models (`wan2.1_distill`)
- **⚡ Inference Steps**: Recommend setting to 4 steps
- **🔧 CFG Settings**: Recommend disabling the CFG option to improve generation speed
- **🔄 Auto-Configuration**: Enable "Auto-configure Inference Options"

## 🎨 Interface Description

@@ -278,5 +317,6 @@ nvidia-smi

```bash
htop
```

Welcome to submit Issues and Pull Requests to improve this project!

**Note**: Please comply with relevant laws and regulations when using videos generated by this tool, and do not use them for illegal purposes.
# Windows Local Deployment Guide

## 📖 Overview

This document provides detailed instructions for deploying LightX2V locally on Windows environments, including batch file inference, Gradio Web interface inference, and other usage methods.

## 🚀 Quick Start

### Environment Requirements

#### Hardware Requirements
- **GPU**: NVIDIA GPU, 8GB+ VRAM recommended
- **Memory**: 16GB+ RAM recommended
- **Storage**: An SSD is strongly recommended; mechanical hard drives cause slow model loading

#### Software Requirements
- **Operating System**: Windows 10/11
- **Python**: 3.12 or higher
- **CUDA**: 12.4 or higher
- **Dependencies**: See the LightX2V project's requirements_win.txt

### Installation Steps

1. **Clone Project**

```cmd
git clone https://github.com/ModelTC/LightX2V.git
cd LightX2V
```

2. **Install Dependencies**

```cmd
pip install -r requirements_win.txt
```

3. **Download Models**

Refer to the [Model Download Guide](../getting_started/quickstart.md) to download the required models.

## 🎯 Usage Methods

### Method 1: Using Batch File Inference

Follow the [Quick Start Guide](../getting_started/quickstart.md) to install the environment, then run with the [batch files](https://github.com/ModelTC/LightX2V/tree/main/scripts/win).

### Method 2: Using Gradio Web Interface Inference

#### Manual Gradio Configuration

Follow the [Quick Start Guide](../getting_started/quickstart.md) to install the environment, then see the [Gradio Deployment Guide](./deploy_gradio.md).

#### One-Click Gradio Startup (Recommended)

**📦 Download Software Package**
- [Baidu Cloud]() - To be added
- [Quark Cloud]() - To be added

**📁 Directory Structure**

After extraction, ensure the directory structure is as follows:

```
├── env/                     # LightX2V environment directory
├── LightX2V/                # LightX2V project directory
├── start_lightx2v.bat       # One-click startup script
├── lightx2v_config.txt      # Configuration file
├── LightX2V使用说明.txt      # LightX2V usage instructions
└── models/                  # Model storage directory
    ├── 说明.txt                                              # Model documentation
    ├── Wan2.1-I2V-14B-480P-Lightx2v/                        # Image-to-video model (480P)
    ├── Wan2.1-I2V-14B-720P-Lightx2v/                        # Image-to-video model (720P)
    ├── Wan2.1-I2V-14B-480P-StepDistill-CfgDistil-Lightx2v/  # Image-to-video model (4-step distillation, 480P)
    ├── Wan2.1-I2V-14B-720P-StepDistill-CfgDistil-Lightx2v/  # Image-to-video model (4-step distillation, 720P)
    ├── Wan2.1-T2V-1.3B-Lightx2v/                            # Text-to-video model (1.3B parameters)
    ├── Wan2.1-T2V-14B-Lightx2v/                             # Text-to-video model (14B parameters)
    └── Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v/      # Text-to-video model (4-step distillation)
```

**📋 Configuration Parameters**

Edit the `lightx2v_config.txt` file and modify the following parameters as needed:

```ini
# Task type (i2v: image-to-video, t2v: text-to-video)
task=i2v

# Interface language (zh: Chinese, en: English)
lang=en

# Server port
port=8032

# GPU device ID (0, 1, 2...)
gpu=0

# Model size (14b: 14B parameter model, 1.3b: 1.3B parameter model)
model_size=14b

# Model class (wan2.1: standard model, wan2.1_distill: distilled model)
model_cls=wan2.1
```

**⚠️ Important Note**: If you use a distilled model (model names containing the StepDistill-CfgDistil field), set `model_cls` to `wan2.1_distill`.

**🚀 Start Service**

Double-click `start_lightx2v.bat`; the script will:
1. Automatically read the configuration file
2. Verify model paths and file integrity
3. Start the Gradio Web interface
4. Automatically open the browser to access the service

**💡 Usage Suggestion**: After the Gradio Web page opens, check "Auto-configure Inference Options" so the system automatically selects a suitable optimization configuration for your machine. After changing the resolution, re-check "Auto-configure Inference Options".

**⚠️ Important Note**: On first run, the system automatically extracts the environment file `env.zip`; this may take several minutes, please be patient. Subsequent launches skip this step. You can also extract `env.zip` into the current directory manually to save time on the first startup.

### Method 3: Using ComfyUI Inference

TODO - ComfyUI integration guide to be added
# LightX2V Quick Start Guide

Welcome to LightX2V! This guide will help you quickly set up the environment and start using LightX2V for video generation.

## 📋 Table of Contents

- [System Requirements](#system-requirements)
- [Linux Environment Setup](#linux-environment-setup)
  - [Docker Environment (Recommended)](#docker-environment-recommended)
  - [Conda Environment Setup](#conda-environment-setup)
- [Windows Environment Setup](#windows-environment-setup)
- [Inference Usage](#inference-usage)

## 🚀 System Requirements

- **Operating System**: Linux (Ubuntu 18.04+) or Windows 10/11
- **Python**: 3.10 or higher
- **GPU**: NVIDIA GPU with CUDA support, at least 8GB VRAM
- **Memory**: 16GB or more recommended
- **Storage**: At least 50GB available space

## 🐧 Linux Environment Setup

### 🐳 Docker Environment (Recommended)

We strongly recommend using the Docker environment, which is the simplest and fastest installation method.

#### 1. Pull Image

Visit LightX2V's [Docker Hub](https://hub.docker.com/r/lightx2v/lightx2v/tags) and select a tag with the latest date, such as `25061301`:

```bash
# Pull the latest version of the LightX2V image
docker pull lightx2v/lightx2v:25061301
```

#### 2. Run Container

```bash
docker run --gpus all -itd --ipc=host --name [container_name] -v [mount_settings] --entrypoint /bin/bash [image_id]
```
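For reference, a filled-in version of the command above (the container name and mount path are placeholders to adapt to your setup; the tag can also be used in place of the image id):

```bash
docker run --gpus all -itd --ipc=host --name lightx2v \
    -v /data/models:/workspace/models \
    --entrypoint /bin/bash lightx2v/lightx2v:25061301
```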
#### 3. Domestic Mirror Source (Optional)

For users in mainland China, if the network is unstable when pulling images, you can pull from [Duduniao](https://docker.aityp.com/r/docker.io/lightx2v/lightx2v):

```bash
docker pull swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/lightx2v/lightx2v:25061301
```

### 🐍 Conda Environment Setup

If you prefer to set up the environment yourself using Conda, please follow these steps:

#### Step 1: Clone Repository

```bash
# Download project code
git clone https://github.com/ModelTC/LightX2V.git
cd LightX2V
```

#### Step 2: Create Conda Virtual Environment

```bash
# Create and activate conda environment
conda create -n lightx2v python=3.12 -y
conda activate lightx2v
```

#### Step 3: Install Dependencies

```bash
# Install basic dependencies
pip install -r requirements.txt
```

> 💡 **Note**: The Hunyuan model needs to run under transformers version 4.45.2. If you don't need to run the Hunyuan model, you can skip the transformers version restriction.
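If you do plan to run the Hunyuan model, pin the version mentioned in the note:

```bash
pip install transformers==4.45.2
```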
#### Step 4: Install Attention Operators

**Option A: Flash Attention 2**
```bash
git clone https://github.com/Dao-AILab/flash-attention.git --recursive
cd flash-attention && python setup.py install
```

**Option B: Flash Attention 3 (for Hopper architecture GPUs)**
```bash
cd flash-attention/hopper && python setup.py install
```

**Option C: SageAttention 2 (Recommended)**
```bash
git clone https://github.com/thu-ml/SageAttention.git
cd SageAttention && python setup.py install
```
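After building, a quick smoke test confirms the operator imports and its CUDA kernel runs. This is only a sketch: it assumes `sageattn` is the kernel entry point exposed by the SageAttention package and uses arbitrary tensor shapes in (batch, heads, seq_len, head_dim) layout.

```python
import torch
from sageattention import sageattn  # assumed entry point of the SageAttention package

# Tiny random attention call just to confirm the CUDA kernel loads and runs
q, k, v = (torch.randn(1, 8, 128, 64, dtype=torch.float16, device="cuda") for _ in range(3))
out = sageattn(q, k, v, is_causal=False)
print(out.shape)  # expected: torch.Size([1, 8, 128, 64])
```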
## 🪟 Windows Environment Setup

Windows systems only support Conda environment setup. Please follow these steps:

### 🐍 Conda Environment Setup

#### Step 1: Check CUDA Version

First, confirm your GPU driver and CUDA version:

```cmd
nvidia-smi
```

Record the **CUDA Version** shown in the output; subsequent installations need to stay consistent with it.

#### Step 2: Create Python Environment

```cmd
# Create new environment (Python 3.12 recommended)
conda create -n lightx2v python=3.12 -y

# Activate environment
conda activate lightx2v
```

> 💡 **Note**: Python 3.10 or higher is recommended for best compatibility.

#### Step 3: Install PyTorch Framework

**Method 1: Download Official Wheel Package (Recommended)**

1. Visit the [PyTorch Official Download Page](https://download.pytorch.org/whl/torch/)
2. Select the corresponding wheel package, making sure to match:
   - **Python Version**: Consistent with your environment
   - **CUDA Version**: Matches your GPU driver
   - **Platform**: Select the Windows version

**Example (Python 3.12 + PyTorch 2.6 + CUDA 12.4):**

```cmd
# Download and install PyTorch
pip install torch-2.6.0+cu124-cp312-cp312-win_amd64.whl

# Install supporting packages
pip install torchvision==0.21.0 torchaudio==2.6.0
```

**Method 2: Direct Installation via pip**

```cmd
# CUDA 12.4 version example
pip install torch==2.6.0+cu124 torchvision==0.21.0+cu124 torchaudio==2.6.0+cu124 --index-url https://download.pytorch.org/whl/cu124
```

#### Step 4: Install Windows Version vLLM

Download the corresponding wheel package from [vllm-windows releases](https://github.com/SystemPanic/vllm-windows/releases).

**Version Matching Requirements:**
- Python version matching
- PyTorch version matching
- CUDA version matching

```cmd
# Install vLLM (adjust to the actual filename)
pip install vllm-0.9.1+cu124-cp312-cp312-win_amd64.whl
```

#### Step 5: Install Attention Mechanism Operators

**Option A: Flash Attention 2**
```cmd
pip install flash-attn==2.7.2.post1
```

**Option B: SageAttention 2 (Strongly Recommended)**

**Download Sources:**
- [Windows Special Version 1](https://github.com/woct0rdho/SageAttention/releases)
- [Windows Special Version 2](https://github.com/sdbds/SageAttention-for-windows/releases)

```cmd
# Install SageAttention (adjust to the actual filename)
pip install sageattention-2.1.1+cu126torch2.6.0-cp312-cp312-win_amd64.whl
```

> ⚠️ **Note**: SageAttention's CUDA version doesn't need to be strictly aligned, but the Python and PyTorch versions must match.

#### Step 6: Clone Repository

```cmd
# Clone project code
git clone https://github.com/ModelTC/LightX2V.git
cd LightX2V

# Install Windows-specific dependencies
pip install -r requirements_win.txt
```

## 🎯 Inference Usage

### 📥 Model Preparation

Before starting inference, you need to download the model files in advance. We recommend:

- **Download Source**: Download models from the [LightX2V Official Hugging Face](https://huggingface.co/lightx2v/) or other open-source model repositories
- **Storage Location**: Store models on SSD disks for better read performance
- **Available Models**: Including Wan2.1-I2V, Wan2.1-T2V, and other models supporting different resolutions and functionalities
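For example, one of the checkpoints listed in the deployment guide can be fetched with the Hugging Face CLI (an illustrative invocation; the local target directory is a placeholder):

```bash
huggingface-cli download lightx2v/Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v \
    --local-dir /data/models/Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v
```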
### 📁 Configuration Files and Scripts

The configuration files used for inference are available [here](https://github.com/ModelTC/LightX2V/tree/main/configs), and the scripts are available [here](https://github.com/ModelTC/LightX2V/tree/main/scripts).

You need to configure the downloaded model path in the run script. In addition to the input arguments in the script, there are also some necessary parameters in the configuration file specified by `--config_json`; you can modify them as needed.
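Stripped of environment setup, a run script boils down to an invocation like the following (paths are placeholders; the exact flag set and config file come from the script you actually use):

```bash
python -m lightx2v.infer \
    --model_cls wan2.1 \
    --task t2v \
    --model_path /path/to/Wan2.1-T2V-1.3B-Lightx2v \
    --config_json ${lightx2v_path}/configs/wan_t2v.json
```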
### 🚀 Start Inference

#### Linux Environment

```bash
# Run after modifying the paths in the script
bash scripts/wan/run_wan_t2v.sh
```

#### Windows Environment

```cmd
# Use the Windows batch script
scripts\win\run_wan_t2v.bat
```

## 📞 Get Help

If you encounter problems during installation or usage, please:

1. Search for related issues in [GitHub Issues](https://github.com/ModelTC/LightX2V/issues)
2. Submit a new Issue describing your problem

---

🎉 **Congratulations!** You have successfully set up the LightX2V environment and can now start enjoying video generation!
# Gradio 部署指南

## 📖 概述

Lightx2v 是一个轻量级的视频推理和生成引擎,提供基于 Gradio 的 Web 界面,支持图像到视频(Image-to-Video)和文本到视频(Text-to-Video)两种生成模式。

## 📁 文件结构

```
LightX2V/app/
├── gradio_demo.py          # 英文界面演示
├── gradio_demo_zh.py       # 中文界面演示
├── run_gradio.sh           # 启动脚本
├── README.md               # 说明文档
├── saved_videos/           # 生成视频保存目录
└── inference_logs.log      # 推理日志
```

本项目包含两个主要演示文件:
- `gradio_demo.py` - 英文界面版本

@@ -12,27 +24,17 @@

### 环境要求

按照[快速开始文档](../getting_started/quickstart.md)安装环境

#### 推荐优化库配置

- [Flash attention](https://github.com/Dao-AILab/flash-attention)
- [Sage attention](https://github.com/thu-ml/SageAttention)
- [vllm-kernel](https://github.com/vllm-project/vllm)
- [sglang-kernel](https://github.com/sgl-project/sglang/tree/main/sgl-kernel)
- [q8-kernel](https://github.com/KONAKONA666/q8_kernels)(仅支持ADA架构的GPU)

可根据需要,按照各算子的项目主页教程进行安装

### 🤖 支持的模型

@@ -53,19 +55,22 @@ pip install gradio

| ✅ [Wan2.1-T2V-14B-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-T2V-14B-Lightx2v) | 14B | 标准版本 | 平衡速度和质量 |
| ✅ [Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v) | 14B | 蒸馏优化版 | 高质量+快速推理 |

**💡 模型选择建议**:
- **首次使用**: 建议选择蒸馏版本 (`wan2.1_distill`)
- **追求质量**: 选择720p分辨率或14B参数模型
- **追求速度**: 选择480p分辨率或1.3B参数模型,优先使用蒸馏版本
- **资源受限**: 优先选择蒸馏版本和较低分辨率
- **实时应用**: 强烈推荐使用蒸馏模型 (`wan2.1_distill`)

**🎯 模型类别说明**:
- **`wan2.1`**: 标准模型,提供最佳的视频生成质量,适合对质量要求极高的场景
- **`wan2.1_distill`**: 蒸馏模型,通过知识蒸馏技术优化,推理速度显著提升,在保持良好质量的同时大幅减少计算时间,适合大多数应用场景
### 启动方式

#### 方式一:使用启动脚本(推荐)

**Linux 环境:**

```bash
# 1. 编辑启动脚本,配置相关路径
cd app/
```

@@ -82,41 +87,84 @@ vim run_gradio.sh

```bash
# 2. 运行启动脚本
bash run_gradio.sh

# 3. 或使用参数启动(推荐使用蒸馏模型)
bash run_gradio.sh --task i2v --lang zh --model_cls wan2.1 --model_size 14b --port 8032
bash run_gradio.sh --task t2v --lang zh --model_cls wan2.1 --model_size 1.3b --port 8032
bash run_gradio.sh --task i2v --lang zh --model_cls wan2.1_distill --model_size 14b --port 8032
bash run_gradio.sh --task t2v --lang zh --model_cls wan2.1_distill --model_size 1.3b --port 8032
```

**Windows 环境:**

```cmd
# 1. 编辑启动脚本,配置相关路径
cd app\
notepad run_gradio_win.bat

# 需要修改的配置项:
# - lightx2v_path: Lightx2v项目根目录路径
# - i2v_model_path: 图像到视频模型路径
# - t2v_model_path: 文本到视频模型路径
# 💾 重要提示:建议将模型路径指向SSD存储位置
# 例如:D:\models\ 或 E:\models\

# 2. 运行启动脚本
run_gradio_win.bat

# 3. 或使用参数启动(推荐使用蒸馏模型)
run_gradio_win.bat --task i2v --lang zh --model_cls wan2.1 --model_size 14b --port 8032
run_gradio_win.bat --task t2v --lang zh --model_cls wan2.1 --model_size 1.3b --port 8032
run_gradio_win.bat --task i2v --lang zh --model_cls wan2.1_distill --model_size 14b --port 8032
run_gradio_win.bat --task t2v --lang zh --model_cls wan2.1_distill --model_size 1.3b --port 8032
```

#### 方式二:直接命令行启动

**Linux 环境:**

**图像到视频模式:**
```bash
python gradio_demo_zh.py \
    --model_path /path/to/Wan2.1-I2V-14B-480P-Lightx2v \
    --model_cls wan2.1 \
    --model_size 14b \
    --task i2v \
    --server_name 0.0.0.0 \
    --server_port 7862
```

**英文界面版本:**
```bash
python gradio_demo.py \
    --model_path /path/to/Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v \
    --model_cls wan2.1_distill \
    --model_size 14b \
    --task t2v \
    --server_name 0.0.0.0 \
    --server_port 7862
```

**Windows 环境:**

**图像到视频模式:**
```cmd
python gradio_demo_zh.py ^
    --model_path D:\models\Wan2.1-I2V-14B-480P-Lightx2v ^
    --model_cls wan2.1 ^
    --model_size 14b ^
    --task i2v ^
    --server_name 127.0.0.1 ^
    --server_port 7862
```

**英文界面版本:**
```cmd
python gradio_demo.py ^
    --model_path D:\models\Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v ^
    --model_cls wan2.1_distill ^
    --model_size 14b ^
    --task t2v ^
    --server_name 127.0.0.1 ^
    --server_port 7862
```
@@ -125,8 +173,8 @@ python gradio_demo.py \

| 参数 | 类型 | 必需 | 默认值 | 说明 |
|------|------|------|--------|------|
| `--model_path` | str | ✅ | - | 模型文件夹路径 |
| `--model_cls` | str | ❌ | wan2.1 | 模型类别:`wan2.1`(标准模型)或 `wan2.1_distill`(蒸馏模型,推理更快) |
| `--model_size` | str | ✅ | - | 模型大小:`14b` 或 `1.3b` |
| `--task` | str | ✅ | - | 任务类型:`i2v`(图像到视频)或 `t2v`(文本到视频) |
| `--server_port` | int | ❌ | 7862 | 服务器端口 |
| `--server_name` | str | ❌ | 0.0.0.0 | 服务器IP地址 |

@@ -178,7 +226,6 @@ python gradio_demo.py \

启用"自动配置推理选项"后,系统会根据您的硬件配置自动优化参数:

### GPU内存规则

- **80GB+**: 默认配置,无需优化
- **48GB**: 启用CPU卸载,卸载比例50%

@@ -201,23 +248,11 @@ python gradio_demo.py \

**💡 针对显存不足或性能受限的设备**:

- **🎯 模型选择**: 优先使用蒸馏版本模型 (`wan2.1_distill`)
- **⚡ 推理步数**: 建议设置为 4 步
- **🔧 CFG设置**: 建议关闭CFG选项以提升生成速度
- **🔄 自动配置**: 启用"自动配置推理选项"

## 🎨 界面说明

@@ -244,12 +279,12 @@ lightx2v/app/

   - 降低分辨率
   - 启用量化选项

2. **系统内存不足**
   - 启用CPU卸载
   - 启用延迟加载选项
   - 启用量化选项

3. **生成速度慢**
   - 减少推理步数
   - 启用自动配置
   - 使用轻量级模型

@@ -257,13 +292,13 @@ lightx2v/app/

   - 使用量化算子
   - 💾 **检查模型是否存放在SSD上**

4. **模型加载缓慢**
   - 💾 **将模型迁移到SSD存储**
   - 启用延迟加载选项
   - 检查磁盘I/O性能
   - 考虑使用NVMe SSD

5. **视频质量不佳**
   - 增加推理步数
   - 提高CFG缩放因子
   - 使用14B模型

@@ -282,8 +317,6 @@ nvidia-smi

```bash
htop
```

欢迎提交Issue和Pull Request来改进这个项目!

**注意**: 使用本工具生成的视频内容请遵守相关法律法规,不得用于非法用途。
# Windows 本地部署指南

## 📖 概述

本文档将详细指导您在Windows环境下完成LightX2V的本地部署配置,包括批处理文件推理、Gradio Web界面推理等多种使用方式。

## 🚀 快速开始

### 环境要求

#### 硬件要求
- **GPU**: NVIDIA GPU,建议 8GB+ VRAM
- **内存**: 建议 16GB+ RAM
- **存储**: 强烈建议使用 SSD 固态硬盘,机械硬盘会导致模型加载缓慢

## 🎯 使用方式

### 方式一:使用批处理文件推理

参考[快速开始文档](../getting_started/quickstart.md)安装环境,并使用[批处理文件](https://github.com/ModelTC/LightX2V/tree/main/scripts/win)运行。

### 方式二:使用Gradio Web界面推理

#### 手动配置Gradio

参考[快速开始文档](../getting_started/quickstart.md)安装环境,并参考[Gradio部署指南](./deploy_gradio.md)。

#### 一键启动Gradio(推荐)

**📦 下载软件包**
- [百度云]() - 待补充
- [夸克网盘]() - 待补充

**📁 目录结构**

解压后,确保目录结构如下:

```
├── env/                     # LightX2V 环境目录
├── LightX2V/                # LightX2V 项目目录
├── start_lightx2v.bat       # 一键启动脚本
├── lightx2v_config.txt      # 配置文件
├── LightX2V使用说明.txt      # LightX2V使用说明
└── models/                  # 模型存放目录
    ├── 说明.txt                                              # 模型说明文档
    ├── Wan2.1-I2V-14B-480P-Lightx2v/                        # 图像转视频模型(480P)
    ├── Wan2.1-I2V-14B-720P-Lightx2v/                        # 图像转视频模型(720P)
    ├── Wan2.1-I2V-14B-480P-StepDistill-CfgDistil-Lightx2v/  # 图像转视频模型(4步蒸馏,480P)
    ├── Wan2.1-I2V-14B-720P-StepDistill-CfgDistil-Lightx2v/  # 图像转视频模型(4步蒸馏,720P)
    ├── Wan2.1-T2V-1.3B-Lightx2v/                            # 文本转视频模型(1.3B参数)
    ├── Wan2.1-T2V-14B-Lightx2v/                             # 文本转视频模型(14B参数)
    └── Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v/      # 文本转视频模型(4步蒸馏)
```

**📋 配置参数**

编辑 `lightx2v_config.txt` 文件,根据需要修改以下参数:

```ini
# 任务类型 (i2v: 图像转视频, t2v: 文本转视频)
task=i2v

# 界面语言 (zh: 中文, en: 英文)
lang=zh

# 服务器端口
port=8032

# GPU设备ID (0, 1, 2...)
gpu=0

# 模型大小 (14b: 14B参数模型, 1.3b: 1.3B参数模型)
model_size=14b

# 模型类别 (wan2.1: 标准模型, wan2.1_distill: 蒸馏模型)
model_cls=wan2.1
```

**⚠️ 重要提示**: 如果使用蒸馏模型(模型名称包含StepDistill-CfgDistil字段),请将`model_cls`设置为`wan2.1_distill`。

**🚀 启动服务**

双击运行 `start_lightx2v.bat` 文件,脚本将:
1. 自动读取配置文件
2. 验证模型路径和文件完整性
3. 启动 Gradio Web 界面
4. 自动打开浏览器访问服务

**💡 使用建议**: 打开 Gradio Web 页面后,建议勾选"自动配置推理选项",系统会自动为您的机器选择合适的优化配置。重新选择分辨率后,也需要重新勾选"自动配置推理选项"。

**⚠️ 重要提示**: 首次运行时会自动解压环境文件 `env.zip`,此过程需要几分钟时间,请耐心等待。后续启动无需重复此步骤。您也可以手动解压 `env.zip` 文件到当前目录,以节省首次启动时间。

### 方式三:使用ComfyUI推理

TODO - 待补充ComfyUI集成指南
# LightX2V 快速入门指南

欢迎使用 LightX2V!本指南将帮助您快速搭建环境并开始使用 LightX2V 进行视频生成。

## 📋 目录

- [系统要求](#系统要求)
- [Linux 系统环境搭建](#linux-系统环境搭建)
  - [Docker 环境(推荐)](#docker-环境推荐)
  - [Conda 环境搭建](#conda-环境搭建)
- [Windows 系统环境搭建](#windows-系统环境搭建)
- [推理使用](#推理使用)

## 🚀 系统要求

- **操作系统**: Linux (Ubuntu 18.04+) 或 Windows 10/11
- **Python**: 3.10 或更高版本
- **GPU**: NVIDIA GPU,支持 CUDA,至少 8GB 显存
- **内存**: 建议 16GB 或更多
- **存储**: 至少 50GB 可用空间

## 🐧 Linux 系统环境搭建

### 🐳 Docker 环境(推荐)

我们强烈推荐使用 Docker 环境,这是最简单快捷的安装方式。

#### 1. 拉取镜像

访问 LightX2V 的 [Docker Hub](https://hub.docker.com/r/lightx2v/lightx2v/tags),选择一个最新日期的 tag,比如 `25061301`:

```bash
# 拉取最新版本的 LightX2V 镜像
docker pull lightx2v/lightx2v:25061301
```

#### 2. 运行容器

```bash
docker run --gpus all -itd --ipc=host --name [容器名] -v [挂载设置] --entrypoint /bin/bash [镜像id]
```

#### 3. 国内镜像源(可选)

对于中国大陆地区,如果拉取镜像时网络不稳定,可以从[渡渡鸟](https://docker.aityp.com/r/docker.io/lightx2v/lightx2v)上拉取:

```bash
docker pull swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/lightx2v/lightx2v:25061301
```

### 🐍 Conda 环境搭建

如果您希望使用 Conda 自行搭建环境,请按照以下步骤操作:

#### 步骤 1: 克隆项目

```bash
# 下载项目代码
git clone https://github.com/ModelTC/LightX2V.git
cd LightX2V
```

#### 步骤 2: 创建 conda 虚拟环境

```bash
# 创建并激活 conda 环境
conda create -n lightx2v python=3.12 -y
conda activate lightx2v
```

#### 步骤 3: 安装依赖

```bash
# 安装基础依赖
pip install -r requirements.txt
```

> 💡 **提示**: 混元模型需要在 4.45.2 版本的 transformers 下运行,如果您不需要运行混元模型,可以跳过 transformers 版本限制。

#### 步骤 4: 安装注意力机制算子

**选项 A: Flash Attention 2**
```bash
git clone https://github.com/Dao-AILab/flash-attention.git --recursive
cd flash-attention && python setup.py install
```

**选项 B: Flash Attention 3(用于 Hopper 架构显卡)**
```bash
cd flash-attention/hopper && python setup.py install
```

**选项 C: SageAttention 2(推荐)**
```bash
git clone https://github.com/thu-ml/SageAttention.git
cd SageAttention && python setup.py install
```

## 🪟 Windows 系统环境搭建

Windows 系统仅支持 Conda 环境搭建方式,请按照以下步骤操作:

### 🐍 Conda 环境搭建

#### 步骤 1: 检查 CUDA 版本

首先确认您的 GPU 驱动和 CUDA 版本:

```cmd
nvidia-smi
```

记录输出中的 **CUDA Version** 信息,后续安装时需要保持版本一致。

#### 步骤 2: 创建 Python 环境

```cmd
# 创建新环境(推荐 Python 3.12)
conda create -n lightx2v python=3.12 -y

# 激活环境
conda activate lightx2v
```

> 💡 **提示**: 建议使用 Python 3.10 或更高版本以获得最佳兼容性。

#### 步骤 3: 安装 PyTorch 框架

**方法一:下载官方 wheel 包(推荐)**

1. 访问 [PyTorch 官方下载页面](https://download.pytorch.org/whl/torch/)
2. 选择对应版本的 wheel 包,注意匹配:
   - **Python 版本**: 与您的环境一致
   - **CUDA 版本**: 与您的 GPU 驱动匹配
   - **平台**: 选择 Windows 版本

**示例(Python 3.12 + PyTorch 2.6 + CUDA 12.4):**

```cmd
# 下载并安装 PyTorch
pip install torch-2.6.0+cu124-cp312-cp312-win_amd64.whl

# 安装配套包
pip install torchvision==0.21.0 torchaudio==2.6.0
```

**方法二:使用 pip 直接安装**

```cmd
# CUDA 12.4 版本示例
pip install torch==2.6.0+cu124 torchvision==0.21.0+cu124 torchaudio==2.6.0+cu124 --index-url https://download.pytorch.org/whl/cu124
```

#### 步骤 4: 安装 Windows 版 vLLM

从 [vllm-windows releases](https://github.com/SystemPanic/vllm-windows/releases) 下载对应的 wheel 包。

**版本匹配要求:**
- Python 版本匹配
- PyTorch 版本匹配
- CUDA 版本匹配

```cmd
# 安装 vLLM(请根据实际文件名调整)
pip install vllm-0.9.1+cu124-cp312-cp312-win_amd64.whl
```

#### 步骤 5: 安装注意力机制算子

**选项 A: Flash Attention 2**
```cmd
pip install flash-attn==2.7.2.post1
```

**选项 B: SageAttention 2(强烈推荐)**

**下载源:**
- [Windows 专用版本 1](https://github.com/woct0rdho/SageAttention/releases)
- [Windows 专用版本 2](https://github.com/sdbds/SageAttention-for-windows/releases)

```cmd
# 安装 SageAttention(请根据实际文件名调整)
pip install sageattention-2.1.1+cu126torch2.6.0-cp312-cp312-win_amd64.whl
```

> ⚠️ **注意**: SageAttention 的 CUDA 版本可以不严格对齐,但 Python 和 PyTorch 版本必须匹配。

#### 步骤 6: 克隆项目

```cmd
# 克隆项目代码
git clone https://github.com/ModelTC/LightX2V.git
cd LightX2V

# 安装 Windows 专用依赖
pip install -r requirements_win.txt
```

## 🎯 推理使用

### 📥 模型准备

在开始推理之前,您需要提前下载好模型文件。我们推荐:

- **下载源**: 从 [LightX2V 官方 Hugging Face](https://huggingface.co/lightx2v/) 或其他开源模型库下载模型
- **存储位置**: 建议将模型存储在 SSD 磁盘上以获得更好的读取性能
- **可用模型**: 包括 Wan2.1-I2V、Wan2.1-T2V 等多种模型,支持不同分辨率和功能

### 📁 配置文件与脚本

推理会用到的配置文件都在[这里](https://github.com/ModelTC/LightX2V/tree/main/configs),脚本都在[这里](https://github.com/ModelTC/LightX2V/tree/main/scripts)。

需要将下载的模型路径配置到运行脚本中。除了脚本中的输入参数,`--config_json` 指向的配置文件中也会包含一些必要参数,您可以根据需要自行修改。

### 🚀 开始推理

#### Linux 环境

```bash
# 修改脚本中的路径后运行
bash scripts/wan/run_wan_t2v.sh
```

#### Windows 环境

```cmd
# 使用 Windows 批处理脚本
scripts\win\run_wan_t2v.bat
```

## 📞 获取帮助

如果您在安装或使用过程中遇到问题,请:

1. 在 [GitHub Issues](https://github.com/ModelTC/LightX2V/issues) 中搜索相关问题
2. 提交新的 Issue 描述您的问题

---

🎉 **恭喜!** 现在您已经成功搭建了 LightX2V 环境,可以开始享受视频生成的乐趣了!
@@ -30,7 +30,7 @@ export ENABLE_PROFILING_DEBUG=true
 export ENABLE_GRAPH_MODE=false
 python -m lightx2v.infer \
-    --model_cls wan2.1 \
+    --model_cls wan2.1_distill \
     --task i2v \
     --model_path $model_path \
     --config_json ${lightx2v_path}/configs/bench/lightx2v_5_distill.json \
...
@@ -31,7 +31,7 @@ export ENABLE_GRAPH_MODE=false
 export DTYPE=BF16
 python -m lightx2v.infer \
-    --model_cls wan2.1 \
+    --model_cls wan2.1_distill \
     --task i2v \
     --model_path $model_path \
     --config_json ${lightx2v_path}/configs/bench/lightx2v_6_distill.json \
...
@@ -3,8 +3,8 @@ chcp 65001 >nul
 echo 启动LightX2V I2V推理...
 :: 设置路径
-set lightx2v_path=F:\project\code\lightx2v-main\LightX2V-main\LightX2V-main
-set model_path=D:\Wan2.1-I2V-14B-480P-Lightx2v
+set lightx2v_path=D:\LightX2V
+set model_path=D:\models\Wan2.1-I2V-14B-480P-Lightx2v
 :: 检查CUDA_VISIBLE_DEVICES
 if "%CUDA_VISIBLE_DEVICES%"=="" (
...
@@ -3,8 +3,8 @@ chcp 65001 >nul
 echo 启动LightX2V T2V推理...
 :: 设置路径
-set lightx2v_path=F:\project\code\lightx2v-main\LightX2V-main\LightX2V-main
-set model_path=D:\Wan2.1-T2V-1.3B-Lightx2v
+set lightx2v_path=D:\LightX2V
+set model_path=D:\models\Wan2.1-T2V-1.3B-Lightx2v
 :: 检查CUDA_VISIBLE_DEVICES
 if "%CUDA_VISIBLE_DEVICES%"=="" (
...