Unverified Commit 9a765f9b authored by Gu Shiqiao, committed by GitHub

fix gradio (#587)

parent 8530a2fb
@@ -14,27 +14,15 @@
 # Lightx2v project root directory path
 # Example: /home/user/lightx2v or /data/video_gen/lightx2v
-lightx2v_path=/data/video_gen/LightX2V
+lightx2v_path=/path/to/LightX2V
 # Model path configuration
-# Image-to-video model path (for i2v tasks)
 # Example: /path/to/Wan2.1-I2V-14B-720P-Lightx2v
-i2v_model_path=/path/to/Wan2.1-I2V-14B-480P-Lightx2v
-# Text-to-video model path (for t2v tasks)
-# Example: /path/to/Wan2.1-T2V-1.3B
-t2v_model_path=/path/to/Wan2.1-T2V-1.3B
-# Model size configuration
-# Default model size (14b, 1.3b)
-model_size="14b"
-# Model class configuration
-# Default model class (wan2.1, wan2.1_distill)
-model_cls="wan2.1"
+model_path=/path/to/models
 # Server configuration
 server_name="0.0.0.0"
-server_port=8032
+server_port=8033
 # Output directory configuration
 output_dir="./outputs"
@@ -50,18 +38,12 @@ export PROFILING_DEBUG_LEVEL=2
 export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
 # ==================== Parameter Parsing ====================
-# Default task type
-task="i2v"
 # Default interface language
 lang="zh"
 # Parse command-line arguments
 while [[ $# -gt 0 ]]; do
     case $1 in
-        --task)
-            task="$2"
-            shift 2
-            ;;
         --lang)
             lang="$2"
             shift 2
@@ -75,55 +57,32 @@ while [[ $# -gt 0 ]]; do
             export CUDA_VISIBLE_DEVICES=$gpu_id
             shift 2
             ;;
-        --model_size)
-            model_size="$2"
-            shift 2
-            ;;
-        --model_cls)
-            model_cls="$2"
-            shift 2
-            ;;
         --output_dir)
             output_dir="$2"
             shift 2
             ;;
+        --model_path)
+            model_path="$2"
+            shift 2
+            ;;
         --help)
             echo "🎬 Lightx2v Gradio Demo Startup Script"
             echo "=========================================="
             echo "Usage: $0 [options]"
             echo ""
             echo "📋 Available options:"
-            echo "  --task i2v|t2v        Task type (default: i2v)"
-            echo "                        i2v: Image-to-video generation"
-            echo "                        t2v: Text-to-video generation"
             echo "  --lang zh|en          Interface language (default: zh)"
             echo "                        zh: Chinese interface"
             echo "                        en: English interface"
             echo "  --port PORT           Server port (default: 8032)"
             echo "  --gpu GPU_ID          GPU device ID (default: 0)"
-            echo "  --model_size MODEL_SIZE"
-            echo "                        Model size (default: 14b)"
-            echo "                        14b: 14 billion parameters model"
-            echo "                        1.3b: 1.3 billion parameters model"
-            echo "  --model_cls MODEL_CLASS"
-            echo "                        Model class (default: wan2.1)"
-            echo "                        wan2.1: Standard model variant"
-            echo "                        wan2.1_distill: Distilled model variant for faster inference"
-            echo "  --output_dir OUTPUT_DIR"
-            echo "                        Output video save directory (default: ./saved_videos)"
+            echo "  --model_path PATH     Model path (default: configured in script)"
+            echo "  --output_dir DIR      Output video save directory (default: ./outputs)"
            echo "  --help                Show this help message"
-            echo ""
-            echo "🚀 Usage examples:"
-            echo "  $0                                          # Default startup for image-to-video mode"
-            echo "  $0 --task i2v --lang zh --port 8032         # Start with specified parameters"
-            echo "  $0 --task t2v --lang en --port 7860         # Text-to-video with English interface"
-            echo "  $0 --task i2v --gpu 1 --port 8032           # Use GPU 1"
-            echo "  $0 --task t2v --model_size 1.3b             # Use 1.3B model"
-            echo "  $0 --task i2v --model_size 14b              # Use 14B model"
-            echo "  $0 --task i2v --model_cls wan2.1_distill    # Use distilled model"
-            echo "  $0 --task i2v --output_dir ./custom_output  # Use custom output directory"
             echo ""
             echo "📝 Notes:"
+            echo "  - Task type (i2v/t2v) and model type are selected in the web UI"
+            echo "  - Model class is auto-detected based on selected diffusion model"
             echo "  - Edit script to configure model paths before first use"
             echo "  - Ensure required Python dependencies are installed"
             echo "  - Recommended to use GPU with 8GB+ VRAM"
@@ -139,37 +98,11 @@ while [[ $# -gt 0 ]]; do
 done
 # ==================== Parameter Validation ====================
-if [[ "$task" != "i2v" && "$task" != "t2v" ]]; then
-    echo "Error: Task type must be 'i2v' or 't2v'"
-    exit 1
-fi
 if [[ "$lang" != "zh" && "$lang" != "en" ]]; then
     echo "Error: Language must be 'zh' or 'en'"
     exit 1
 fi
-# Validate model size
-if [[ "$model_size" != "14b" && "$model_size" != "1.3b" ]]; then
-    echo "Error: Model size must be '14b' or '1.3b'"
-    exit 1
-fi
-# Validate model class
-if [[ "$model_cls" != "wan2.1" && "$model_cls" != "wan2.1_distill" ]]; then
-    echo "Error: Model class must be 'wan2.1' or 'wan2.1_distill'"
-    exit 1
-fi
-# Select model path based on task type
-if [[ "$task" == "i2v" ]]; then
-    model_path=$i2v_model_path
-    echo "🎬 Starting Image-to-Video mode"
-else
-    model_path=$t2v_model_path
-    echo "🎬 Starting Text-to-Video mode"
-fi
 # Check if model path exists
 if [[ ! -d "$model_path" ]]; then
     echo "❌ Error: Model path does not exist"
@@ -208,13 +141,11 @@ echo "🚀 Lightx2v Gradio Demo Starting..."
 echo "=========================================="
 echo "📁 Project path: $lightx2v_path"
 echo "🤖 Model path: $model_path"
-echo "🎯 Task type: $task"
-echo "🤖 Model size: $model_size"
-echo "🤖 Model class: $model_cls"
 echo "🌏 Interface language: $lang"
 echo "🖥️ GPU device: $gpu_id"
 echo "🌐 Server address: $server_name:$server_port"
 echo "📁 Output directory: $output_dir"
+echo "📝 Note: Task type and model class are selected in web UI"
 echo "=========================================="
 # Display system resource information
@@ -239,11 +170,8 @@ echo "=========================================="
 # Start Python demo
 python $demo_file \
     --model_path "$model_path" \
-    --model_cls "$model_cls" \
-    --task "$task" \
     --server_name "$server_name" \
     --server_port "$server_port" \
-    --model_size "$model_size" \
     --output_dir "$output_dir"
 # Display final system resource usage
......
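After this change, the shell launcher above (and its Windows counterpart below) forwards only four flags to the demo process: `--model_path`, `--server_name`, `--server_port`, and `--output_dir`; task type and model class move into the web UI. For orientation, here is a minimal sketch, assuming an argparse-based entry point, of the CLI surface those flags imply. It is illustrative only, not the project's actual `gradio_demo.py`; the defaults mirror the values documented later in this diff.

```python
# Hedged sketch of the simplified CLI surface driven by run_gradio.sh.
# All defaults and help strings are taken from the docs in this commit.
import argparse


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="LightX2V Gradio demo (sketch)")
    parser.add_argument("--model_path", type=str, required=True,
                        help="Model root directory containing all model files")
    parser.add_argument("--server_name", type=str, default="0.0.0.0",
                        help="IP address to bind the Gradio server to")
    parser.add_argument("--server_port", type=int, default=7862,
                        help="Port for the Gradio server")
    parser.add_argument("--output_dir", type=str, default="./outputs",
                        help="Directory where generated videos are saved")
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    print(f"Would launch Gradio on {args.server_name}:{args.server_port} "
          f"with models from {args.model_path}")
```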
@@ -16,21 +16,9 @@ REM Example: D:\LightX2V
 set lightx2v_path=/path/to/LightX2V
 REM Model path configuration
-REM Image-to-video model path (for i2v tasks)
-REM Example: D:\models\Wan2.1-I2V-14B-480P-Lightx2v
-set i2v_model_path=/path/to/Wan2.1-I2V-14B-480P-Lightx2v
-REM Text-to-video model path (for t2v tasks)
-REM Example: D:\models\Wan2.1-T2V-1.3B
-set t2v_model_path=/path/to/Wan2.1-T2V-1.3B
-REM Model size configuration
-REM Default model size (14b, 1.3b)
-set model_size=14b
-REM Model class configuration
-REM Default model class (wan2.1, wan2.1_distill)
-set model_cls=wan2.1
+REM Model root directory path
+REM Example: D:\models\LightX2V
+set model_path=/path/to/LightX2V
 REM Server configuration
 set server_name=127.0.0.1
@@ -49,20 +37,12 @@ set PROFILING_DEBUG_LEVEL=2
 set PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
 REM ==================== Parameter Parsing ====================
-REM Default task type
-set task=i2v
 REM Default interface language
 set lang=zh
 REM Parse command line arguments
 :parse_args
 if "%1"=="" goto :end_parse
-if "%1"=="--task" (
-    set task=%2
-    shift
-    shift
-    goto :parse_args
-)
 if "%1"=="--lang" (
     set lang=%2
     shift
@@ -82,18 +62,6 @@ if "%1"=="--gpu" (
     shift
     goto :parse_args
 )
-if "%1"=="--model_size" (
-    set model_size=%2
-    shift
-    shift
-    goto :parse_args
-)
-if "%1"=="--model_cls" (
-    set model_cls=%2
-    shift
-    shift
-    goto :parse_args
-)
 if "%1"=="--output_dir" (
     set output_dir=%2
     shift
@@ -106,38 +74,24 @@ if "%1"=="--help" (
     echo Usage: %0 [options]
     echo.
     echo 📋 Available options:
-    echo   --task i2v^|t2v        Task type (default: i2v)
-    echo                         i2v: Image-to-video generation
-    echo                         t2v: Text-to-video generation
     echo   --lang zh^|en          Interface language (default: zh)
     echo                         zh: Chinese interface
     echo                         en: English interface
     echo   --port PORT           Server port (default: 8032)
     echo   --gpu GPU_ID          GPU device ID (default: 0)
-    echo   --model_size MODEL_SIZE
-    echo                         Model size (default: 14b)
-    echo                         14b: 14B parameter model
-    echo                         1.3b: 1.3B parameter model
-    echo   --model_cls MODEL_CLASS
-    echo                         Model class (default: wan2.1)
-    echo                         wan2.1: Standard model variant
-    echo                         wan2.1_distill: Distilled model variant for faster inference
     echo   --output_dir OUTPUT_DIR
-    echo                         Output video save directory (default: ./saved_videos)
+    echo                         Output video save directory (default: ./outputs)
     echo   --help                Show this help message
     echo.
     echo 🚀 Usage examples:
-    echo   %0                                          # Default startup for image-to-video mode
-    echo   %0 --task i2v --lang zh --port 8032         # Start with specified parameters
-    echo   %0 --task t2v --lang en --port 7860         # Text-to-video with English interface
-    echo   %0 --task i2v --gpu 1 --port 8032           # Use GPU 1
-    echo   %0 --task t2v --model_size 1.3b             # Use 1.3B model
-    echo   %0 --task i2v --model_size 14b              # Use 14B model
-    echo   %0 --task i2v --model_cls wan2.1_distill    # Use distilled model
-    echo   %0 --task i2v --output_dir ./custom_output  # Use custom output directory
+    echo   %0                                          # Default startup
+    echo   %0 --lang zh --port 8032                    # Start with specified parameters
+    echo   %0 --lang en --port 7860                    # English interface
+    echo   %0 --gpu 1 --port 8032                      # Use GPU 1
+    echo   %0 --output_dir ./custom_output             # Use custom output directory
     echo.
     echo 📝 Notes:
-    echo   - Edit script to configure model paths before first use
+    echo   - Edit script to configure model path before first use
     echo   - Ensure required Python dependencies are installed
     echo   - Recommended to use GPU with 8GB+ VRAM
     echo   - 🚨 Strongly recommend storing models on SSD for better performance
@@ -152,13 +106,6 @@ exit /b 1
 :end_parse
 REM ==================== Parameter Validation ====================
-if "%task%"=="i2v" goto :valid_task
-if "%task%"=="t2v" goto :valid_task
-echo Error: Task type must be 'i2v' or 't2v'
-pause
-exit /b 1
-:valid_task
 if "%lang%"=="zh" goto :valid_lang
 if "%lang%"=="en" goto :valid_lang
 echo Error: Language must be 'zh' or 'en'
@@ -166,29 +113,6 @@ pause
 exit /b 1
 :valid_lang
-if "%model_size%"=="14b" goto :valid_size
-if "%model_size%"=="1.3b" goto :valid_size
-echo Error: Model size must be '14b' or '1.3b'
-pause
-exit /b 1
-:valid_size
-if "%model_cls%"=="wan2.1" goto :valid_cls
-if "%model_cls%"=="wan2.1_distill" goto :valid_cls
-echo Error: Model class must be 'wan2.1' or 'wan2.1_distill'
-pause
-exit /b 1
-:valid_cls
-REM Select model path based on task type
-if "%task%"=="i2v" (
-    set model_path=%i2v_model_path%
-    echo 🎬 Starting Image-to-Video mode
-) else (
-    set model_path=%t2v_model_path%
-    echo 🎬 Starting Text-to-Video mode
-)
 REM Check if model path exists
 if not exist "%model_path%" (
@@ -230,9 +154,6 @@ echo 🚀 LightX2V Gradio Starting...
 echo ==========================================
 echo 📁 Project path: %lightx2v_path%
 echo 🤖 Model path: %model_path%
-echo 🎯 Task type: %task%
-echo 🤖 Model size: %model_size%
-echo 🤖 Model class: %model_cls%
 echo 🌏 Interface language: %lang%
 echo 🖥️ GPU device: %gpu_id%
 echo 🌐 Server address: %server_name%:%server_port%
@@ -262,11 +183,8 @@ echo ==========================================
 REM Start Python demo
 python %demo_file% ^
     --model_path "%model_path%" ^
-    --model_cls %model_cls% ^
-    --task %task% ^
     --server_name %server_name% ^
     --server_port %server_port% ^
-    --model_size %model_size% ^
     --output_dir "%output_dir%"
 REM Display final system resource usage
......
@@ -38,51 +38,52 @@ Follow the [Quick Start Guide](../getting_started/quickstart.md) to install the
 -[sglang-kernel](https://github.com/sgl-project/sglang/tree/main/sgl-kernel)
 -[q8-kernel](https://github.com/KONAKONA666/q8_kernels) (only supports ADA architecture GPUs)
-Install according to the project homepage tutorials for each operator as needed
+Install according to the project homepage tutorials for each operator as needed.
-### 🤖 Supported Models
-#### 🎬 Image-to-Video Models
-| Model Name | Resolution | Parameters | Features | Recommended Use |
-|------------|------------|------------|----------|-----------------|
-| ✅ [Wan2.1-I2V-14B-480P-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-I2V-14B-480P-Lightx2v) | 480p | 14B | Standard version | Balance speed and quality |
-| ✅ [Wan2.1-I2V-14B-720P-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-I2V-14B-720P-Lightx2v) | 720p | 14B | HD version | Pursue high-quality output |
-| ✅ [Wan2.1-I2V-14B-480P-StepDistill-CfgDistill-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-I2V-14B-480P-StepDistill-CfgDistill-Lightx2v) | 480p | 14B | Distilled optimized version | Faster inference speed |
-| ✅ [Wan2.1-I2V-14B-720P-StepDistill-CfgDistill-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-I2V-14B-720P-StepDistill-CfgDistill-Lightx2v) | 720p | 14B | HD distilled version | High quality + fast inference |
-#### 📝 Text-to-Video Models
-| Model Name | Parameters | Features | Recommended Use |
-|------------|------------|----------|-----------------|
-| ✅ [Wan2.1-T2V-1.3B-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-T2V-1.3B-Lightx2v) | 1.3B | Lightweight | Fast prototyping and testing |
-| ✅ [Wan2.1-T2V-14B-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-T2V-14B-Lightx2v) | 14B | Standard version | Balance speed and quality |
-| ✅ [Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v) | 14B | Distilled optimized version | High quality + fast inference |
-**💡 Model Selection Recommendations**:
-- **First-time use**: Recommend choosing distilled versions (`wan2.1_distill`)
-- **Pursuing quality**: Choose 720p resolution or 14B parameter models
-- **Pursuing speed**: Choose 480p resolution or 1.3B parameter models, prioritize distilled versions
-- **Resource-constrained**: Prioritize distilled versions and lower resolutions
-- **Real-time applications**: Strongly recommend using distilled models (`wan2.1_distill`)
-**🎯 Model Category Description**:
-- **`wan2.1`**: Standard model, provides the best video generation quality, suitable for scenarios with extremely high quality requirements
-- **`wan2.1_distill`**: Distilled model, optimized through knowledge distillation; inference is significantly faster and computation time greatly reduced while maintaining good quality, suitable for most application scenarios
-**📥 Model Download**:
-Refer to the [Model Structure Documentation](./model_structure.md) to download complete models (including quantized and non-quantized versions) or download only quantized/non-quantized versions.
-**Download Options**:
-- **Complete Model**: When downloading complete models with both quantized and non-quantized versions, you can freely choose the quantization precision for DIT/T5/CLIP in the advanced options of the `Gradio` Web frontend.
-- **Non-quantized Version Only**: When downloading only non-quantized versions, the quantization precision for `DIT/T5/CLIP` in the `Gradio` Web frontend can only be set to bf16/fp16. To use quantized models, manually download the quantized weights into the `i2v_model_path` or `t2v_model_path` directory Gradio is started with.
-- **Quantized Version Only**: When downloading only quantized versions, the quantization precision for `DIT/T5/CLIP` in the `Gradio` Web frontend can only be set to fp8 or int8 (depending on the weights you downloaded). To use non-quantized models, manually download the non-quantized weights into the `i2v_model_path` or `t2v_model_path` directory Gradio is started with.
-- **Note**: Whether you download complete models or partial models, the values of the `i2v_model_path` and `t2v_model_path` parameters should be first-level directory paths, e.g. `Wan2.1-I2V-14B-480P-Lightx2v/`, not `Wan2.1-I2V-14B-480P-Lightx2v/int8`.
+### 📥 Model Download
+Refer to the [Model Structure Documentation](../getting_started/model_structure.md) to download complete models (including quantized and non-quantized versions) or download only quantized/non-quantized versions.
+#### wan2.1 Model Directory Structure
+```
+models/
+├── wan2.1_i2v_720p_lightx2v_4step.safetensors                   # Original precision
+├── wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step.safetensors   # FP8 quantization
+├── wan2.1_i2v_720p_int8_lightx2v_4step.safetensors              # INT8 quantization
+├── wan2.1_i2v_720p_int8_lightx2v_4step_split                    # INT8 quantization, block storage directory
+├── wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step_split         # FP8 quantization, block storage directory
+├── Other weights (e.g., t2v)
+├── t5/clip/xlm-roberta-large/google                             # text and image encoders
+├── vae/lightvae/lighttae                                        # VAE
+└── config.json                                                  # Model configuration file
+```
+#### wan2.2 Model Directory Structure
+```
+models/
+├── wan2.2_i2v_A14b_high_noise_lightx2v_4step_1030.safetensors            # high noise, original precision
+├── wan2.2_i2v_A14b_high_noise_fp8_e4m3_lightx2v_4step_1030.safetensors   # high noise, FP8 quantization
+├── wan2.2_i2v_A14b_high_noise_int8_lightx2v_4step_1030.safetensors       # high noise, INT8 quantization
+├── wan2.2_i2v_A14b_high_noise_int8_lightx2v_4step_1030_split             # high noise, INT8 quantization, block storage directory
+├── wan2.2_i2v_A14b_low_noise_lightx2v_4step.safetensors                  # low noise, original precision
+├── wan2.2_i2v_A14b_low_noise_fp8_e4m3_lightx2v_4step.safetensors         # low noise, FP8 quantization
+├── wan2.2_i2v_A14b_low_noise_int8_lightx2v_4step.safetensors             # low noise, INT8 quantization
+├── wan2.2_i2v_A14b_low_noise_int8_lightx2v_4step_split                   # low noise, INT8 quantization, block storage directory
+├── t5/clip/xlm-roberta-large/google                                      # text and image encoders
+├── vae/lightvae/lighttae                                                 # VAE
+└── config.json                                                           # Model configuration file
+```
+**📝 Download Instructions**:
+- Model weights can be downloaded from HuggingFace:
+  - [Wan2.1-Distill-Models](https://huggingface.co/lightx2v/Wan2.1-Distill-Models)
+  - [Wan2.2-Distill-Models](https://huggingface.co/lightx2v/Wan2.2-Distill-Models)
+- Text and Image Encoders can be downloaded from [Encoders](https://huggingface.co/lightx2v/Encoderss)
+- VAE can be downloaded from [Autoencoders](https://huggingface.co/lightx2v/Autoencoders)
+- `xxx_split` directories (e.g., `wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step_split`) store the weights as multiple per-block safetensors files, which suits devices with limited memory (e.g., 16GB or less); download according to your situation.
 ### Startup Methods
@@ -96,8 +97,7 @@ vim run_gradio.sh
 # Configuration items that need to be modified:
 # - lightx2v_path: Lightx2v project root directory path
-# - i2v_model_path: Image-to-video model path
-# - t2v_model_path: Text-to-video model path
+# - model_path: Model root directory path (contains all model files)
 # 💾 Important note: Recommend pointing model paths to SSD storage locations
 # Example: /mnt/ssd/models/ or /data/ssd/models/
@@ -105,11 +105,9 @@ vim run_gradio.sh
 # 2. Run the startup script
 bash run_gradio.sh
-# 3. Or start with parameters (recommended using distilled models)
-bash run_gradio.sh --task i2v --lang en --model_cls wan2.1 --model_size 14b --port 8032
-bash run_gradio.sh --task t2v --lang en --model_cls wan2.1 --model_size 1.3b --port 8032
-bash run_gradio.sh --task i2v --lang en --model_cls wan2.1_distill --model_size 14b --port 8032
-bash run_gradio.sh --task t2v --lang en --model_cls wan2.1_distill --model_size 1.3b --port 8032
+# 3. Or start with parameters
+bash run_gradio.sh --lang en --port 8032
+bash run_gradio.sh --lang zh --port 7862
 ```
 **Windows Environment:**
@@ -120,8 +118,7 @@ notepad run_gradio_win.bat
 # Configuration items that need to be modified:
 # - lightx2v_path: Lightx2v project root directory path
-# - i2v_model_path: Image-to-video model path
-# - t2v_model_path: Text-to-video model path
+# - model_path: Model root directory path (contains all model files)
 # 💾 Important note: Recommend pointing model paths to SSD storage locations
 # Example: D:\models\ or E:\models\
@@ -129,201 +126,101 @@ notepad run_gradio_win.bat
 # 2. Run the startup script
 run_gradio_win.bat
-# 3. Or start with parameters (recommended using distilled models)
-run_gradio_win.bat --task i2v --lang en --model_cls wan2.1 --model_size 14b --port 8032
-run_gradio_win.bat --task t2v --lang en --model_cls wan2.1 --model_size 1.3b --port 8032
-run_gradio_win.bat --task i2v --lang en --model_cls wan2.1_distill --model_size 14b --port 8032
-run_gradio_win.bat --task t2v --lang en --model_cls wan2.1_distill --model_size 1.3b --port 8032
+# 3. Or start with parameters
+run_gradio_win.bat --lang en --port 8032
+run_gradio_win.bat --lang zh --port 7862
 ```
 #### Method 2: Direct Command Line Startup
+```bash
+pip install -v git+https://github.com/ModelTC/LightX2V.git
+```
 **Linux Environment:**
-**Image-to-Video Mode:**
+**English Interface Version:**
 ```bash
 python gradio_demo.py \
-    --model_path /path/to/Wan2.1-I2V-14B-480P-Lightx2v \
-    --model_cls wan2.1 \
-    --model_size 14b \
-    --task i2v \
+    --model_path /path/to/models \
     --server_name 0.0.0.0 \
     --server_port 7862
 ```
-**English Interface Version:**
+**Chinese Interface Version:**
 ```bash
-python gradio_demo.py \
-    --model_path /path/to/Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v \
-    --model_cls wan2.1_distill \
-    --model_size 14b \
-    --task t2v \
+python gradio_demo_zh.py \
+    --model_path /path/to/models \
     --server_name 0.0.0.0 \
     --server_port 7862
 ```
 **Windows Environment:**
-**Image-to-Video Mode:**
+**English Interface Version:**
 ```cmd
 python gradio_demo.py ^
-    --model_path D:\models\Wan2.1-I2V-14B-480P-Lightx2v ^
-    --model_cls wan2.1 ^
-    --model_size 14b ^
-    --task i2v ^
+    --model_path D:\models ^
     --server_name 127.0.0.1 ^
     --server_port 7862
 ```
-**English Interface Version:**
+**Chinese Interface Version:**
 ```cmd
-python gradio_demo.py ^
-    --model_path D:\models\Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v ^
-    --model_cls wan2.1_distill ^
-    --model_size 14b ^
-    --task t2v ^
+python gradio_demo_zh.py ^
+    --model_path D:\models ^
     --server_name 127.0.0.1 ^
     --server_port 7862
 ```
+**💡 Tip**: Model type (wan2.1/wan2.2), task type (i2v/t2v), and specific model file selection are all configured in the Web interface.
 ## 📋 Command Line Parameters
 | Parameter | Type | Required | Default | Description |
 |-----------|------|----------|---------|-------------|
-| `--model_path` | str | ✅ | - | Model folder path |
-| `--model_cls` | str | ❌ | wan2.1 | Model class: `wan2.1` (standard model) or `wan2.1_distill` (distilled model, faster inference) |
-| `--model_size` | str | ✅ | - | Model size: `14b` (image-to-video or text-to-video) or `1.3b` (text-to-video) |
-| `--task` | str | ✅ | - | Task type: `i2v` (image-to-video) or `t2v` (text-to-video) |
+| `--model_path` | str | ✅ | - | Model root directory path (directory containing all model files) |
 | `--server_port` | int | ❌ | 7862 | Server port |
 | `--server_name` | str | ❌ | 0.0.0.0 | Server IP address |
+| `--output_dir` | str | ❌ | ./outputs | Output video save directory |
+**💡 Note**: Model type (wan2.1/wan2.2), task type (i2v/t2v), and specific model file selection are all configured in the Web interface.
 ## 🎯 Features
-### Basic Settings
+### Model Configuration
+- **Model Type**: Supports the wan2.1 and wan2.2 model architectures
+- **Task Type**: Supports Image-to-Video (i2v) and Text-to-Video (t2v) generation modes
+- **Model Selection**: The frontend automatically identifies and filters available model files, with automatic quantization precision detection
+- **Encoder Configuration**: Supports selection of the T5 text encoder, CLIP image encoder, and VAE decoder
+- **Operator Selection**: Supports multiple attention operators and quantized matrix multiplication operators; the system sorts them automatically by installation status
-#### Input Parameters
+### Input Parameters
 - **Prompt**: Describe the expected video content
 - **Negative Prompt**: Specify elements you don't want to appear
+- **Input Image**: Upload the input image required in i2v mode
 - **Resolution**: Supports multiple preset resolutions (480p/540p/720p)
 - **Random Seed**: Controls the randomness of generation results
-- **Inference Steps**: Affects the balance between generation quality and speed
+- **Inference Steps**: Affects the balance between generation quality and speed (defaults to 4 steps for distilled models)
-#### Video Parameters
+### Video Parameters
 - **FPS**: Frames per second
 - **Total Frames**: Video length
-- **CFG Scale Factor**: Controls prompt influence strength (1-10)
+- **CFG Scale Factor**: Controls prompt influence strength (1-10, defaults to 1 for distilled models)
 - **Distribution Shift**: Controls generation style deviation degree (0-10)
-### Advanced Optimization Options
-#### GPU Memory Optimization
-- **Chunked Rotary Position Embedding**: Saves GPU memory
-- **Rotary Embedding Chunk Size**: Controls chunk granularity
-- **Clean CUDA Cache**: Promptly frees GPU memory
-#### Asynchronous Offloading
-- **CPU Offloading**: Transfers partial computation to CPU
-- **Lazy Loading**: Loads model components on demand, significantly reducing system memory consumption
-- **Offload Granularity Control**: Fine-grained control of offloading strategies
-#### Low-Precision Quantization
-- **Attention Operators**: Flash Attention, Sage Attention, etc.
-- **Quantization Operators**: vLLM, SGL, Q8F, etc.
-- **Precision Modes**: FP8, INT8, BF16, etc.
-#### VAE Optimization
-- **Lightweight VAE**: Accelerates the decoding process
-- **VAE Tiling Inference**: Reduces memory usage
-#### Feature Caching
-- **Tea Cache**: Caches intermediate features to accelerate generation
-- **Cache Threshold**: Controls cache trigger conditions
-- **Key Step Caching**: Writes cache only at key steps
 ## 🔧 Auto-Configuration Feature
-After enabling "Auto-configure Inference Options", the system will automatically optimize parameters based on your hardware configuration:
-### GPU Memory Rules
-- **80GB+**: Default configuration, no optimization needed
-- **48GB**: Enable CPU offloading, offload ratio 50%
-- **40GB**: Enable CPU offloading, offload ratio 80%
-- **32GB**: Enable CPU offloading, offload ratio 100%
-- **24GB**: Enable BF16 precision, VAE tiling
-- **16GB**: Enable chunked offloading, rotary embedding chunking
-- **12GB**: Enable cache cleaning, lightweight VAE
-- **8GB**: Enable quantization, lazy loading
-### CPU Memory Rules
-- **128GB+**: Default configuration
-- **64GB**: Enable DIT quantization
-- **32GB**: Enable lazy loading
-- **16GB**: Enable full model quantization
-## ⚠️ Important Notes
-### 🚀 Low-Resource Device Optimization Recommendations
-**💡 For devices with insufficient VRAM or performance constraints**:
-- **🎯 Model Selection**: Prioritize distilled version models (`wan2.1_distill`)
-- **⚡ Inference Steps**: Recommend setting to 4 steps
-- **🔧 CFG Settings**: Recommend disabling the CFG option to improve generation speed
-- **🔄 Auto-Configuration**: Enable "Auto-configure Inference Options"
-- **💾 Storage Optimization**: Ensure models are stored on SSD for optimal loading performance
-## 🎨 Interface Description
-### Basic Settings Tab
-- **Input Parameters**: Prompts, resolution, and other basic settings
-- **Video Parameters**: FPS, frame count, CFG, and other video generation parameters
-- **Output Settings**: Video save path configuration
-### Advanced Options Tab
-- **GPU Memory Optimization**: Memory management options
-- **Asynchronous Offloading**: CPU offloading and lazy loading
-- **Low-Precision Quantization**: Various quantization optimization options
-- **VAE Optimization**: Variational autoencoder optimization
-- **Feature Caching**: Cache strategy configuration
-## 🔍 Troubleshooting
-### Common Issues
-**💡 Tip**: Generally, after enabling "Auto-configure Inference Options", the system automatically optimizes parameter settings for your hardware, and performance issues usually do not occur. If you encounter problems, refer to the following solutions:
-1. **Gradio webpage opens blank**
-   - Try upgrading gradio: `pip install --upgrade gradio`
-2. **Insufficient CUDA memory**
-   - Enable CPU offloading
-   - Reduce the resolution
-   - Enable quantization options
-3. **Insufficient system memory**
-   - Enable CPU offloading
-   - Enable the lazy loading option
-   - Enable quantization options
-4. **Slow generation speed**
-   - Reduce inference steps
-   - Enable auto-configuration
-   - Use lightweight models
-   - Enable Tea Cache
-   - Use quantization operators
-   - 💾 **Check whether models are stored on SSD**
-5. **Slow model loading**
-   - 💾 **Migrate models to SSD storage**
-   - Enable the lazy loading option
-   - Check disk I/O performance
-   - Consider using an NVMe SSD
-6. **Poor video quality**
-   - Increase inference steps
-   - Increase the CFG scale factor
-   - Use 14B models
-   - Optimize prompts
+The system automatically configures optimal inference options based on your hardware configuration (GPU VRAM and CPU memory) without manual adjustment. The best configuration is automatically applied on startup, including:
+- **GPU Memory Optimization**: Automatically enables CPU offloading, VAE tiling inference, etc. based on VRAM size
+- **CPU Memory Optimization**: Automatically enables lazy loading, module unloading, etc. based on system memory
+- **Operator Selection**: Automatically selects the best installed operators (sorted by priority)
+- **Quantization Configuration**: Automatically detects and applies quantization precision based on model file names
 ### Log Viewing
......
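The new auto-configuration section above replaces the explicit per-VRAM rule table from the previous revision of this page. Below is a hedged sketch of how such threshold-based selection could look; it is not LightX2V's actual implementation, the thresholds loosely follow the removed rule table, and every name in it is hypothetical.

```python
# Hypothetical sketch of tiered, hardware-driven auto-configuration.
# Thresholds approximate the rule table removed in this commit.
from dataclasses import dataclass, field


@dataclass
class InferenceConfig:
    cpu_offload: bool = False
    offload_ratio: float = 0.0
    vae_tiling: bool = False
    lazy_load: bool = False
    quantize: bool = False
    notes: list = field(default_factory=list)


def auto_configure(gpu_vram_gb: float, cpu_ram_gb: float) -> InferenceConfig:
    cfg = InferenceConfig()
    # GPU VRAM tiers: each lower tier adds a larger memory saving
    # at a small speed cost, strictly extending the tier above it.
    if gpu_vram_gb >= 80:
        cfg.notes.append("default config, no optimization needed")
    elif gpu_vram_gb >= 48:
        cfg.cpu_offload, cfg.offload_ratio = True, 0.5
    elif gpu_vram_gb >= 40:
        cfg.cpu_offload, cfg.offload_ratio = True, 0.8
    elif gpu_vram_gb >= 24:
        cfg.cpu_offload, cfg.offload_ratio = True, 1.0
        cfg.vae_tiling = True
    else:
        cfg.cpu_offload, cfg.offload_ratio = True, 1.0
        cfg.vae_tiling = True
        cfg.quantize = True
        cfg.lazy_load = True
    # CPU RAM tiers: lazy loading and quantization kick in as memory shrinks.
    if cpu_ram_gb < 32:
        cfg.lazy_load = True
        cfg.quantize = True
    return cfg


if __name__ == "__main__":
    print(auto_configure(gpu_vram_gb=24, cpu_ram_gb=32))
```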
@@ -38,51 +38,53 @@ LightX2V/app/
 -[sglang-kernel](https://github.com/sgl-project/sglang/tree/main/sgl-kernel)
 -[q8-kernel](https://github.com/KONAKONA666/q8_kernels) (only supports ADA architecture GPUs)
 Install according to the project homepage tutorials for each operator as needed
-### 🤖 Supported Models
-#### 🎬 Image-to-Video Models
-| Model Name | Resolution | Parameters | Features | Recommended Scenario |
-|----------|--------|--------|------|----------|
-| ✅ [Wan2.1-I2V-14B-480P-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-I2V-14B-480P-Lightx2v) | 480p | 14B | Standard version | Balances speed and quality |
-| ✅ [Wan2.1-I2V-14B-720P-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-I2V-14B-720P-Lightx2v) | 720p | 14B | HD version | For high-quality output |
-| ✅ [Wan2.1-I2V-14B-480P-StepDistill-CfgDistill-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-I2V-14B-480P-StepDistill-CfgDistill-Lightx2v) | 480p | 14B | Distilled optimized version | Faster inference speed |
-| ✅ [Wan2.1-I2V-14B-720P-StepDistill-CfgDistill-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-I2V-14B-720P-StepDistill-CfgDistill-Lightx2v) | 720p | 14B | HD distilled version | High quality + fast inference |
-#### 📝 Text-to-Video Models
-| Model Name | Parameters | Features | Recommended Scenario |
-|----------|--------|------|----------|
-| ✅ [Wan2.1-T2V-1.3B-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-T2V-1.3B-Lightx2v) | 1.3B | Lightweight | Fast prototyping and testing |
-| ✅ [Wan2.1-T2V-14B-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-T2V-14B-Lightx2v) | 14B | Standard version | Balances speed and quality |
-| ✅ [Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v) | 14B | Distilled optimized version | High quality + fast inference |
-**💡 Model Selection Recommendations**:
-- **First-time use**: distilled versions (`wan2.1_distill`) are recommended
-- **For quality**: choose 720p resolution or 14B-parameter models
-- **For speed**: choose 480p resolution or 1.3B-parameter models, preferring distilled versions
-- **Resource-constrained**: prefer distilled versions and lower resolutions
-- **Real-time applications**: distilled models (`wan2.1_distill`) are strongly recommended
-**🎯 Model Category Description**:
-- **`wan2.1`**: standard model; provides the best video generation quality, suited to scenarios with extremely high quality requirements
-- **`wan2.1_distill`**: distilled model, optimized via knowledge distillation; inference is significantly faster and computation time greatly reduced while maintaining good quality, suited to most application scenarios
-**📥 Model Download**:
-Refer to the [Model Structure Documentation](./model_structure.md) to download complete models (including quantized and non-quantized versions) or only quantized/non-quantized versions.
-**Download Options**
-- **Complete model**: When downloading a complete model containing both quantized and non-quantized versions, the quantization precision of DIT/T5/CLIP can be freely chosen in the advanced options of the `Gradio` web frontend.
-- **Non-quantized version only**: When downloading only the non-quantized version, the quantization precision of `DIT/T5/CLIP` in the `Gradio` web frontend can only be set to bf16/fp16. To use quantized models, manually download the quantized weights into the `i2v_model_path` or `t2v_model_path` directory Gradio is started with.
-- **Quantized version only**: When downloading only the quantized version, the quantization precision of `DIT/T5/CLIP` in the `Gradio` web frontend can only be set to fp8 or int8 (depending on the weights you downloaded). To use non-quantized models, manually download the non-quantized weights into the `i2v_model_path` or `t2v_model_path` directory Gradio is started with.
-- **Note**: Whether you download the complete model or only part of it, the `i2v_model_path` and `t2v_model_path` parameters should point to the first-level directory, e.g. `Wan2.1-I2V-14B-480P-Lightx2v/`, not `Wan2.1-I2V-14B-480P-Lightx2v/int8`
+### 📥 Model Download
+Refer to the [Model Structure Documentation](../getting_started/model_structure.md) to download complete models (including quantized and non-quantized versions) or only quantized/non-quantized versions.
+#### wan2.1 Model Directory Structure
+```
+models/
+├── wan2.1_i2v_720p_lightx2v_4step.safetensors                   # original precision
+├── wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step.safetensors   # FP8 quantization
+├── wan2.1_i2v_720p_int8_lightx2v_4step.safetensors              # INT8 quantization
+├── wan2.1_i2v_720p_int8_lightx2v_4step_split                    # INT8 quantization, block storage directory
+├── wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step_split         # FP8 quantization, block storage directory
+├── Other weights (e.g., t2v)
+├── t5/clip/xlm-roberta-large/google                             # text and image encoders
+├── vae/lightvae/lighttae                                        # VAE
+└── config.json                                                  # model configuration file
+```
+#### wan2.2 Model Directory Structure
+```
+models/
+├── wan2.2_i2v_A14b_high_noise_lightx2v_4step_1030.safetensors            # high noise, original precision
+├── wan2.2_i2v_A14b_high_noise_fp8_e4m3_lightx2v_4step_1030.safetensors   # high noise, FP8 quantization
+├── wan2.2_i2v_A14b_high_noise_int8_lightx2v_4step_1030.safetensors       # high noise, INT8 quantization
+├── wan2.2_i2v_A14b_high_noise_int8_lightx2v_4step_1030_split             # high noise, INT8 quantization, block storage directory
+├── wan2.2_i2v_A14b_low_noise_lightx2v_4step.safetensors                  # low noise, original precision
+├── wan2.2_i2v_A14b_low_noise_fp8_e4m3_lightx2v_4step.safetensors         # low noise, FP8 quantization
+├── wan2.2_i2v_A14b_low_noise_int8_lightx2v_4step.safetensors             # low noise, INT8 quantization
+├── wan2.2_i2v_A14b_low_noise_int8_lightx2v_4step_split                   # low noise, INT8 quantization, block storage directory
+├── t5/clip/xlm-roberta-large/google                                      # text and image encoders
+├── vae/lightvae/lighttae                                                 # VAE
+└── config.json                                                           # model configuration file
+```
+**📝 Download Instructions**:
+- Model weights can be downloaded from HuggingFace:
+  - [Wan2.1-Distill-Models](https://huggingface.co/lightx2v/Wan2.1-Distill-Models)
+  - [Wan2.2-Distill-Models](https://huggingface.co/lightx2v/Wan2.2-Distill-Models)
+- Text and Image Encoders can be downloaded from [Encoders](https://huggingface.co/lightx2v/Encoderss)
+- VAE can be downloaded from [Autoencoders](https://huggingface.co/lightx2v/Autoencoders)
+- `xxx_split` directories (e.g., `wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step_split`) store the weights as multiple per-block safetensors files, which suits devices with limited memory (e.g., 16GB or less); download according to your situation
 ### Startup Methods
@@ -96,8 +98,7 @@ vim run_gradio.sh
 # Configuration items that need to be modified:
 # - lightx2v_path: Lightx2v project root directory path
-# - i2v_model_path: Image-to-video model path
-# - t2v_model_path: Text-to-video model path
+# - model_path: Model root directory path (contains all model files)
 # 💾 Important note: Recommend pointing model paths to SSD storage locations
 # Example: /mnt/ssd/models/ or /data/ssd/models/
@@ -105,11 +106,9 @@ vim run_gradio.sh
 # 2. Run the startup script
 bash run_gradio.sh
-# 3. Or start with parameters (recommended using distilled models)
-bash run_gradio.sh --task i2v --lang zh --model_cls wan2.1 --model_size 14b --port 8032
-bash run_gradio.sh --task t2v --lang zh --model_cls wan2.1 --model_size 1.3b --port 8032
-bash run_gradio.sh --task i2v --lang zh --model_cls wan2.1_distill --model_size 14b --port 8032
-bash run_gradio.sh --task t2v --lang zh --model_cls wan2.1_distill --model_size 1.3b --port 8032
+# 3. Or start with parameters
+bash run_gradio.sh --lang zh --port 8032
+bash run_gradio.sh --lang en --port 7862
 ```
 **Windows Environment:**
@@ -120,8 +119,7 @@ notepad run_gradio_win.bat
 # Configuration items that need to be modified:
 # - lightx2v_path: Lightx2v project root directory path
-# - i2v_model_path: Image-to-video model path
-# - t2v_model_path: Text-to-video model path
+# - model_path: Model root directory path (contains all model files)
 # 💾 Important note: Recommend pointing model paths to SSD storage locations
 # Example: D:\models\ or E:\models\
@@ -129,24 +127,23 @@ notepad run_gradio_win.bat
 # 2. Run the startup script
 run_gradio_win.bat
-# 3. Or start with parameters (recommended using distilled models)
-run_gradio_win.bat --task i2v --lang zh --model_cls wan2.1 --model_size 14b --port 8032
-run_gradio_win.bat --task t2v --lang zh --model_cls wan2.1 --model_size 1.3b --port 8032
-run_gradio_win.bat --task i2v --lang zh --model_cls wan2.1_distill --model_size 14b --port 8032
-run_gradio_win.bat --task t2v --lang zh --model_cls wan2.1_distill --model_size 1.3b --port 8032
+# 3. Or start with parameters
+run_gradio_win.bat --lang zh --port 8032
+run_gradio_win.bat --lang en --port 7862
 ```
 #### Method 2: Direct Command Line Startup
+```bash
+pip install -v git+https://github.com/ModelTC/LightX2V.git
+```
 **Linux Environment:**
-**Image-to-Video Mode:**
+**Chinese Interface Version:**
 ```bash
 python gradio_demo_zh.py \
-    --model_path /path/to/Wan2.1-I2V-14B-480P-Lightx2v \
-    --model_cls wan2.1 \
-    --model_size 14b \
-    --task i2v \
+    --model_path /path/to/models \
     --server_name 0.0.0.0 \
     --server_port 7862
 ```
@@ -154,176 +151,77 @@ python gradio_demo_zh.py \
 **English Interface Version:**
 ```bash
 python gradio_demo.py \
-    --model_path /path/to/Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v \
-    --model_cls wan2.1_distill \
-    --model_size 14b \
-    --task t2v \
+    --model_path /path/to/models \
     --server_name 0.0.0.0 \
     --server_port 7862
 ```
 **Windows Environment:**
-**Image-to-Video Mode:**
+**Chinese Interface Version:**
 ```cmd
 python gradio_demo_zh.py ^
-    --model_path D:\models\Wan2.1-I2V-14B-480P-Lightx2v ^
-    --model_cls wan2.1 ^
-    --model_size 14b ^
-    --task i2v ^
+    --model_path D:\models ^
     --server_name 127.0.0.1 ^
     --server_port 7862
 ```
 **English Interface Version:**
 ```cmd
-python gradio_demo_zh.py ^
-    --model_path D:\models\Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v ^
-    --model_cls wan2.1_distill ^
-    --model_size 14b ^
-    --task i2v ^
+python gradio_demo.py ^
+    --model_path D:\models ^
     --server_name 127.0.0.1 ^
     --server_port 7862
 ```
+**💡 Tip**: Model type (wan2.1/wan2.2), task type (i2v/t2v), and specific model file selection are all configured in the Web interface.
 ## 📋 Command Line Parameters
 | Parameter | Type | Required | Default | Description |
 |------|------|------|--------|------|
-| `--model_path` | str | ✅ | - | Model folder path |
-| `--model_cls` | str | ❌ | wan2.1 | Model class: `wan2.1` (standard model) or `wan2.1_distill` (distilled model, faster inference) |
-| `--model_size` | str | ✅ | - | Model size: `14b` or `1.3b` |
-| `--task` | str | ✅ | - | Task type: `i2v` (image-to-video) or `t2v` (text-to-video) |
+| `--model_path` | str | ✅ | - | Model root directory path (directory containing all model files) |
 | `--server_port` | int | ❌ | 7862 | Server port |
 | `--server_name` | str | ❌ | 0.0.0.0 | Server IP address |
+| `--output_dir` | str | ❌ | ./outputs | Output video save directory |
+**💡 Note**: Model type (wan2.1/wan2.2), task type (i2v/t2v), and specific model file selection are all configured in the Web interface.
 ## 🎯 Features
-### Basic Settings
+### Model Configuration
+- **Model Type**: Supports the wan2.1 and wan2.2 model architectures
+- **Task Type**: Supports image-to-video (i2v) and text-to-video (t2v) generation modes
+- **Model Selection**: The frontend automatically identifies and filters available model files, with automatic quantization precision detection
+- **Encoder Configuration**: Supports selection of the T5 text encoder, CLIP image encoder, and VAE decoder
+- **Operator Selection**: Supports multiple attention operators and quantized matrix multiplication operators; the system sorts them automatically by installation status
-#### Input Parameters
+### Input Parameters
 - **Prompt**: Describe the expected video content
 - **Negative Prompt**: Specify elements that should not appear
+- **Input Image**: The input image to upload, required in i2v mode
 - **Resolution**: Supports multiple preset resolutions (480p/540p/720p)
 - **Random Seed**: Controls the randomness of generation results
-- **Inference Steps**: Balances generation quality and speed
+- **Inference Steps**: Balances generation quality and speed (defaults to 4 steps for distilled models)
-#### Video Parameters
+### Video Parameters
 - **FPS**: Frames per second
 - **Total Frames**: Video length
-- **CFG Scale Factor**: Controls prompt influence strength (1-10)
+- **CFG Scale Factor**: Controls prompt influence strength (1-10, defaults to 1 for distilled models)
 - **Distribution Shift**: Controls how far the generation style deviates (0-10)
-### Advanced Optimization Options
-#### GPU Memory Optimization
-- **Chunked Rotary Position Embedding**: Saves GPU memory
-- **Rotary Embedding Chunk Size**: Controls chunk granularity
-- **Clean CUDA Cache**: Frees GPU memory promptly
-#### Asynchronous Offloading
-- **CPU Offloading**: Moves part of the computation to the CPU
-- **Lazy Loading**: Loads model components on demand, significantly reducing system memory consumption
-- **Offload Granularity Control**: Fine-grained control over the offloading strategy
-#### Low-Precision Quantization
-- **Attention Operators**: Flash Attention, Sage Attention, etc.
-- **Quantization Operators**: vLLM, SGL, Q8F, etc.
-- **Precision Modes**: FP8, INT8, BF16, etc.
-#### VAE Optimization
-- **Lightweight VAE**: Speeds up decoding
-- **VAE Tiled Inference**: Reduces memory usage
-#### Feature Caching
-- **Tea Cache**: Caches intermediate features to speed up generation
-- **Cache Threshold**: Controls when the cache is triggered
-- **Key-Step Caching**: Writes to the cache only at key steps
 ## 🔧 Auto-Configuration Feature
-After enabling "Auto-configure Inference Options", the system automatically optimizes parameters based on your hardware configuration:
-### GPU Memory Rules
-- **80GB+**: Default configuration, no optimization needed
-- **48GB**: Enable CPU offloading, offload ratio 50%
-- **40GB**: Enable CPU offloading, offload ratio 80%
-- **32GB**: Enable CPU offloading, offload ratio 100%
-- **24GB**: Enable BF16 precision, VAE tiling
-- **16GB**: Enable chunked offloading, rotary embedding chunking
-- **12GB**: Enable cache cleaning, lightweight VAE
-- **8GB**: Enable quantization, lazy loading
-### CPU Memory Rules
-- **128GB+**: Default configuration
-- **64GB**: Enable DIT quantization
-- **32GB**: Enable lazy loading
-- **16GB**: Enable full-model quantization
-## ⚠️ Important Notes
-### 🚀 Optimization Recommendations for Low-Resource Devices
-**💡 For devices with insufficient VRAM or constrained performance**:
-- **🎯 Model Selection**: Prefer distilled models (`wan2.1_distill`)
-- **⚡ Inference Steps**: 4 steps recommended
-- **🔧 CFG Setting**: Disabling CFG is recommended to improve generation speed
-- **🔄 Auto-Configuration**: Enable "Auto-configure Inference Options"
-- **💾 Storage Optimization**: Keep models on an SSD for the best loading performance
-## 🎨 Interface Description
-### Basic Settings Tab
-- **Input Parameters**: Prompts, resolution, and other basic settings
-- **Video Parameters**: FPS, frame count, CFG, and other video generation parameters
-- **Output Settings**: Video save path configuration
-### Advanced Options Tab
-- **GPU Memory Optimization**: Memory-management options
-- **Asynchronous Offloading**: CPU offloading and lazy loading
-- **Low-Precision Quantization**: Various quantization options
-- **VAE Optimization**: Variational autoencoder optimization
-- **Feature Caching**: Cache strategy configuration
-## 🔍 Troubleshooting
-### Common Issues
-**💡 Tip**: In general, after enabling "Auto-configure Inference Options" the system optimizes parameter settings automatically for your hardware, and performance problems rarely occur. If you do hit a problem, refer to the following solutions:
-1. **Gradio page opens blank**
-   - Try upgrading gradio: `pip install --upgrade gradio`
-2. **Insufficient CUDA memory**
-   - Enable CPU offloading
-   - Lower the resolution
-   - Enable quantization options
-3. **Insufficient system memory**
-   - Enable CPU offloading
-   - Enable the lazy loading option
-   - Enable quantization options
-4. **Slow generation**
-   - Reduce inference steps
-   - Enable auto-configuration
-   - Use lightweight models
-   - Enable Tea Cache
-   - Use quantization operators
-   - 💾 **Check whether models are stored on an SSD**
-5. **Slow model loading**
-   - 💾 **Move models to SSD storage**
-   - Enable the lazy loading option
-   - Check disk I/O performance
-   - Consider an NVMe SSD
-6. **Poor video quality**
-   - Increase inference steps
-   - Raise the CFG scale factor
-   - Use 14B models
-   - Optimize prompts
+The system automatically configures optimal inference options based on your hardware configuration (GPU VRAM and CPU memory) without manual adjustment. The best configuration is automatically applied on startup, including:
+- **GPU Memory Optimization**: Automatically enables CPU offloading, VAE tiled inference, etc. based on VRAM size
+- **CPU Memory Optimization**: Automatically enables lazy loading, module offloading, etc. based on system memory
+- **Operator Selection**: Automatically selects the best installed operators (sorted by priority)
+- **Quantization Configuration**: Automatically detects and applies quantization precision based on model file names
 ### Log Viewing
......
+import time
 from concurrent.futures import ThreadPoolExecutor
 import torch
@@ -115,8 +116,6 @@ class WeightAsyncStreamManager(object):
         self.prefetch_futures.append(future)
     def swap_cpu_buffers(self):
-        import time
         wait_start = time.time()
         already_done = all(f.done() for f in self.prefetch_futures)
         for f in self.prefetch_futures:
@@ -125,25 +124,11 @@ class WeightAsyncStreamManager(object):
         logger.debug(f"[Prefetch] block {self.prefetch_block_idx}: wait={wait_time:.3f}s, already_done={already_done}")
         self.cpu_buffers = [self.cpu_buffers[1], self.cpu_buffers[0]]
-    def shutdown(self, wait=True):
-        """Shutdown the thread pool executor and wait for all pending tasks to complete."""
-        if hasattr(self, "executor") and self.executor is not None:
-            # Wait for all pending futures to complete before shutting down
-            if hasattr(self, "prefetch_futures"):
-                for f in self.prefetch_futures:
-                    try:
-                        if not f.done():
-                            f.result()
-                    except Exception:
-                        pass
-            self.executor.shutdown(wait=wait)
-            self.executor = None
-            logger.debug("ThreadPoolExecutor shut down successfully.")
-    def __del__(self):
-        """Cleanup method to ensure executor is shut down when object is destroyed."""
-        try:
-            if hasattr(self, "executor") and self.executor is not None:
-                self.executor.shutdown(wait=False)
-        except Exception:
-            pass
+    def __del__(self):
+        if hasattr(self, "executor") and self.executor is not None:
+            for f in self.prefetch_futures:
+                if not f.done():
+                    f.result()
+            self.executor.shutdown(wait=False)
+            self.executor = None
+            logger.debug("ThreadPoolExecutor shut down successfully.")
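The hunk above folds the explicit `shutdown()` method into `__del__`: pending prefetch futures are drained first, then the pool is shut down without blocking. Below is a standalone sketch of that pattern; apart from `executor` and `prefetch_futures`, which mirror the diff, the names are simplified and hypothetical.

```python
# Minimal sketch of executor cleanup via __del__, as adopted in this commit.
from concurrent.futures import ThreadPoolExecutor


class PrefetchManager:
    def __init__(self):
        self.executor = ThreadPoolExecutor(max_workers=1)
        self.prefetch_futures = []

    def prefetch(self, fn, *args):
        self.prefetch_futures.append(self.executor.submit(fn, *args))

    def __del__(self):
        # Runs on garbage collection; guard attributes because __del__
        # can fire even if __init__ failed partway through.
        if hasattr(self, "executor") and self.executor is not None:
            for f in getattr(self, "prefetch_futures", []):
                if not f.done():
                    f.result()  # block until the in-flight task finishes
            self.executor.shutdown(wait=False)  # workers are already idle
            self.executor = None


if __name__ == "__main__":
    m = PrefetchManager()
    m.prefetch(sum, [1, 2, 3])
    del m  # triggers __del__, draining the future before shutdown
```

Waiting on each future before calling `shutdown(wait=False)` ensures no in-flight prefetch is cut off, while avoiding a blocking join during interpreter teardown.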
@@ -178,7 +178,7 @@ class WanModel(CompiledMethodsMixin):
         if os.path.exists(non_block_file):
             safetensors_files = [non_block_file]
         else:
-            raise ValueError(f"Non-block file not found in {safetensors_path}")
+            raise ValueError(f"Non-block file not found in {safetensors_path}. Please check the model path. Lazy load mode only supports loading chunked model weights.")
         weight_dict = {}
         for file_path in safetensors_files:
@@ -221,7 +221,7 @@ class WanModel(CompiledMethodsMixin):
         if os.path.exists(non_block_file):
             safetensors_files = [non_block_file]
         else:
-            raise ValueError(f"Non-block file not found in {safetensors_path}, Please check the lazy load model path")
+            raise ValueError(f"Non-block file not found in {safetensors_path}. Please check the model path. Lazy load mode only supports loading chunked model weights.")
         weight_dict = {}
         for safetensor_path in safetensors_files:
......
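Both hunks above clarify the same error message: in lazy-load mode the loader expects chunked (block-split) weights and only falls back to a single non-block file before failing. Below is a hedged sketch of that resolution order. It is not the repository's actual loader, and the `block_*.safetensors` naming is a guess based on the `_split` directory layout documented earlier in this diff.

```python
# Hypothetical sketch of the weight-file resolution behind the error above.
import glob
import os


def resolve_safetensors_files(safetensors_path: str) -> list[str]:
    """Prefer per-block files from a *_split directory; fall back to a
    single non-block safetensors file; otherwise fail loudly."""
    # Per-block files (naming assumed) let lazy load stream one block at a time.
    block_files = sorted(glob.glob(os.path.join(safetensors_path, "block_*.safetensors")))
    if block_files:
        return block_files
    # Fallback: a single monolithic safetensors file in the directory.
    non_block = sorted(glob.glob(os.path.join(safetensors_path, "*.safetensors")))
    if non_block:
        return non_block[:1]
    raise ValueError(
        f"Non-block file not found in {safetensors_path}. Please check the model path. "
        "Lazy load mode only supports loading chunked model weights."
    )
```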