Unverified commit 9a765f9b, authored by Gu Shiqiao, committed by GitHub

fix gradio (#587)

parent 8530a2fb
**run_gradio.sh**

````diff
@@ -14,27 +14,15 @@
 # Lightx2v project root directory path
 # Example: /home/user/lightx2v or /data/video_gen/lightx2v
-lightx2v_path=/data/video_gen/LightX2V
+lightx2v_path=/path/to/LightX2V

 # Model path configuration
-# Image-to-video model path (for i2v tasks)
 # Example: /path/to/Wan2.1-I2V-14B-720P-Lightx2v
-i2v_model_path=/path/to/Wan2.1-I2V-14B-480P-Lightx2v
-# Text-to-video model path (for t2v tasks)
-# Example: /path/to/Wan2.1-T2V-1.3B
-t2v_model_path=/path/to/Wan2.1-T2V-1.3B
-# Model size configuration
-# Default model size (14b, 1.3b)
-model_size="14b"
-# Model class configuration
-# Default model class (wan2.1, wan2.1_distill)
-model_cls="wan2.1"
+model_path=/path/to/models

 # Server configuration
 server_name="0.0.0.0"
-server_port=8032
+server_port=8033

 # Output directory configuration
 output_dir="./outputs"

@@ -50,18 +38,12 @@ export PROFILING_DEBUG_LEVEL=2
 export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

 # ==================== Parameter Parsing ====================
-# Default task type
-task="i2v"
 # Default interface language
 lang="zh"

 # Parse command line arguments
 while [[ $# -gt 0 ]]; do
     case $1 in
-        --task)
-            task="$2"
-            shift 2
-            ;;
         --lang)
             lang="$2"
             shift 2

@@ -75,55 +57,32 @@ while [[ $# -gt 0 ]]; do
             export CUDA_VISIBLE_DEVICES=$gpu_id
             shift 2
             ;;
-        --model_size)
-            model_size="$2"
-            shift 2
-            ;;
-        --model_cls)
-            model_cls="$2"
-            shift 2
-            ;;
         --output_dir)
             output_dir="$2"
             shift 2
             ;;
+        --model_path)
+            model_path="$2"
+            shift 2
+            ;;
         --help)
             echo "🎬 Lightx2v Gradio Demo Startup Script"
             echo "=========================================="
             echo "Usage: $0 [options]"
             echo ""
             echo "📋 Available options:"
-            echo " --task i2v|t2v Task type (default: i2v)"
-            echo " i2v: Image-to-video generation"
-            echo " t2v: Text-to-video generation"
             echo " --lang zh|en Interface language (default: zh)"
             echo " zh: Chinese interface"
             echo " en: English interface"
             echo " --port PORT Server port (default: 8032)"
             echo " --gpu GPU_ID GPU device ID (default: 0)"
-            echo " --model_size MODEL_SIZE"
-            echo " Model size (default: 14b)"
-            echo " 14b: 14 billion parameters model"
-            echo " 1.3b: 1.3 billion parameters model"
-            echo " --model_cls MODEL_CLASS"
-            echo " Model class (default: wan2.1)"
-            echo " wan2.1: Standard model variant"
-            echo " wan2.1_distill: Distilled model variant for faster inference"
-            echo " --output_dir OUTPUT_DIR"
-            echo " Output video save directory (default: ./saved_videos)"
-            echo " --help Show this help message"
-            echo ""
-            echo "🚀 Usage examples:"
-            echo " $0 # Default startup for image-to-video mode"
-            echo " $0 --task i2v --lang zh --port 8032 # Start with specified parameters"
-            echo " $0 --task t2v --lang en --port 7860 # Text-to-video with English interface"
-            echo " $0 --task i2v --gpu 1 --port 8032 # Use GPU 1"
-            echo " $0 --task t2v --model_size 1.3b # Use 1.3B model"
-            echo " $0 --task i2v --model_size 14b # Use 14B model"
-            echo " $0 --task i2v --model_cls wan2.1_distill # Use distilled model"
-            echo " $0 --task i2v --output_dir ./custom_output # Use custom output directory"
+            echo " --model_path PATH Model path (default: configured in script)"
+            echo " --output_dir DIR Output video save directory (default: ./outputs)"
+            echo " --help Show this help message"
             echo ""
             echo "📝 Notes:"
+            echo " - Task type (i2v/t2v) and model type are selected in the web UI"
+            echo " - Model class is auto-detected based on selected diffusion model"
             echo " - Edit script to configure model paths before first use"
             echo " - Ensure required Python dependencies are installed"
             echo " - Recommended to use GPU with 8GB+ VRAM"

@@ -139,37 +98,11 @@ while [[ $# -gt 0 ]]; do
 done

 # ==================== Parameter Validation ====================
-if [[ "$task" != "i2v" && "$task" != "t2v" ]]; then
-    echo "Error: Task type must be 'i2v' or 't2v'"
-    exit 1
-fi
 if [[ "$lang" != "zh" && "$lang" != "en" ]]; then
     echo "Error: Language must be 'zh' or 'en'"
     exit 1
 fi
-
-# Validate model size
-if [[ "$model_size" != "14b" && "$model_size" != "1.3b" ]]; then
-    echo "Error: Model size must be '14b' or '1.3b'"
-    exit 1
-fi
-
-# Validate model class
-if [[ "$model_cls" != "wan2.1" && "$model_cls" != "wan2.1_distill" ]]; then
-    echo "Error: Model class must be 'wan2.1' or 'wan2.1_distill'"
-    exit 1
-fi
-
-# Select model path based on task type
-if [[ "$task" == "i2v" ]]; then
-    model_path=$i2v_model_path
-    echo "🎬 Starting Image-to-Video mode"
-else
-    model_path=$t2v_model_path
-    echo "🎬 Starting Text-to-Video mode"
-fi

 # Check if model path exists
 if [[ ! -d "$model_path" ]]; then
     echo "❌ Error: Model path does not exist"

@@ -208,13 +141,11 @@ echo "🚀 Lightx2v Gradio Demo Starting..."
 echo "=========================================="
 echo "📁 Project path: $lightx2v_path"
 echo "🤖 Model path: $model_path"
-echo "🎯 Task type: $task"
-echo "🤖 Model size: $model_size"
-echo "🤖 Model class: $model_cls"
 echo "🌏 Interface language: $lang"
 echo "🖥️ GPU device: $gpu_id"
 echo "🌐 Server address: $server_name:$server_port"
 echo "📁 Output directory: $output_dir"
+echo "📝 Note: Task type and model class are selected in web UI"
 echo "=========================================="

 # Display system resource information

@@ -239,11 +170,8 @@ echo "=========================================="
 # Start Python demo
 python $demo_file \
     --model_path "$model_path" \
-    --model_cls "$model_cls" \
-    --task "$task" \
     --server_name "$server_name" \
     --server_port "$server_port" \
-    --model_size "$model_size" \
     --output_dir "$output_dir"

 # Display final system resource usage
````
**run_gradio_win.bat**

````diff
@@ -16,21 +16,9 @@ REM Example: D:\LightX2V
 set lightx2v_path=/path/to/LightX2V

 REM Model path configuration
-REM Image-to-video model path (for i2v tasks)
-REM Example: D:\models\Wan2.1-I2V-14B-480P-Lightx2v
-set i2v_model_path=/path/to/Wan2.1-I2V-14B-480P-Lightx2v
-REM Text-to-video model path (for t2v tasks)
-REM Example: D:\models\Wan2.1-T2V-1.3B
-set t2v_model_path=/path/to/Wan2.1-T2V-1.3B
-REM Model size configuration
-REM Default model size (14b, 1.3b)
-set model_size=14b
-REM Model class configuration
-REM Default model class (wan2.1, wan2.1_distill)
-set model_cls=wan2.1
+REM Model root directory path
+REM Example: D:\models\LightX2V
+set model_path=/path/to/LightX2V

 REM Server configuration
 set server_name=127.0.0.1

@@ -49,20 +37,12 @@ set PROFILING_DEBUG_LEVEL=2
 set PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

 REM ==================== Parameter Parsing ====================
-REM Default task type
-set task=i2v
 REM Default interface language
 set lang=zh

 REM Parse command line arguments
 :parse_args
 if "%1"=="" goto :end_parse
-if "%1"=="--task" (
-    set task=%2
-    shift
-    shift
-    goto :parse_args
-)
 if "%1"=="--lang" (
     set lang=%2
     shift

@@ -82,18 +62,6 @@ if "%1"=="--gpu" (
     shift
     goto :parse_args
 )
-if "%1"=="--model_size" (
-    set model_size=%2
-    shift
-    shift
-    goto :parse_args
-)
-if "%1"=="--model_cls" (
-    set model_cls=%2
-    shift
-    shift
-    goto :parse_args
-)
 if "%1"=="--output_dir" (
     set output_dir=%2
     shift

@@ -106,38 +74,24 @@ if "%1"=="--help" (
     echo Usage: %0 [options]
     echo.
     echo 📋 Available options:
-    echo --task i2v^|t2v Task type (default: i2v)
-    echo i2v: Image-to-video generation
-    echo t2v: Text-to-video generation
     echo --lang zh^|en Interface language (default: zh)
     echo zh: Chinese interface
     echo en: English interface
     echo --port PORT Server port (default: 8032)
     echo --gpu GPU_ID GPU device ID (default: 0)
-    echo --model_size MODEL_SIZE
-    echo Model size (default: 14b)
-    echo 14b: 14B parameter model
-    echo 1.3b: 1.3B parameter model
-    echo --model_cls MODEL_CLASS
-    echo Model class (default: wan2.1)
-    echo wan2.1: Standard model variant
-    echo wan2.1_distill: Distilled model variant for faster inference
     echo --output_dir OUTPUT_DIR
-    echo Output video save directory (default: ./saved_videos)
+    echo Output video save directory (default: ./outputs)
     echo --help Show this help message
     echo.
     echo 🚀 Usage examples:
-    echo %0 # Default startup for image-to-video mode
-    echo %0 --task i2v --lang zh --port 8032 # Start with specified parameters
-    echo %0 --task t2v --lang en --port 7860 # Text-to-video with English interface
-    echo %0 --task i2v --gpu 1 --port 8032 # Use GPU 1
-    echo %0 --task t2v --model_size 1.3b # Use 1.3B model
-    echo %0 --task i2v --model_size 14b # Use 14B model
-    echo %0 --task i2v --model_cls wan2.1_distill # Use distilled model
-    echo %0 --task i2v --output_dir ./custom_output # Use custom output directory
+    echo %0 # Default startup
+    echo %0 --lang zh --port 8032 # Start with specified parameters
+    echo %0 --lang en --port 7860 # English interface
+    echo %0 --gpu 1 --port 8032 # Use GPU 1
+    echo %0 --output_dir ./custom_output # Use custom output directory
     echo.
     echo 📝 Notes:
-    echo - Edit script to configure model paths before first use
+    echo - Edit script to configure model path before first use
     echo - Ensure required Python dependencies are installed
     echo - Recommended to use GPU with 8GB+ VRAM
     echo - 🚨 Strongly recommend storing models on SSD for better performance

@@ -152,13 +106,6 @@ exit /b 1
 :end_parse

 REM ==================== Parameter Validation ====================
-if "%task%"=="i2v" goto :valid_task
-if "%task%"=="t2v" goto :valid_task
-echo Error: Task type must be 'i2v' or 't2v'
-pause
-exit /b 1
-:valid_task
 if "%lang%"=="zh" goto :valid_lang
 if "%lang%"=="en" goto :valid_lang
 echo Error: Language must be 'zh' or 'en'

@@ -166,29 +113,6 @@ pause
 exit /b 1
 :valid_lang
-if "%model_size%"=="14b" goto :valid_size
-if "%model_size%"=="1.3b" goto :valid_size
-echo Error: Model size must be '14b' or '1.3b'
-pause
-exit /b 1
-:valid_size
-if "%model_cls%"=="wan2.1" goto :valid_cls
-if "%model_cls%"=="wan2.1_distill" goto :valid_cls
-echo Error: Model class must be 'wan2.1' or 'wan2.1_distill'
-pause
-exit /b 1
-:valid_cls
-
-REM Select model path based on task type
-if "%task%"=="i2v" (
-    set model_path=%i2v_model_path%
-    echo 🎬 Starting Image-to-Video mode
-) else (
-    set model_path=%t2v_model_path%
-    echo 🎬 Starting Text-to-Video mode
-)

 REM Check if model path exists
 if not exist "%model_path%" (

@@ -230,9 +154,6 @@ echo 🚀 LightX2V Gradio Starting...
 echo ==========================================
 echo 📁 Project path: %lightx2v_path%
 echo 🤖 Model path: %model_path%
-echo 🎯 Task type: %task%
-echo 🤖 Model size: %model_size%
-echo 🤖 Model class: %model_cls%
 echo 🌏 Interface language: %lang%
 echo 🖥️ GPU device: %gpu_id%
 echo 🌐 Server address: %server_name%:%server_port%

@@ -262,11 +183,8 @@ echo ==========================================
 REM Start Python demo
 python %demo_file% ^
     --model_path "%model_path%" ^
-    --model_cls %model_cls% ^
-    --task %task% ^
     --server_name %server_name% ^
     --server_port %server_port% ^
-    --model_size %model_size% ^
     --output_dir "%output_dir%"

 REM Display final system resource usage
````
**Gradio deployment documentation (English)**

````diff
@@ -38,51 +38,52 @@ Follow the [Quick Start Guide](../getting_started/quickstart.md) to install the
 -[sglang-kernel](https://github.com/sgl-project/sglang/tree/main/sgl-kernel)
 -[q8-kernel](https://github.com/KONAKONA666/q8_kernels) (only supports ADA architecture GPUs)

-Install according to the project homepage tutorials for each operator as needed
+Install according to the project homepage tutorials for each operator as needed.

-### 🤖 Supported Models
-
-#### 🎬 Image-to-Video Models
-
-| Model Name | Resolution | Parameters | Features | Recommended Use |
-|------------|------------|------------|----------|-----------------|
-| ✅ [Wan2.1-I2V-14B-480P-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-I2V-14B-480P-Lightx2v) | 480p | 14B | Standard version | Balance speed and quality |
-| ✅ [Wan2.1-I2V-14B-720P-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-I2V-14B-720P-Lightx2v) | 720p | 14B | HD version | Pursue high-quality output |
-| ✅ [Wan2.1-I2V-14B-480P-StepDistill-CfgDistill-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-I2V-14B-480P-StepDistill-CfgDistill-Lightx2v) | 480p | 14B | Distilled optimized version | Faster inference speed |
-| ✅ [Wan2.1-I2V-14B-720P-StepDistill-CfgDistill-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-I2V-14B-720P-StepDistill-CfgDistill-Lightx2v) | 720p | 14B | HD distilled version | High quality + fast inference |
-
-#### 📝 Text-to-Video Models
-
-| Model Name | Parameters | Features | Recommended Use |
-|------------|------------|----------|-----------------|
-| ✅ [Wan2.1-T2V-1.3B-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-T2V-1.3B-Lightx2v) | 1.3B | Lightweight | Fast prototyping and testing |
-| ✅ [Wan2.1-T2V-14B-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-T2V-14B-Lightx2v) | 14B | Standard version | Balance speed and quality |
-| ✅ [Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v) | 14B | Distilled optimized version | High quality + fast inference |
-
-**💡 Model Selection Recommendations**:
-- **First-time use**: Recommend choosing distilled versions (`wan2.1_distill`)
-- **Pursuing quality**: Choose 720p resolution or 14B parameter models
-- **Pursuing speed**: Choose 480p resolution or 1.3B parameter models, prioritize distilled versions
-- **Resource-constrained**: Prioritize distilled versions and lower resolutions
-- **Real-time applications**: Strongly recommend using distilled models (`wan2.1_distill`)
-
-**🎯 Model Category Description**:
-- **`wan2.1`**: Standard model, provides the best video generation quality, suitable for scenarios with extremely high quality requirements
-- **`wan2.1_distill`**: Distilled model, optimized through knowledge distillation technology, significantly improves inference speed, maintains good quality while greatly reducing computation time, suitable for most application scenarios
-
-**📥 Model Download**:
-Refer to the [Model Structure Documentation](./model_structure.md) to download complete models (including quantized and non-quantized versions) or download only quantized/non-quantized versions.
-
-**Download Options**:
-- **Complete Model**: When downloading complete models with both quantized and non-quantized versions, you can freely choose the quantization precision for DIT/T5/CLIP in the advanced options of the `Gradio` Web frontend.
-- **Non-quantized Version Only**: When downloading only non-quantized versions, in the `Gradio` Web frontend, the quantization precision for `DIT/T5/CLIP` can only be set to bf16/fp16. If you need to use quantized versions of models, please manually download quantized weights to the `i2v_model_path` or `t2v_model_path` directory where Gradio is started.
-- **Quantized Version Only**: When downloading only quantized versions, in the `Gradio` Web frontend, the quantization precision for `DIT/T5/CLIP` can only be set to fp8 or int8 (depending on the weights you downloaded). If you need to use non-quantized versions of models, please manually download non-quantized weights to the `i2v_model_path` or `t2v_model_path` directory where Gradio is started.
-- **Note**: Whether you download complete models or partial models, the values for `i2v_model_path` and `t2v_model_path` parameters should be the first-level directory paths. For example: `Wan2.1-I2V-14B-480P-Lightx2v/`, not `Wan2.1-I2V-14B-480P-Lightx2v/int8`.
+### 📥 Model Download
+
+Refer to the [Model Structure Documentation](../getting_started/model_structure.md) to download complete models (including quantized and non-quantized versions) or download only quantized/non-quantized versions.
+
+#### wan2.1 Model Directory Structure
+
+```
+models/
+├── wan2.1_i2v_720p_lightx2v_4step.safetensors                    # Original precision
+├── wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step.safetensors    # FP8 quantization
+├── wan2.1_i2v_720p_int8_lightx2v_4step.safetensors               # INT8 quantization
+├── wan2.1_i2v_720p_int8_lightx2v_4step_split                     # INT8 quantization block storage directory
+├── wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step_split          # FP8 quantization block storage directory
+├── Other weights (e.g., t2v)
+├── t5/clip/xlm-roberta-large/google                              # text and image encoder
+├── vae/lightvae/lighttae                                         # vae
+└── config.json                                                   # Model configuration file
+```
+
+#### wan2.2 Model Directory Structure
+
+```
+models/
+├── wan2.2_i2v_A14b_high_noise_lightx2v_4step_1030.safetensors           # high noise original precision
+├── wan2.2_i2v_A14b_high_noise_fp8_e4m3_lightx2v_4step_1030.safetensors  # high noise FP8 quantization
+├── wan2.2_i2v_A14b_high_noise_int8_lightx2v_4step_1030.safetensors      # high noise INT8 quantization
+├── wan2.2_i2v_A14b_high_noise_int8_lightx2v_4step_1030_split            # high noise INT8 quantization block storage directory
+├── wan2.2_i2v_A14b_low_noise_lightx2v_4step.safetensors                 # low noise original precision
+├── wan2.2_i2v_A14b_low_noise_fp8_e4m3_lightx2v_4step.safetensors        # low noise FP8 quantization
+├── wan2.2_i2v_A14b_low_noise_int8_lightx2v_4step.safetensors            # low noise INT8 quantization
+├── wan2.2_i2v_A14b_low_noise_int8_lightx2v_4step_split                  # low noise INT8 quantization block storage directory
+├── t5/clip/xlm-roberta-large/google                                     # text and image encoder
+├── vae/lightvae/lighttae                                                # vae
+└── config.json                                                          # Model configuration file
+```
+
+**📝 Download Instructions**:
+- Model weights can be downloaded from HuggingFace:
+  - [Wan2.1-Distill-Models](https://huggingface.co/lightx2v/Wan2.1-Distill-Models)
+  - [Wan2.2-Distill-Models](https://huggingface.co/lightx2v/Wan2.2-Distill-Models)
+- Text and Image Encoders can be downloaded from [Encoders](https://huggingface.co/lightx2v/Encoderss)
+- VAE can be downloaded from [Autoencoders](https://huggingface.co/lightx2v/Autoencoders)
+- The `xxx_split` directories (e.g., `wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step_split`) store the weights as multiple per-block safetensors files; they suit devices with limited memory (e.g., 16GB or less), so download according to your hardware.
````
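The repositories listed in the new documentation can also be fetched programmatically. Below is a minimal sketch (not part of this commit) using the `huggingface_hub` Python API; the repo IDs come from the links above, while the pattern filter and target directory are illustrative assumptions:

```python
# Sketch: pull LightX2V weights into a single model root (later passed as --model_path).
# Assumes `pip install huggingface_hub`; file patterns and paths are illustrative.
from huggingface_hub import snapshot_download

model_root = "/path/to/models"  # the directory later passed as --model_path

# Distilled DiT weights: fetch only the precision you need, e.g. the FP8 file.
snapshot_download(
    repo_id="lightx2v/Wan2.1-Distill-Models",
    local_dir=model_root,
    allow_patterns=["wan2.1_i2v_720p_scaled_fp8_e4m3_lightx2v_4step.safetensors"],
)

# Text/image encoders and VAE live in separate repos per the list above.
snapshot_download(repo_id="lightx2v/Encoderss", local_dir=model_root)
snapshot_download(repo_id="lightx2v/Autoencoders", local_dir=model_root)
```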
````diff
 ### Startup Methods

@@ -96,8 +97,7 @@ vim run_gradio.sh
 # Configuration items that need to be modified:
 # - lightx2v_path: Lightx2v project root directory path
-# - i2v_model_path: Image-to-video model path
-# - t2v_model_path: Text-to-video model path
+# - model_path: Model root directory path (contains all model files)

 # 💾 Important note: Recommend pointing model paths to SSD storage locations
 # Example: /mnt/ssd/models/ or /data/ssd/models/

@@ -105,11 +105,9 @@ vim run_gradio.sh
 # 2. Run the startup script
 bash run_gradio.sh

-# 3. Or start with parameters (recommended using distilled models)
-bash run_gradio.sh --task i2v --lang en --model_cls wan2.1 --model_size 14b --port 8032
-bash run_gradio.sh --task t2v --lang en --model_cls wan2.1 --model_size 1.3b --port 8032
-bash run_gradio.sh --task i2v --lang en --model_cls wan2.1_distill --model_size 14b --port 8032
-bash run_gradio.sh --task t2v --lang en --model_cls wan2.1_distill --model_size 1.3b --port 8032
+# 3. Or start with parameters
+bash run_gradio.sh --lang en --port 8032
+bash run_gradio.sh --lang zh --port 7862
 ```

 **Windows Environment:**

@@ -120,8 +118,7 @@ notepad run_gradio_win.bat
 # Configuration items that need to be modified:
 # - lightx2v_path: Lightx2v project root directory path
-# - i2v_model_path: Image-to-video model path
-# - t2v_model_path: Text-to-video model path
+# - model_path: Model root directory path (contains all model files)

 # 💾 Important note: Recommend pointing model paths to SSD storage locations
 # Example: D:\models\ or E:\models\
````
````diff
@@ -129,201 +126,101 @@ notepad run_gradio_win.bat
 # 2. Run the startup script
 run_gradio_win.bat

-# 3. Or start with parameters (recommended using distilled models)
-run_gradio_win.bat --task i2v --lang en --model_cls wan2.1 --model_size 14b --port 8032
-run_gradio_win.bat --task t2v --lang en --model_cls wan2.1 --model_size 1.3b --port 8032
-run_gradio_win.bat --task i2v --lang en --model_cls wan2.1_distill --model_size 14b --port 8032
-run_gradio_win.bat --task t2v --lang en --model_cls wan2.1_distill --model_size 1.3b --port 8032
+# 3. Or start with parameters
+run_gradio_win.bat --lang en --port 8032
+run_gradio_win.bat --lang zh --port 7862
 ```

 #### Method 2: Direct Command Line Startup

+```bash
+pip install -v git+https://github.com/ModelTC/LightX2V.git
+```
+
 **Linux Environment:**

-**Image-to-Video Mode:**
+**English Interface Version:**
 ```bash
 python gradio_demo.py \
-    --model_path /path/to/Wan2.1-I2V-14B-480P-Lightx2v \
-    --model_cls wan2.1 \
-    --model_size 14b \
-    --task i2v \
+    --model_path /path/to/models \
     --server_name 0.0.0.0 \
     --server_port 7862
 ```

-**English Interface Version:**
+**Chinese Interface Version:**
 ```bash
-python gradio_demo.py \
-    --model_path /path/to/Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v \
-    --model_cls wan2.1_distill \
-    --model_size 14b \
-    --task t2v \
+python gradio_demo_zh.py \
+    --model_path /path/to/models \
     --server_name 0.0.0.0 \
     --server_port 7862
 ```

 **Windows Environment:**

-**Image-to-Video Mode:**
+**English Interface Version:**
 ```cmd
 python gradio_demo.py ^
-    --model_path D:\models\Wan2.1-I2V-14B-480P-Lightx2v ^
-    --model_cls wan2.1 ^
-    --model_size 14b ^
-    --task i2v ^
+    --model_path D:\models ^
     --server_name 127.0.0.1 ^
     --server_port 7862
 ```

-**English Interface Version:**
+**Chinese Interface Version:**
 ```cmd
-python gradio_demo.py ^
-    --model_path D:\models\Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v ^
-    --model_cls wan2.1_distill ^
-    --model_size 14b ^
-    --task t2v ^
+python gradio_demo_zh.py ^
+    --model_path D:\models ^
     --server_name 127.0.0.1 ^
     --server_port 7862
 ```

+**💡 Tip**: Model type (wan2.1/wan2.2), task type (i2v/t2v), and specific model file selection are all configured in the Web interface.
+
 ## 📋 Command Line Parameters

 | Parameter | Type | Required | Default | Description |
 |-----------|------|----------|---------|-------------|
-| `--model_path` | str | ✅ | - | Model folder path |
-| `--model_cls` | str | ❌ | wan2.1 | Model class: `wan2.1` (standard model) or `wan2.1_distill` (distilled model, faster inference) |
-| `--model_size` | str | ✅ | - | Model size: `14b (image-to-video or text-to-video)` or `1.3b (text-to-video)` |
-| `--task` | str | ✅ | - | Task type: `i2v` (image-to-video) or `t2v` (text-to-video) |
+| `--model_path` | str | ✅ | - | Model root directory path (directory containing all model files) |
 | `--server_port` | int | ❌ | 7862 | Server port |
 | `--server_name` | str | ❌ | 0.0.0.0 | Server IP address |
+| `--output_dir` | str | ❌ | ./outputs | Output video save directory |
+
+**💡 Note**: Model type (wan2.1/wan2.2), task type (i2v/t2v), and specific model file selection are all configured in the Web interface.
````
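For reference, a minimal `argparse` sketch of the documented flag surface follows; it mirrors the table above but is illustrative, not the commit's actual `gradio_demo.py` code:

```python
# Illustrative parser matching the documented CLI surface of gradio_demo.py.
import argparse

parser = argparse.ArgumentParser(description="LightX2V Gradio demo launcher (sketch)")
parser.add_argument("--model_path", type=str, required=True,
                    help="Model root directory containing all model files")
parser.add_argument("--server_name", type=str, default="0.0.0.0", help="Server IP address")
parser.add_argument("--server_port", type=int, default=7862, help="Server port")
parser.add_argument("--output_dir", type=str, default="./outputs",
                    help="Output video save directory")

if __name__ == "__main__":
    print(parser.parse_args())
```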
````diff
 ## 🎯 Features

-### Basic Settings
+### Model Configuration
+
+- **Model Type**: Supports wan2.1 and wan2.2 model architectures
+- **Task Type**: Supports Image-to-Video (i2v) and Text-to-Video (t2v) generation modes
+- **Model Selection**: Frontend automatically identifies and filters available model files, supports automatic quantization precision detection
+- **Encoder Configuration**: Supports selection of T5 text encoder, CLIP image encoder, and VAE decoder
+- **Operator Selection**: Supports multiple attention operators and quantization matrix multiplication operators, system automatically sorts by installation status

-#### Input Parameters
+### Input Parameters
 - **Prompt**: Describe the expected video content
 - **Negative Prompt**: Specify elements you don't want to appear
-- **Input Image**: Upload input image required in i2v mode
 - **Resolution**: Supports multiple preset resolutions (480p/540p/720p)
 - **Random Seed**: Controls the randomness of generation results
-- **Inference Steps**: Affects the balance between generation quality and speed
+- **Inference Steps**: Affects the balance between generation quality and speed (defaults to 4 steps for distilled models)

-#### Video Parameters
+### Video Parameters
 - **FPS**: Frames per second
 - **Total Frames**: Video length
-- **CFG Scale Factor**: Controls prompt influence strength (1-10)
+- **CFG Scale Factor**: Controls prompt influence strength (1-10, defaults to 1 for distilled models)
 - **Distribution Shift**: Controls generation style deviation degree (0-10)

-### Advanced Optimization Options
-
-#### GPU Memory Optimization
-- **Chunked Rotary Position Embedding**: Saves GPU memory
-- **Rotary Embedding Chunk Size**: Controls chunk granularity
-- **Clean CUDA Cache**: Promptly frees GPU memory
-
-#### Asynchronous Offloading
-- **CPU Offloading**: Transfers partial computation to CPU
-- **Lazy Loading**: Loads model components on-demand, significantly reduces system memory consumption
-- **Offload Granularity Control**: Fine-grained control of offloading strategies
-
-#### Low-Precision Quantization
-- **Attention Operators**: Flash Attention, Sage Attention, etc.
-- **Quantization Operators**: vLLM, SGL, Q8F, etc.
-- **Precision Modes**: FP8, INT8, BF16, etc.
-
-#### VAE Optimization
-- **Lightweight VAE**: Accelerates decoding process
-- **VAE Tiling Inference**: Reduces memory usage
-
-#### Feature Caching
-- **Tea Cache**: Caches intermediate features to accelerate generation
-- **Cache Threshold**: Controls cache trigger conditions
-- **Key Step Caching**: Writes cache only at key steps
-
 ## 🔧 Auto-Configuration Feature

-After enabling "Auto-configure Inference Options", the system will automatically optimize parameters based on your hardware configuration:
-
-### GPU Memory Rules
-- **80GB+**: Default configuration, no optimization needed
-- **48GB**: Enable CPU offloading, offload ratio 50%
-- **40GB**: Enable CPU offloading, offload ratio 80%
-- **32GB**: Enable CPU offloading, offload ratio 100%
-- **24GB**: Enable BF16 precision, VAE tiling
-- **16GB**: Enable chunked offloading, rotary embedding chunking
-- **12GB**: Enable cache cleaning, lightweight VAE
-- **8GB**: Enable quantization, lazy loading
-
-### CPU Memory Rules
-- **128GB+**: Default configuration
-- **64GB**: Enable DIT quantization
-- **32GB**: Enable lazy loading
-- **16GB**: Enable full model quantization
+The system automatically configures optimal inference options based on your hardware configuration (GPU VRAM and CPU memory) without manual adjustment. The best configuration is automatically applied on startup, including:
+
+- **GPU Memory Optimization**: Automatically enables CPU offloading, VAE tiling inference, etc. based on VRAM size
+- **CPU Memory Optimization**: Automatically enables lazy loading, module unloading, etc. based on system memory
+- **Operator Selection**: Automatically selects the best installed operators (sorted by priority)
+- **Quantization Configuration**: Automatically detects and applies quantization precision based on model file names
````
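The deleted rule table above encodes a threshold ladder from hardware budget to optimizations; the new auto-configuration applies the same idea automatically on startup. A hedged sketch of such threshold-based selection follows (option names and the exact mapping are illustrative, taken from the removed list rather than from the repo's implementation):

```python
# Illustrative VRAM-threshold auto-configuration; not the repo's actual code.
import torch

def auto_configure(vram_gb: float) -> dict:
    """Map available GPU memory to inference options, loosely following the
    removed GPU memory rule table from this documentation page."""
    opts = {"cpu_offload": False, "offload_ratio": 0.0, "vae_tiling": False,
            "lazy_load": False, "quantization": None}
    if vram_gb >= 80:
        return opts                                   # default config, no optimization
    if vram_gb >= 48:
        opts.update(cpu_offload=True, offload_ratio=0.5)
    elif vram_gb >= 40:
        opts.update(cpu_offload=True, offload_ratio=0.8)
    elif vram_gb >= 32:
        opts.update(cpu_offload=True, offload_ratio=1.0)
    elif vram_gb >= 24:
        opts.update(vae_tiling=True)                  # plus BF16 precision
    else:                                             # 16GB and below
        opts.update(cpu_offload=True, offload_ratio=1.0, vae_tiling=True,
                    lazy_load=True, quantization="int8")
    return opts

if torch.cuda.is_available():
    vram = torch.cuda.get_device_properties(0).total_memory / 1024**3
    print(auto_configure(vram))
```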
````diff
-## ⚠️ Important Notes
-
-### 🚀 Low-Resource Device Optimization Recommendations
-
-**💡 For devices with insufficient VRAM or performance constraints**:
-- **🎯 Model Selection**: Prioritize using distilled version models (`wan2.1_distill`)
-- **⚡ Inference Steps**: Recommend setting to 4 steps
-- **🔧 CFG Settings**: Recommend disabling CFG option to improve generation speed
-- **🔄 Auto-Configuration**: Enable "Auto-configure Inference Options"
-- **💾 Storage Optimization**: Ensure models are stored on SSD for optimal loading performance
-
-## 🎨 Interface Description
-
-### Basic Settings Tab
-- **Input Parameters**: Prompts, resolution, and other basic settings
-- **Video Parameters**: FPS, frame count, CFG, and other video generation parameters
-- **Output Settings**: Video save path configuration
-
-### Advanced Options Tab
-- **GPU Memory Optimization**: Memory management related options
-- **Asynchronous Offloading**: CPU offloading and lazy loading
-- **Low-Precision Quantization**: Various quantization optimization options
-- **VAE Optimization**: Variational Autoencoder optimization
-- **Feature Caching**: Cache strategy configuration
-
-## 🔍 Troubleshooting
-
-### Common Issues
-
-**💡 Tip**: Generally, after enabling "Auto-configure Inference Options", the system will automatically optimize parameter settings based on your hardware configuration, and performance issues usually won't occur. If you encounter problems, please refer to the following solutions:
-
-1. **Gradio Webpage Opens Blank**
-   - Try upgrading gradio: `pip install --upgrade gradio`
-
-2. **CUDA Memory Insufficient**
-   - Enable CPU offloading
-   - Reduce resolution
-   - Enable quantization options
-
-3. **System Memory Insufficient**
-   - Enable CPU offloading
-   - Enable lazy loading option
-   - Enable quantization options
-
-4. **Slow Generation Speed**
-   - Reduce inference steps
-   - Enable auto-configuration
-   - Use lightweight models
-   - Enable Tea Cache
-   - Use quantization operators
-   - 💾 **Check if models are stored on SSD**
-
-5. **Slow Model Loading**
-   - 💾 **Migrate models to SSD storage**
-   - Enable lazy loading option
-   - Check disk I/O performance
-   - Consider using NVMe SSD
-
-6. **Poor Video Quality**
-   - Increase inference steps
-   - Increase CFG scale factor
-   - Use 14B models
-   - Optimize prompts
-
 ### Log Viewing
````
**WeightAsyncStreamManager** (Python)

````diff
+import time
 from concurrent.futures import ThreadPoolExecutor
 import torch

@@ -115,8 +116,6 @@ class WeightAsyncStreamManager(object):
             self.prefetch_futures.append(future)

     def swap_cpu_buffers(self):
-        import time
-
         wait_start = time.time()
         already_done = all(f.done() for f in self.prefetch_futures)
         for f in self.prefetch_futures:

@@ -125,25 +124,11 @@ class WeightAsyncStreamManager(object):
         logger.debug(f"[Prefetch] block {self.prefetch_block_idx}: wait={wait_time:.3f}s, already_done={already_done}")
         self.cpu_buffers = [self.cpu_buffers[1], self.cpu_buffers[0]]

-    def shutdown(self, wait=True):
-        """Shutdown the thread pool executor and wait for all pending tasks to complete."""
+    def __del__(self):
         if hasattr(self, "executor") and self.executor is not None:
-            # Wait for all pending futures to complete before shutting down
-            if hasattr(self, "prefetch_futures"):
-                for f in self.prefetch_futures:
-                    try:
-                        if not f.done():
-                            f.result()
-                    except Exception:
-                        pass
-            self.executor.shutdown(wait=wait)
+            for f in self.prefetch_futures:
+                if not f.done():
+                    f.result()
+            self.executor.shutdown(wait=False)
             self.executor = None
             logger.debug("ThreadPoolExecutor shut down successfully.")
-
-    def __del__(self):
-        """Cleanup method to ensure executor is shut down when object is destroyed."""
-        try:
-            if hasattr(self, "executor") and self.executor is not None:
-                self.executor.shutdown(wait=False)
-        except Exception:
-            pass
````
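The change above folds the explicit `shutdown()` API into `__del__`: pending prefetch futures are drained first, which makes the subsequent `shutdown(wait=False)` safe. A self-contained sketch of this double-buffered prefetch pattern follows (class and method names here are hypothetical stand-ins, not the repo's actual manager):

```python
# Standalone sketch: drain prefetch futures before shutting the executor down.
from concurrent.futures import ThreadPoolExecutor

class PrefetchBuffers:
    def __init__(self):
        self.executor = ThreadPoolExecutor(max_workers=1)
        self.cpu_buffers = [{}, {}]   # double buffer: [active, staging]
        self.prefetch_futures = []

    def prefetch(self, block_idx):
        # Stage the next block's weights on a background thread.
        self.prefetch_futures.append(self.executor.submit(self._load_block, block_idx))

    def _load_block(self, block_idx):
        self.cpu_buffers[1][block_idx] = f"weights-{block_idx}"  # stand-in for real I/O

    def swap_cpu_buffers(self):
        for f in self.prefetch_futures:
            f.result()                # block until staging completes
        self.prefetch_futures.clear()
        self.cpu_buffers.reverse()    # staging buffer becomes the active one

    def __del__(self):
        # Mirror the diff: wait on outstanding futures, then stop the pool.
        if getattr(self, "executor", None) is not None:
            for f in self.prefetch_futures:
                if not f.done():
                    f.result()
            self.executor.shutdown(wait=False)
            self.executor = None

mgr = PrefetchBuffers()
mgr.prefetch(0)
mgr.swap_cpu_buffers()
print(mgr.cpu_buffers[0])  # -> {0: 'weights-0'}
```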
**WanModel** (Python)

````diff
@@ -178,7 +178,7 @@ class WanModel(CompiledMethodsMixin):
         if os.path.exists(non_block_file):
             safetensors_files = [non_block_file]
         else:
-            raise ValueError(f"Non-block file not found in {safetensors_path}")
+            raise ValueError(f"Non-block file not found in {safetensors_path}. Please check the model path. Lazy load mode only supports loading chunked model weights.")

         weight_dict = {}
         for file_path in safetensors_files:

@@ -221,7 +221,7 @@ class WanModel(CompiledMethodsMixin):
         if os.path.exists(non_block_file):
             safetensors_files = [non_block_file]
         else:
-            raise ValueError(f"Non-block file not found in {safetensors_path}, Please check the lazy load model path")
+            raise ValueError(f"Non-block file not found in {safetensors_path}. Please check the model path. Lazy load mode only supports loading chunked model weights.")

         weight_dict = {}
         for safetensor_path in safetensors_files:
````
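The clarified error message encodes a constraint: lazy-load mode can only stream weights that are stored chunked (the `*_split` directories described in the documentation above). A hedged sketch of that pre-flight check follows; the helper name and on-disk naming conventions are assumptions for illustration, not the repo's API:

```python
# Hypothetical pre-flight check for lazy-load weight layouts.
import glob
import os

def find_lazy_load_files(safetensors_path: str) -> list:
    """Return per-block safetensors files, mirroring the diff's error handling."""
    block_files = sorted(glob.glob(os.path.join(safetensors_path, "block_*.safetensors")))
    if block_files:
        return block_files
    non_block_file = os.path.join(safetensors_path, "non_block.safetensors")  # assumed name
    if os.path.exists(non_block_file):
        return [non_block_file]
    raise ValueError(
        f"Non-block file not found in {safetensors_path}. Please check the model path. "
        "Lazy load mode only supports loading chunked model weights."
    )
```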