Commit 3e4fe79b authored by GoatWu

Merge branch 'main' of github.com:ModelTC/lightx2v into main

parents 8ddd33a5 d013cac7
...@@ -403,6 +403,7 @@ def run_inference( ...@@ -403,6 +403,7 @@ def run_inference(
"rotary_chunk": rotary_chunk, "rotary_chunk": rotary_chunk,
"rotary_chunk_size": rotary_chunk_size, "rotary_chunk_size": rotary_chunk_size,
"clean_cuda_cache": clean_cuda_cache, "clean_cuda_cache": clean_cuda_cache,
"denoising_step_list": [1000, 750, 500, 250],
} }
args = argparse.Namespace( args = argparse.Namespace(
...@@ -818,18 +819,22 @@ def main(): ...@@ -818,18 +819,22 @@ def main():
randomize_btn.click(fn=generate_random_seed, inputs=None, outputs=seed) randomize_btn.click(fn=generate_random_seed, inputs=None, outputs=seed)
with gr.Column(): with gr.Column():
# Set default inference steps based on model class
default_infer_steps = 4 if model_cls == "wan2.1_distill" else 40
infer_steps = gr.Slider( infer_steps = gr.Slider(
label="Inference Steps", label="Inference Steps",
minimum=1, minimum=1,
maximum=100, maximum=100,
step=1, step=1,
value=40, value=default_infer_steps,
info="Number of inference steps for video generation. Increasing steps may improve quality but reduce speed.", info="Number of inference steps for video generation. Increasing steps may improve quality but reduce speed.",
) )
# Set default CFG based on model class
default_enable_cfg = False if model_cls == "wan2.1_distill" else True
enable_cfg = gr.Checkbox( enable_cfg = gr.Checkbox(
label="Enable Classifier-Free Guidance", label="Enable Classifier-Free Guidance",
value=True, value=default_enable_cfg,
info="Enable classifier-free guidance to control prompt strength", info="Enable classifier-free guidance to control prompt strength",
) )
cfg_scale = gr.Slider( cfg_scale = gr.Slider(
...@@ -1149,7 +1154,7 @@ def main(): ...@@ -1149,7 +1154,7 @@ def main():
outputs=output_video, outputs=output_video,
) )
demo.launch(share=True, server_port=args.server_port, server_name=args.server_name) demo.launch(share=True, server_port=args.server_port, server_name=args.server_name, inbrowser=True)
if __name__ == "__main__": if __name__ == "__main__":
...@@ -1158,9 +1163,9 @@ if __name__ == "__main__": ...@@ -1158,9 +1163,9 @@ if __name__ == "__main__":
parser.add_argument( parser.add_argument(
"--model_cls", "--model_cls",
type=str, type=str,
choices=["wan2.1"], choices=["wan2.1", "wan2.1_distill"],
default="wan2.1", default="wan2.1",
help="Model class to use", help="Model class to use (wan2.1: standard model, wan2.1_distill: distilled model for faster inference)",
) )
parser.add_argument("--model_size", type=str, required=True, choices=["14b", "1.3b"], help="Model type to use") parser.add_argument("--model_size", type=str, required=True, choices=["14b", "1.3b"], help="Model type to use")
parser.add_argument("--task", type=str, required=True, choices=["i2v", "t2v"], help="Specify the task type. 'i2v' for image-to-video translation, 't2v' for text-to-video generation.") parser.add_argument("--task", type=str, required=True, choices=["i2v", "t2v"], help="Specify the task type. 'i2v' for image-to-video translation, 't2v' for text-to-video generation.")
......
...@@ -405,6 +405,7 @@ def run_inference( ...@@ -405,6 +405,7 @@ def run_inference(
"rotary_chunk": rotary_chunk, "rotary_chunk": rotary_chunk,
"rotary_chunk_size": rotary_chunk_size, "rotary_chunk_size": rotary_chunk_size,
"clean_cuda_cache": clean_cuda_cache, "clean_cuda_cache": clean_cuda_cache,
"denoising_step_list": [1000, 750, 500, 250],
} }
args = argparse.Namespace( args = argparse.Namespace(
...@@ -818,18 +819,22 @@ def main(): ...@@ -818,18 +819,22 @@ def main():
randomize_btn.click(fn=generate_random_seed, inputs=None, outputs=seed) randomize_btn.click(fn=generate_random_seed, inputs=None, outputs=seed)
with gr.Column(): with gr.Column():
# 根据模型类别设置默认推理步数
default_infer_steps = 4 if model_cls == "wan2.1_distill" else 40
infer_steps = gr.Slider( infer_steps = gr.Slider(
label="推理步数", label="推理步数",
minimum=1, minimum=1,
maximum=100, maximum=100,
step=1, step=1,
value=40, value=default_infer_steps,
info="视频生成的推理步数。增加步数可能提高质量但降低速度。", info="视频生成的推理步数。增加步数可能提高质量但降低速度。",
) )
# 根据模型类别设置默认CFG
default_enable_cfg = False if model_cls == "wan2.1_distill" else True
enable_cfg = gr.Checkbox( enable_cfg = gr.Checkbox(
label="启用无分类器引导", label="启用无分类器引导",
value=True, value=default_enable_cfg,
info="启用无分类器引导以控制提示词强度", info="启用无分类器引导以控制提示词强度",
) )
cfg_scale = gr.Slider( cfg_scale = gr.Slider(
...@@ -1147,7 +1152,7 @@ def main(): ...@@ -1147,7 +1152,7 @@ def main():
outputs=output_video, outputs=output_video,
) )
demo.launch(share=True, server_port=args.server_port, server_name=args.server_name) demo.launch(share=True, server_port=args.server_port, server_name=args.server_name, inbrowser=True)
if __name__ == "__main__": if __name__ == "__main__":
...@@ -1156,9 +1161,9 @@ if __name__ == "__main__": ...@@ -1156,9 +1161,9 @@ if __name__ == "__main__":
parser.add_argument( parser.add_argument(
"--model_cls", "--model_cls",
type=str, type=str,
choices=["wan2.1"], choices=["wan2.1", "wan2.1_distill"],
default="wan2.1", default="wan2.1",
help="要使用的模型类别", help="要使用的模型类别 (wan2.1: 标准模型, wan2.1_distill: 蒸馏模型,推理更快)",
) )
parser.add_argument("--model_size", type=str, required=True, choices=["14b", "1.3b"], help="模型大小:14b 或 1.3b") parser.add_argument("--model_size", type=str, required=True, choices=["14b", "1.3b"], help="模型大小:14b 或 1.3b")
parser.add_argument("--task", type=str, required=True, choices=["i2v", "t2v"], help="指定任务类型。'i2v'用于图像到视频转换,'t2v'用于文本到视频生成。") parser.add_argument("--task", type=str, required=True, choices=["i2v", "t2v"], help="指定任务类型。'i2v'用于图像到视频转换,'t2v'用于文本到视频生成。")
......
...@@ -28,6 +28,10 @@ t2v_model_path=/path/to/Wan2.1-T2V-1.3B ...@@ -28,6 +28,10 @@ t2v_model_path=/path/to/Wan2.1-T2V-1.3B
# Default model size (14b, 1.3b) # Default model size (14b, 1.3b)
model_size="14b" model_size="14b"
# Model class configuration
# Default model class (wan2.1, wan2.1_distill)
model_cls="wan2.1"
# Server configuration # Server configuration
server_name="0.0.0.0" server_name="0.0.0.0"
server_port=8032 server_port=8032
...@@ -72,6 +76,10 @@ while [[ $# -gt 0 ]]; do ...@@ -72,6 +76,10 @@ while [[ $# -gt 0 ]]; do
model_size="$2" model_size="$2"
shift 2 shift 2
;; ;;
--model_cls)
model_cls="$2"
shift 2
;;
--help) --help)
echo "🎬 Lightx2v Gradio Demo Startup Script" echo "🎬 Lightx2v Gradio Demo Startup Script"
echo "==========================================" echo "=========================================="
...@@ -90,6 +98,10 @@ while [[ $# -gt 0 ]]; do ...@@ -90,6 +98,10 @@ while [[ $# -gt 0 ]]; do
echo " Model size (default: 14b)" echo " Model size (default: 14b)"
echo " 14b: 14 billion parameters model" echo " 14b: 14 billion parameters model"
echo " 1.3b: 1.3 billion parameters model" echo " 1.3b: 1.3 billion parameters model"
echo " --model_cls MODEL_CLASS"
echo " Model class (default: wan2.1)"
echo " wan2.1: Standard model variant"
echo " wan2.1_distill: Distilled model variant for faster inference"
echo " --help Show this help message" echo " --help Show this help message"
echo "" echo ""
echo "🚀 Usage examples:" echo "🚀 Usage examples:"
...@@ -99,6 +111,7 @@ while [[ $# -gt 0 ]]; do ...@@ -99,6 +111,7 @@ while [[ $# -gt 0 ]]; do
echo " $0 --task i2v --gpu 1 --port 8032 # Use GPU 1" echo " $0 --task i2v --gpu 1 --port 8032 # Use GPU 1"
echo " $0 --task t2v --model_size 1.3b # Use 1.3B model" echo " $0 --task t2v --model_size 1.3b # Use 1.3B model"
echo " $0 --task i2v --model_size 14b # Use 14B model" echo " $0 --task i2v --model_size 14b # Use 14B model"
echo " $0 --task i2v --model_cls wan2.1_distill # Use distilled model"
echo "" echo ""
echo "📝 Notes:" echo "📝 Notes:"
echo " - Edit script to configure model paths before first use" echo " - Edit script to configure model paths before first use"
...@@ -132,6 +145,12 @@ if [[ "$model_size" != "14b" && "$model_size" != "1.3b" ]]; then ...@@ -132,6 +145,12 @@ if [[ "$model_size" != "14b" && "$model_size" != "1.3b" ]]; then
exit 1 exit 1
fi fi
# Validate model class
if [[ "$model_cls" != "wan2.1" && "$model_cls" != "wan2.1_distill" ]]; then
echo "Error: Model class must be 'wan2.1' or 'wan2.1_distill'"
exit 1
fi
# Select model path based on task type # Select model path based on task type
if [[ "$task" == "i2v" ]]; then if [[ "$task" == "i2v" ]]; then
model_path=$i2v_model_path model_path=$i2v_model_path
...@@ -181,6 +200,7 @@ echo "📁 Project path: $lightx2v_path" ...@@ -181,6 +200,7 @@ echo "📁 Project path: $lightx2v_path"
echo "🤖 Model path: $model_path" echo "🤖 Model path: $model_path"
echo "🎯 Task type: $task" echo "🎯 Task type: $task"
echo "🤖 Model size: $model_size" echo "🤖 Model size: $model_size"
echo "🤖 Model class: $model_cls"
echo "🌏 Interface language: $lang" echo "🌏 Interface language: $lang"
echo "🖥️ GPU device: $gpu_id" echo "🖥️ GPU device: $gpu_id"
echo "🌐 Server address: $server_name:$server_port" echo "🌐 Server address: $server_name:$server_port"
...@@ -208,6 +228,7 @@ echo "==========================================" ...@@ -208,6 +228,7 @@ echo "=========================================="
# Start Python demo # Start Python demo
python $demo_file \ python $demo_file \
--model_path "$model_path" \ --model_path "$model_path" \
--model_cls "$model_cls" \
--task "$task" \ --task "$task" \
--server_name "$server_name" \ --server_name "$server_name" \
--server_port "$server_port" \ --server_port "$server_port" \
......
@echo off
chcp 65001 >nul
echo 🎬 LightX2V Gradio Windows Startup Script
echo ==========================================
REM ==================== Configuration Area ====================
REM ⚠️ Important: Please modify the following paths according to your actual environment
REM 🚨 Storage Performance Tips 🚨
REM 💾 Strongly recommend storing model files on SSD solid-state drives!
REM 📈 SSD can significantly improve model loading speed and inference performance
REM 🐌 Using mechanical hard drives (HDD) may cause slow model loading and affect overall experience
REM LightX2V project root directory path
REM Example: D:\LightX2V
set lightx2v_path=/path/to/LightX2V
REM Model path configuration
REM Image-to-video model path (for i2v tasks)
REM Example: D:\models\Wan2.1-I2V-14B-480P-Lightx2v
set i2v_model_path=/path/to/Wan2.1-I2V-14B-480P-Lightx2v
REM Text-to-video model path (for t2v tasks)
REM Example: D:\models\Wan2.1-T2V-1.3B
set t2v_model_path=/path/to/Wan2.1-T2V-1.3B
REM Model size configuration
REM Default model size (14b, 1.3b)
set model_size=14b
REM Model class configuration
REM Default model class (wan2.1, wan2.1_distill)
set model_cls=wan2.1
REM Server configuration
set server_name=127.0.0.1
set server_port=8032
REM GPU configuration
set gpu_id=0
REM ==================== Environment Variables Setup ====================
set CUDA_VISIBLE_DEVICES=%gpu_id%
set PYTHONPATH=%lightx2v_path%;%PYTHONPATH%
set ENABLE_PROFILING_DEBUG=true
set PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
REM ==================== Parameter Parsing ====================
REM Default task type
set task=i2v
REM Default interface language
set lang=zh
REM Parse command line arguments
:parse_args
if "%1"=="" goto :end_parse
if "%1"=="--task" (
set task=%2
shift
shift
goto :parse_args
)
if "%1"=="--lang" (
set lang=%2
shift
shift
goto :parse_args
)
if "%1"=="--port" (
set server_port=%2
shift
shift
goto :parse_args
)
if "%1"=="--gpu" (
set gpu_id=%2
set CUDA_VISIBLE_DEVICES=%gpu_id%
shift
shift
goto :parse_args
)
if "%1"=="--model_size" (
set model_size=%2
shift
shift
goto :parse_args
)
if "%1"=="--model_cls" (
set model_cls=%2
shift
shift
goto :parse_args
)
if "%1"=="--help" (
echo 🎬 LightX2V Gradio Windows Startup Script
echo ==========================================
echo Usage: %0 [options]
echo.
echo 📋 Available options:
echo --task i2v^|t2v Task type ^(default: i2v^)
echo i2v: Image-to-video generation
echo t2v: Text-to-video generation
echo --lang zh^|en Interface language ^(default: zh^)
echo zh: Chinese interface
echo en: English interface
echo --port PORT Server port ^(default: 8032^)
echo --gpu GPU_ID GPU device ID ^(default: 0^)
echo --model_size MODEL_SIZE
echo Model size ^(default: 14b^)
echo 14b: 14B parameter model
echo 1.3b: 1.3B parameter model
echo --model_cls MODEL_CLASS
echo Model class ^(default: wan2.1^)
echo wan2.1: Standard model variant
echo wan2.1_distill: Distilled model variant for faster inference
echo --help Show this help message
echo.
echo 🚀 Usage examples:
echo %0 # Default startup for image-to-video mode
echo %0 --task i2v --lang zh --port 8032 # Start with specified parameters
echo %0 --task t2v --lang en --port 7860 # Text-to-video with English interface
echo %0 --task i2v --gpu 1 --port 8032 # Use GPU 1
echo %0 --task t2v --model_size 1.3b # Use 1.3B model
echo %0 --task i2v --model_size 14b # Use 14B model
echo %0 --task i2v --model_cls wan2.1_distill # Use distilled model
echo.
echo 📝 Notes:
echo - Edit script to configure model paths before first use
echo - Ensure required Python dependencies are installed
echo - Recommended to use GPU with 8GB+ VRAM
echo - 🚨 Strongly recommend storing models on SSD for better performance
pause
exit /b 0
)
echo Unknown parameter: %1
echo Use --help to see help information
pause
exit /b 1
:end_parse
REM ==================== Parameter Validation ====================
if "%task%"=="i2v" goto :valid_task
if "%task%"=="t2v" goto :valid_task
echo Error: Task type must be 'i2v' or 't2v'
pause
exit /b 1
:valid_task
if "%lang%"=="zh" goto :valid_lang
if "%lang%"=="en" goto :valid_lang
echo Error: Language must be 'zh' or 'en'
pause
exit /b 1
:valid_lang
if "%model_size%"=="14b" goto :valid_size
if "%model_size%"=="1.3b" goto :valid_size
echo Error: Model size must be '14b' or '1.3b'
pause
exit /b 1
:valid_size
if "%model_cls%"=="wan2.1" goto :valid_cls
if "%model_cls%"=="wan2.1_distill" goto :valid_cls
echo Error: Model class must be 'wan2.1' or 'wan2.1_distill'
pause
exit /b 1
:valid_cls
REM Select model path based on task type
if "%task%"=="i2v" (
set model_path=%i2v_model_path%
echo 🎬 Starting Image-to-Video mode
) else (
set model_path=%t2v_model_path%
echo 🎬 Starting Text-to-Video mode
)
REM Check if model path exists
if not exist "%model_path%" (
echo Error: Model path does not exist
echo 📁 Path: %model_path%
echo 🔧 Solutions:
echo 1. Check model path configuration in script
echo 2. Ensure model files are properly downloaded
echo 3. Verify path permissions are correct
echo 4. 💾 Recommend storing models on SSD for faster loading
pause
exit /b 1
)
REM Select demo file based on language
if "%lang%"=="zh" (
set demo_file=gradio_demo_zh.py
echo 🌏 Using Chinese interface
) else (
set demo_file=gradio_demo.py
echo 🌏 Using English interface
)
REM Check if demo file exists
if not exist "%demo_file%" (
echo Error: Demo file does not exist
echo 📄 File: %demo_file%
echo 🔧 Solutions:
echo 1. Ensure script is run in the correct directory
echo 2. Check if file has been renamed or moved
echo 3. Re-clone or download project files
pause
exit /b 1
)
REM ==================== System Information Display ====================
echo ==========================================
echo 🚀 LightX2V Gradio Starting...
echo ==========================================
echo 📁 Project path: %lightx2v_path%
echo 🤖 Model path: %model_path%
echo 🎯 Task type: %task%
echo 🤖 Model size: %model_size%
echo 🤖 Model class: %model_cls%
echo 🌏 Interface language: %lang%
echo 🖥️ GPU device: %gpu_id%
echo 🌐 Server address: %server_name%:%server_port%
echo ==========================================
REM Display system resource information
echo 💻 System resource information:
wmic OS get TotalVisibleMemorySize,FreePhysicalMemory /format:table
REM Display GPU information
nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader,nounits 2>nul
if errorlevel 1 (
echo 🎮 GPU information: Unable to get GPU info
) else (
echo 🎮 GPU information:
nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader,nounits
)
REM ==================== Start Demo ====================
echo 🎬 Starting Gradio demo...
echo 📱 Please access in browser: http://%server_name%:%server_port%
echo ⏹️ Press Ctrl+C to stop service
echo 🔄 First startup may take several minutes to load resources...
echo ==========================================
REM Start Python demo
python %demo_file% ^
--model_path "%model_path%" ^
--model_cls %model_cls% ^
--task %task% ^
--server_name %server_name% ^
--server_port %server_port% ^
--model_size %model_size%
REM Display final system resource usage
echo.
echo ==========================================
echo 📊 Final system resource usage:
wmic OS get TotalVisibleMemorySize,FreePhysicalMemory /format:table
pause
...@@ -13,5 +13,6 @@ ...@@ -13,5 +13,6 @@
"cpu_offload": false, "cpu_offload": false,
"mm_config": { "mm_config": {
"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl-ActVllm" "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl-ActVllm"
} },
"use_tiling_vae": true
} }
{ {
"infer_steps": 40, "infer_steps": 4,
"target_video_length": 81, "target_video_length": 81,
"target_height": 480, // 720 "target_height": 480, // 720
"target_width": 832, // 1280 "target_width": 832, // 1280
...@@ -9,9 +9,11 @@ ...@@ -9,9 +9,11 @@
"seed": 42, //1234 "seed": 42, //1234
"sample_guide_scale": 5, "sample_guide_scale": 5,
"sample_shift": 5, "sample_shift": 5,
"enable_cfg": true, "enable_cfg": false,
"cpu_offload": false, "cpu_offload": false,
"denoising_step_list": [1000, 750, 500, 250],
"mm_config": { "mm_config": {
"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl-ActVllm" "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl-ActVllm"
} },
"use_tiling_vae": true
} }
...@@ -24,5 +24,6 @@ ...@@ -24,5 +24,6 @@
// [-114.36346466, 65.26524496, -18.82220707, 4.91518089, -0.23412683] // [-114.36346466, 65.26524496, -18.82220707, 4.91518089, -0.23412683]
// ], // ],
"use_ret_steps": false, "use_ret_steps": false,
"teacache_thresh": 0.2 "teacache_thresh": 0.2,
"use_tiling_vae": true
} }
{
"infer_steps": 40,
"target_video_length": 81,
"target_height": 480, // 720
"target_width": 832, // 1280
"self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2",
"seed": 42,
"sample_guide_scale": 5,
"sample_shift": 5,
"enable_cfg": true,
"cpu_offload": true,
"offload_granularity": "block",
"offload_ratio": 0.8, //1
"t5_cpu_offload": true,
"mm_config": {
"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Q8F"
},
"use_tiling_vae": true
}
{
"infer_steps": 4,
"target_video_length": 81,
"target_height": 480, // 720
"target_width": 832, // 1280
"self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2",
"seed": 42,
"sample_guide_scale": 5,
"sample_shift": 5,
"enable_cfg": false,
"cpu_offload": true,
"offload_granularity": "block",
"offload_ratio": 0.8, //1
"t5_cpu_offload": true,
"denoising_step_list": [1000, 750, 500, 250],
"mm_config": {
"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Q8F"
},
"use_tiling_vae": true
}
{
"infer_steps": 40,
"target_video_length": 81,
"target_height": 480, // 720
"target_width": 832, // 1280
"self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2",
"seed": 42,
"sample_guide_scale": 5,
"sample_shift": 5,
"enable_cfg": true,
"cpu_offload": true,
"offload_granularity": "block",
"offload_ratio": 0.8, //1
"t5_cpu_offload": true,
"mm_config": {
"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Q8F"
},
"use_tiling_vae": true
}
{
"infer_steps": 4,
"target_video_length": 81,
"target_height": 480, // 720
"target_width": 832, // 1280
"self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2",
"seed": 42,
"sample_guide_scale": 5,
"sample_shift": 5,
"enable_cfg": false,
"cpu_offload": true,
"offload_granularity": "block",
"offload_ratio": 0.8, //1
"t5_cpu_offload": true,
"denoising_step_list": [1000, 750, 500, 250],
"mm_config": {
"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Q8F"
},
"use_tiling_vae": true
}
{
"infer_steps": 40,
"target_video_length": 81,
"target_height": 480,
"target_width": 832,
"self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3",
"seed": 442,
"sample_guide_scale": 5,
"sample_shift": 3,
"enable_cfg": true,
"cpu_offload": false,
"changing_resolution": true,
"resolution_rate": [0.75],
"changing_resolution_steps": [20]
}
{
"infer_steps": 40,
"target_video_length": 81,
"target_height": 480,
"target_width": 832,
"self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3",
"seed": 442,
"sample_guide_scale": 5,
"sample_shift": 3,
"enable_cfg": true,
"cpu_offload": false,
"changing_resolution": true,
"resolution_rate": [1.0, 0.75],
"changing_resolution_steps": [5, 25]
}
...@@ -13,6 +13,6 @@ ...@@ -13,6 +13,6 @@
"enable_cfg": true, "enable_cfg": true,
"cpu_offload": false, "cpu_offload": false,
"changing_resolution": true, "changing_resolution": true,
"resolution_rate": 0.75, "resolution_rate": [0.75],
"changing_resolution_steps": 25 "changing_resolution_steps": [25]
} }
{
"infer_steps": 50,
"target_video_length": 81,
"text_len": 512,
"target_height": 480,
"target_width": 832,
"self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3",
"seed": 42,
"sample_guide_scale": 6,
"sample_shift": 8,
"enable_cfg": true,
"cpu_offload": false,
"changing_resolution": true,
"resolution_rate": [1.0, 0.75],
"changing_resolution_steps": [10, 35]
}
{
"infer_steps": 4,
"target_video_length": 81,
"target_height": 480,
"target_width": 832,
"self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2",
"seed": 42,
"sample_guide_scale": 5,
"sample_shift": 5,
"enable_cfg": false,
"cpu_offload": false,
"mm_config": {
"mm_type": "W-int8-channel-sym-A-int8-channel-sym-dynamic-Torchao"
},
"t5_quantized": true,
"t5_quant_scheme": "int8-torchao",
"clip_quantized": true,
"clip_quant_scheme": "int8-torchao"
}
# Gradio Deployment # Gradio Deployment Guide
## 📖 Overview ## 📖 Overview
Lightx2v is a lightweight video inference and generation engine that provides a web interface based on Gradio, supporting both Image-to-Video and Text-to-Video generation modes. Lightx2v is a lightweight video inference and generation engine that provides a web interface based on Gradio, supporting both Image-to-Video and Text-to-Video generation modes.
## 📁 File Structure
```
LightX2V/app/
├── gradio_demo.py # English interface demo
├── gradio_demo_zh.py # Chinese interface demo
├── run_gradio.sh # Startup script
├── README.md # Documentation
├── saved_videos/ # Generated video save directory
└── inference_logs.log # Inference logs
```
This project contains two main demo files: This project contains two main demo files:
- `gradio_demo.py` - English interface version - `gradio_demo.py` - English interface version
- `gradio_demo_zh.py` - Chinese interface version - `gradio_demo_zh.py` - Chinese interface version
## 🚀 Quick Start ## 🚀 Quick Start
### System Requirements ### Environment Requirements
- Python 3.10+ (recommended) Follow the [Quick Start Guide](../getting_started/quickstart.md) to install the environment
- CUDA 12.4+ (recommended)
- At least 8GB GPU VRAM
- At least 16GB system memory (preferably at least 32GB)
- At least 128GB SSD solid-state drive (**💾 Strongly recommend using SSD solid-state drives to store model files! During "lazy loading" startup, significantly improves model loading speed and inference performance**)
### Install Dependencies
```bash
# Install basic dependencies
pip install -r requirements.txt
pip install gradio
```
#### Recommended Optimization Library Configuration #### Recommended Optimization Library Configuration
...@@ -34,6 +34,8 @@ pip install gradio ...@@ -34,6 +34,8 @@ pip install gradio
- [sglang-kernel](https://github.com/sgl-project/sglang/tree/main/sgl-kernel) - [sglang-kernel](https://github.com/sgl-project/sglang/tree/main/sgl-kernel)
- [q8-kernel](https://github.com/KONAKONA666/q8_kernels) (only supports ADA architecture GPUs) - [q8-kernel](https://github.com/KONAKONA666/q8_kernels) (only supports ADA architecture GPUs)
Install each operator as needed by following the tutorial on its project homepage.
### 🤖 Supported Models ### 🤖 Supported Models
#### 🎬 Image-to-Video Models #### 🎬 Image-to-Video Models
...@@ -54,15 +56,21 @@ pip install gradio ...@@ -54,15 +56,21 @@ pip install gradio
| ✅ [Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v) | 14B | Distilled optimized version | High quality + fast inference | | ✅ [Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v](https://huggingface.co/lightx2v/Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v) | 14B | Distilled optimized version | High quality + fast inference |
**💡 Model Selection Recommendations**: **💡 Model Selection Recommendations**:
- **First-time use**: Recommend choosing distilled versions - **First-time use**: Recommend choosing distilled versions (`wan2.1_distill`)
- **Pursuing quality**: Choose 720p resolution or 14B parameter models - **Pursuing quality**: Choose 720p resolution or 14B parameter models
- **Pursuing speed**: Choose 480p resolution or 1.3B parameter models - **Pursuing speed**: Choose 480p resolution or 1.3B parameter models, prioritizing distilled versions
- **Resource-constrained**: Prioritize distilled versions and lower resolutions - **Resource-constrained**: Prioritize distilled versions and lower resolutions
- **Real-time applications**: Strongly recommend using distilled models (`wan2.1_distill`)
**🎯 Model Category Description**:
- **`wan2.1`**: Standard model; delivers the best video generation quality and suits scenarios with extremely high quality requirements
- **`wan2.1_distill`**: Distilled model; knowledge distillation greatly reduces computation time while maintaining good quality, making it suitable for most application scenarios (see the sketch below)
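For reference, the model-class-dependent defaults introduced by this commit in the Gradio demos can be summarized as a small helper. This is a minimal illustrative sketch that mirrors the diff above; the function name is hypothetical and not part of the repository:

```python
# Illustrative sketch (names are hypothetical): how the demo's defaults in this
# commit depend on --model_cls. Mirrors the diff above, not repository code.
def default_inference_options(model_cls: str) -> dict:
    is_distill = model_cls == "wan2.1_distill"
    return {
        "infer_steps": 4 if is_distill else 40,   # distilled models run in 4 steps
        "enable_cfg": not is_distill,             # CFG is unnecessary for wan2.1_distill
        # 4-step denoising schedule used when the distilled model is selected
        "denoising_step_list": [1000, 750, 500, 250] if is_distill else None,
    }


print(default_inference_options("wan2.1_distill"))
```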
### Startup Methods ### Startup Methods
#### Method 1: Using Startup Script (Recommended) #### Method 1: Using Startup Script (Recommended)
**Linux Environment:**
```bash ```bash
# 1. Edit the startup script to configure relevant paths # 1. Edit the startup script to configure relevant paths
cd app/ cd app/
...@@ -79,41 +87,84 @@ vim run_gradio.sh ...@@ -79,41 +87,84 @@ vim run_gradio.sh
# 2. Run the startup script # 2. Run the startup script
bash run_gradio.sh bash run_gradio.sh
# 3. Or start with parameters (recommended) # 3. Or start with parameters (recommended using distilled models)
bash run_gradio.sh --task i2v --lang en --model_size 14b --port 8032 bash run_gradio.sh --task i2v --lang en --model_cls wan2.1 --model_size 14b --port 8032
# bash run_gradio.sh --task i2v --lang en --model_size 14b --port 8032 bash run_gradio.sh --task t2v --lang en --model_cls wan2.1 --model_size 1.3b --port 8032
# bash run_gradio.sh --task i2v --lang en --model_size 1.3b --port 8032 bash run_gradio.sh --task i2v --lang en --model_cls wan2.1_distill --model_size 14b --port 8032
bash run_gradio.sh --task t2v --lang en --model_cls wan2.1_distill --model_size 1.3b --port 8032
```
**Windows Environment:**
```cmd
# 1. Edit the startup script to configure relevant paths
cd app\
notepad run_gradio_win.bat
# Configuration items that need to be modified:
# - lightx2v_path: Lightx2v project root directory path
# - i2v_model_path: Image-to-video model path
# - t2v_model_path: Text-to-video model path
# 💾 Important note: Recommend pointing model paths to SSD storage locations
# Example: D:\models\ or E:\models\
# 2. Run the startup script
run_gradio_win.bat
# 3. Or start with parameters (recommended using distilled models)
run_gradio_win.bat --task i2v --lang en --model_cls wan2.1 --model_size 14b --port 8032
run_gradio_win.bat --task t2v --lang en --model_cls wan2.1 --model_size 1.3b --port 8032
run_gradio_win.bat --task i2v --lang en --model_cls wan2.1_distill --model_size 14b --port 8032
run_gradio_win.bat --task t2v --lang en --model_cls wan2.1_distill --model_size 1.3b --port 8032
``` ```
#### Method 2: Direct Command Line Startup #### Method 2: Direct Command Line Startup
**Linux Environment:**
**Image-to-Video Mode:** **Image-to-Video Mode:**
```bash ```bash
python gradio_demo.py \ python gradio_demo.py \
--model_path /path/to/Wan2.1-I2V-14B-720P-Lightx2v \ --model_path /path/to/Wan2.1-I2V-14B-480P-Lightx2v \
--model_cls wan2.1 \
--model_size 14b \ --model_size 14b \
--task i2v \ --task i2v \
--server_name 0.0.0.0 \ --server_name 0.0.0.0 \
--server_port 7862 --server_port 7862
``` ```
**Text-to-Video Mode:** **English Interface Version:**
```bash ```bash
python gradio_demo.py \ python gradio_demo.py \
--model_path /path/to/Wan2.1-T2V-1.3B \ --model_path /path/to/Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v \
--model_size 1.3b \ --model_cls wan2.1_distill \
--model_size 14b \
--task t2v \ --task t2v \
--server_name 0.0.0.0 \ --server_name 0.0.0.0 \
--server_port 7862 --server_port 7862
``` ```
**Chinese Interface Version:** **Windows Environment:**
```bash
python gradio_demo_zh.py \ **Image-to-Video Mode:**
--model_path /path/to/model \ ```cmd
--model_size 14b \ python gradio_demo.py ^
--task i2v \ --model_path D:\models\Wan2.1-I2V-14B-480P-Lightx2v ^
--server_name 0.0.0.0 \ --model_cls wan2.1 ^
--model_size 14b ^
--task i2v ^
--server_name 127.0.0.1 ^
--server_port 7862
```
**English Interface Version:**
```cmd
python gradio_demo.py ^
--model_path D:\models\Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v ^
--model_cls wan2.1_distill ^
--model_size 14b ^
--task t2v ^
--server_name 127.0.0.1 ^
--server_port 7862 --server_port 7862
``` ```
...@@ -122,8 +173,8 @@ python gradio_demo_zh.py \ ...@@ -122,8 +173,8 @@ python gradio_demo_zh.py \
| Parameter | Type | Required | Default | Description | | Parameter | Type | Required | Default | Description |
|-----------|------|----------|---------|-------------| |-----------|------|----------|---------|-------------|
| `--model_path` | str | ✅ | - | Model folder path | | `--model_path` | str | ✅ | - | Model folder path |
| `--model_cls` | str | ❌ | wan2.1 | Model class (currently only supports wan2.1) | | `--model_cls` | str | ❌ | wan2.1 | Model class: `wan2.1` (standard model) or `wan2.1_distill` (distilled model, faster inference) |
| `--model_size` | str | ✅ | - | Model size: `14b(t2v or i2v)` or `1.3b(t2v)` | | `--model_size` | str | ✅ | - | Model size: `14b (image-to-video or text-to-video)` or `1.3b (text-to-video)` |
| `--task` | str | ✅ | - | Task type: `i2v` (image-to-video) or `t2v` (text-to-video) | | `--task` | str | ✅ | - | Task type: `i2v` (image-to-video) or `t2v` (text-to-video) |
| `--server_port` | int | ❌ | 7862 | Server port | | `--server_port` | int | ❌ | 7862 | Server port |
| `--server_name` | str | ❌ | 0.0.0.0 | Server IP address | | `--server_name` | str | ❌ | 0.0.0.0 | Server IP address |
...@@ -197,23 +248,11 @@ After enabling "Auto-configure Inference Options", the system will automatically ...@@ -197,23 +248,11 @@ After enabling "Auto-configure Inference Options", the system will automatically
**💡 For devices with insufficient VRAM or performance constraints**: **💡 For devices with insufficient VRAM or performance constraints**:
- **🎯 Model Selection**: Prioritize using distilled version models (StepDistill-CfgDistill) - **🎯 Model Selection**: Prioritize using distilled version models (`wan2.1_distill`)
- **⚡ Inference Steps**: Recommend setting to 4 steps - **⚡ Inference Steps**: Recommend setting to 4 steps
- **🔧 CFG Settings**: Recommend disabling CFG option to improve generation speed - **🔧 CFG Settings**: Recommend disabling CFG option to improve generation speed
- **🔄 Auto-Configuration**: Enable "Auto-configure Inference Options" - **🔄 Auto-Configuration**: Enable "Auto-configure Inference Options"
- **💾 Storage Optimization**: Ensure models are stored on SSD for optimal loading performance (a combined config sketch follows this list)
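Taken together, these recommendations roughly match the offload-oriented 4-step configs added elsewhere in this commit. The dict below is a hedged sketch assembled from those JSON files, not an official preset:

```python
# Hedged sketch of a low-VRAM setup, assembled from the distill + CPU-offload
# example configs added in this commit; not an official preset.
low_vram_config = {
    "infer_steps": 4,                              # distilled model: 4 steps
    "enable_cfg": False,                           # CFG disabled for speed
    "denoising_step_list": [1000, 750, 500, 250],  # distillation schedule
    "cpu_offload": True,                           # offload transformer blocks to CPU
    "offload_granularity": "block",
    "offload_ratio": 0.8,
    "t5_cpu_offload": True,                        # also offload the T5 text encoder
    "use_tiling_vae": True,                        # tiled VAE to reduce peak VRAM
}
```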
## 📁 File Structure
```
lightx2v/app/
├── gradio_demo.py # English interface demo
├── gradio_demo_zh.py # Chinese interface demo
├── run_gradio.sh # Startup script
├── README.md # Documentation
├── saved_videos/ # Generated video save directory
└── inference_logs.log # Inference logs
```
## 🎨 Interface Description ## 🎨 Interface Description
...@@ -278,5 +317,6 @@ nvidia-smi ...@@ -278,5 +317,6 @@ nvidia-smi
htop htop
``` ```
Issues and Pull Requests to improve this project are welcome!
**Note**: Please comply with relevant laws and regulations when using videos generated by this tool, and do not use them for illegal purposes. **Note**: Please comply with relevant laws and regulations when using videos generated by this tool, and do not use them for illegal purposes.
# Local Windows Deployment Guide # Windows Local Deployment Guide
This document provides detailed instructions for deploying LightX2V locally on Windows environments. ## 📖 Overview
## System Requirements This document provides detailed instructions for deploying LightX2V locally on Windows environments, including batch file inference, Gradio Web interface inference, and other usage methods.
Before getting started, please ensure your system meets the following requirements: ## 🚀 Quick Start
- **Operating System**: Windows 10/11 ### Environment Requirements
- **Graphics Card**: NVIDIA GPU (with CUDA support)
- **VRAM**: At least 8GB VRAM
- **Memory**: At least 16GB RAM
- **Storage**: 20GB+ available disk space
- **Environment Manager**: Anaconda or Miniconda installed
- **Network Tools**: Git (for cloning repositories)
## Deployment Steps
### Step 1: Check CUDA Version
First, verify your GPU driver and CUDA version by running the following command in Command Prompt:
```bash
nvidia-smi
```
Note the **CUDA Version** displayed in the output, as you'll need to match this version during subsequent installations.
### Step 2: Create Python Environment
Create an isolated conda environment, we recommend using Python 3.12: #### Hardware Requirements
- **GPU**: NVIDIA GPU, recommended 8GB+ VRAM
- **Memory**: Recommended 16GB+ RAM
- **Storage**: An SSD is strongly recommended; mechanical hard drives will cause slow model loading
```bash #### Software Requirements
# Create new environment (using Python 3.12 as example) - **Operating System**: Windows 10/11
conda create -n lightx2v python=3.12 -y - **Python**: 3.12 or higher version
- **CUDA**: 12.4 or higher version
- **Dependencies**: Refer to LightX2V project's requirements_win.txt
# Activate environment ## 🎯 Usage Methods
conda activate lightx2v
```
> 💡 **Tip**: Python 3.10 or higher is recommended for optimal compatibility. ### Method 1: Using Batch File Inference
### Step 3: Install PyTorch Framework Refer to the [Quick Start Guide](../getting_started/quickstart.md) to set up the environment, then run inference using the [batch files](https://github.com/ModelTC/LightX2V/tree/main/scripts/win).
#### Method 1: Download Official Wheel Packages (Recommended) ### Method 2: Using Gradio Web Interface Inference
1. Visit the [PyTorch Official Wheel Download Page](https://download.pytorch.org/whl/torch/) #### Manual Gradio Configuration
2. Select the appropriate wheel package, ensuring you match:
- **Python Version**: Must match your environment (cp312 means Python 3.12)
- **CUDA Version**: Must match your GPU driver
- **Platform**: Choose Windows version (win_amd64)
**Example for Python 3.12 + PyTorch 2.6 + CUDA 12.4:** Set up the environment per the [Quick Start Guide](../getting_started/quickstart.md), then follow the [Gradio Deployment Guide](./deploy_gradio.md)
``` #### One-Click Gradio Startup (Recommended)
torch-2.6.0+cu124-cp312-cp312-win_amd64.whl
```
After downloading, install the packages: **📦 Download Software Package**
- [Baidu Cloud](https://pan.baidu.com/s/1ef3hEXyIuO0z6z9MoXe4nQ?pwd=7g4f)
- [Quark Cloud](https://pan.quark.cn/s/36a0cdbde7d9)
```bash **📁 Directory Structure**
# Install PyTorch (replace with actual file path) After extraction, ensure the directory structure is as follows:
pip install torch-2.6.0+cu124-cp312-cp312-win_amd64.whl
# Install accompanying vision and audio packages
pip install torchvision==0.21.0 torchaudio==2.6.0
``` ```
├── env/ # LightX2V environment directory
#### Method 2: Direct pip Installation ├── LightX2V/ # LightX2V project directory
├── start_lightx2v.bat # One-click startup script
If you prefer direct installation, use the following command: ├── lightx2v_config.txt # Configuration file
├── LightX2V使用说明.txt # LightX2V usage instructions
```bash └── models/ # Model storage directory
# Example: CUDA 12.4 version ├── 说明.txt # Model documentation
pip install torch==2.6.0+cu124 torchvision==0.21.0+cu124 torchaudio==2.6.0+cu124 --index-url https://download.pytorch.org/whl/cu124 ├── Wan2.1-I2V-14B-480P-Lightx2v/ # Image-to-video model (480P)
├── Wan2.1-I2V-14B-720P-Lightx2v/ # Image-to-video model (720P)
├── Wan2.1-I2V-14B-480P-StepDistill-CfgDistil-Lightx2v/ # Image-to-video model (4-step distillation, 480P)
├── Wan2.1-I2V-14B-720P-StepDistill-CfgDistil-Lightx2v/ # Image-to-video model (4-step distillation, 720P)
├── Wan2.1-T2V-1.3B-Lightx2v/ # Text-to-video model (1.3B parameters)
├── Wan2.1-T2V-14B-Lightx2v/ # Text-to-video model (14B parameters)
└── Wan2.1-T2V-14B-StepDistill-CfgDistill-Lightx2v/ # Text-to-video model (4-step distillation)
``` ```
### Step 4: Install Windows Version vLLM **📋 Configuration Parameters**
Download the corresponding wheel package from the [vllm-windows releases page](https://github.com/SystemPanic/vllm-windows/releases). Edit the `lightx2v_config.txt` file and modify the following parameters as needed:
**Version Matching Requirements:** ```ini
- Python version must match (e.g., cp312) # Task type (i2v: image-to-video, t2v: text-to-video)
- PyTorch version must match task=i2v
- CUDA version must match
**Recommended v0.9.1 Installation:** # Interface language (zh: Chinese, en: English)
lang=en
```bash # Server port
pip install vllm-0.9.1+cu124-cp312-cp312-win_amd64.whl port=8032
```
> ⚠️ **Note**: Please select the appropriate wheel package filename based on your specific environment.
### Step 5: Install Attention Mechanism Operators # GPU device ID (0, 1, 2...)
gpu=0
You can choose to install either Flash Attention 2 or SageAttention 2. **SageAttention 2 is strongly recommended**. # Model size (14b: 14B parameter model, 1.3b: 1.3B parameter model)
model_size=14b
#### Option A: Flash Attention 2 # Model class (wan2.1: standard model, wan2.1_distill: distilled model)
model_cls=wan2.1
```bash
pip install flash-attn==2.7.2.post1
``` ```
#### Option B: SageAttention 2 (Recommended) **⚠️ Important Note**: If you are using a distilled model (model name contains the StepDistill-CfgDistil field), set `model_cls` to `wan2.1_distill`
**Download Sources:**
- [Windows Version 1](https://github.com/woct0rdho/SageAttention/releases)
- [Windows Version 2](https://github.com/sdbds/SageAttention-for-windows/releases)
**Version Selection Guidelines:** **🚀 Start Service**
- Python version must match
- PyTorch version must match
- **CUDA version can be flexible** (SageAttention doesn't use breaking APIs yet)
**Recommended Installation Version:** Double-click the `start_lightx2v.bat` file to run it; the script will:
1. Automatically read the configuration file
2. Verify model paths and file integrity
3. Start the Gradio Web interface
4. Automatically open the browser to access the service
```bash **💡 Usage Suggestion**: After opening the Gradio web page, it is recommended to check "Auto-configure Inference Options"; the system will then automatically select optimization settings appropriate for your machine. If you change the resolution, re-check "Auto-configure Inference Options".
pip install sageattention-2.1.1+cu126torch2.6.0-cp312-cp312-win_amd64.whl
```
**Verify SageAttention Installation:** **⚠️ Important Note**: On first run, the system will automatically extract the environment file `env.zip`, which may take several minutes. Please be patient. Subsequent launches will skip this step. You can also manually extract the `env.zip` file to the current directory to save time on first startup.
After installation, we recommend running a verification script to ensure proper functionality: ### Method 3: Using ComfyUI Inference
> 📝 **Testing**: You can also run the [official test script](https://github.com/woct0rdho/SageAttention/blob/main/tests/test_sageattn.py) for more detailed functionality verification. This guide explains how to download and use the portable Lightx2v-ComfyUI environment so you can skip manual environment configuration; it is intended for users who want to quickly try Lightx2v-accelerated video generation on Windows.
### Step 6: Get LightX2V Project Code #### Download the Windows Portable Environment:
Clone the LightX2V project from GitHub and install Windows-specific dependencies: - [Baidu Cloud Download](https://pan.baidu.com/s/1FVlicTXjmXJA1tAVvNCrBw?pwd=wfid), extraction code: wfid
```bash The portable environment already packages all Python runtime dependencies, including the code and dependencies for ComfyUI and LightX2V. After downloading, simply extract to use.
# Clone project code
git clone https://github.com/ModelTC/LightX2V.git
# Enter project directory After extraction, the directory structure is as follows:
cd LightX2V
# Install Windows-specific dependencies ```shell
pip install -r requirements_win.txt lightx2v_env
├──📂 ComfyUI # ComfyUI code
├──📂 portable_python312_embed # Standalone Python environment
└── run_nvidia_gpu.bat # Windows startup script (double-click to start)
``` ```
> 🔍 **Note**: We use `requirements_win.txt` instead of the standard `requirements.txt` because Windows environments may require specific package versions or additional dependencies. #### Start ComfyUI
## Troubleshooting
### 1. CUDA Version Mismatch
**Symptoms**: CUDA-related errors occur
**Solutions**:
- Verify GPU driver supports required CUDA version
- Re-download matching wheel packages
- Use `nvidia-smi` to check maximum supported CUDA version
### 2. Dependency Conflicts
**Symptoms**: Package version conflicts or import errors
**Solutions**:
- Remove existing environment: `conda env remove -n lightx2v`
- Recreate environment and install dependencies strictly by version requirements
- Use virtual environments to isolate dependencies for different projects
### 3. Wheel Package Download Issues
**Symptoms**: Slow download speeds or connection failures
**Solutions**: Double-click the run_nvidia_gpu.bat file. A Command Prompt window will open and run the program. The first startup may take a while; please be patient. Once startup completes, the browser opens automatically and displays the ComfyUI front-end interface.
- Use download tools or browser for direct downloads
- Look for domestic mirror sources
- Check network connections and firewall settings
## Next Steps ![i2v example workflow](../../../../assets/figs/portabl_windows/pic1.png)
After completing the environment setup, you can: The plugin used by LightX2V-ComfyUI is [ComfyUI-Lightx2vWrapper](https://github.com/ModelTC/ComfyUI-Lightx2vWrapper). Example workflows can be obtained from this project.
- 📚 Check the [Quick Start Guide](../getting_started/quickstart.md) (skip environment installation steps) #### Tested Graphics Cards (offload mode)
- 🌐 Use the [Gradio Web Interface](./deploy_gradio.md) for visual operations (skip environment installation steps)
## Version Compatibility Reference - Tested model: `Wan2.1-I2V-14B-480P`
| Component | Recommended Version | | GPU Model | Task Type | VRAM Capacity | Actual Max VRAM Usage | Actual Max RAM Usage |
|-----------|-------------------| |:-----------|:------------|:--------------|:---------------------|:---------------------|
| Python | 3.12 | | 3090Ti | I2V | 24G | 6.1G | 7.1G |
| PyTorch | 2.6.0+cu124 | | 3080Ti | I2V | 12G | 6.1G | 7.1G |
| vLLM | 0.9.1+cu124 | | 3060Ti | I2V | 8G | 6.1G | 7.1G |
| SageAttention | 2.1.1+cu126torch2.6.0 |
| CUDA | 12.4+ |
---
💡 **Pro Tip**: If you encounter other issues, we recommend first checking whether all component versions match properly, as most problems stem from version incompatibilities. #### Environment Packaging and Usage Reference
- [ComfyUI](https://github.com/comfyanonymous/ComfyUI)
- [Portable-Windows-ComfyUI-Docs](https://docs.comfy.org/zh-CN/installation/comfyui_portable_windows#portable-%E5%8F%8A%E8%87%AA%E9%83%A8%E7%BD%B2)
# 低延迟场景部署 # Deployment for Low Latency Scenarios
xxx In low-latency scenarios we prioritize raw speed and set aside concerns such as GPU memory and RAM overhead. We provide two solutions:
## 💡 Solution 1: Inference with Step Distillation Model
For this solution, refer to the [Step Distillation Documentation](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/step_distill.html)
🧠 **Step Distillation** is a very direct way to accelerate inference for video generation models: distilling from 50 steps down to 4 cuts inference time to roughly 4/50 of the original. It can also be combined with the following techniques (a combined config sketch follows the list):
1. [Efficient Attention Mechanism Solution](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/attention.html)
2. [Model Quantization](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/quantization.html)
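As a concrete illustration, the 4-step distill configs added in this commit already combine step distillation with efficient attention and quantized linear layers; the dict below is a hedged summary of those JSON files (values copied from them), not a new recommendation:

```python
# Hedged summary of the 4-step distill configs added in this commit:
# step distillation combined with efficient attention and fp8-quantized linears.
distill_config = {
    "infer_steps": 4,
    "denoising_step_list": [1000, 750, 500, 250],  # step-distillation schedule
    "enable_cfg": False,                            # CFG is distilled away
    "self_attn_1_type": "sage_attn2",               # efficient attention kernels
    "cross_attn_1_type": "sage_attn2",
    "cross_attn_2_type": "sage_attn2",
    "mm_config": {                                  # quantized linear layers
        "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl-ActVllm"
    },
}
```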
## 💡 Solution 2: Inference with Non-Step Distillation Model
Step distillation requires substantial training resources, and the distilled model may exhibit a reduced video dynamic range.
For the original model without step distillation, the following techniques can be used individually or in combination for acceleration (a sketch combining two of them follows the list):
1. [Parallel Inference](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/parallel.html) for multi-GPU parallel acceleration.
2. [Feature Caching](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/cache.html) to reduce the actual inference steps.
3. [Efficient Attention Mechanism Solution](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/attention.html) to accelerate Attention inference.
4. [Model Quantization](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/quantization.html) to accelerate Linear layer inference.
5. [Variable Resolution Inference](https://lightx2v-en.readthedocs.io/en/latest/method_tutorials/changing_resolution.html) to reduce the resolution of intermediate inference steps.
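For illustration, here is a hedged sketch of combining two of these techniques, feature caching (TeaCache) and variable-resolution inference, assembled from the separate example configs in this commit; as noted below, not every combination is currently supported:

```python
# Hedged sketch combining feature caching (TeaCache) with variable-resolution
# inference, based on the separate example configs in this commit.
# Note: per the caveat below, not every combination is currently supported.
non_distill_config = {
    "infer_steps": 40,
    "enable_cfg": True,
    "use_ret_steps": False,
    "teacache_thresh": 0.2,             # feature-caching threshold
    "changing_resolution": True,        # run part of the schedule at lower resolution
    "resolution_rate": [0.75],
    "changing_resolution_steps": [20],
    "use_tiling_vae": True,
}
```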
## ⚠️ Note
Some acceleration solutions currently cannot be used together, and we are working to resolve this issue.
If you have any questions, feel free to report bugs or request features in [🐛 GitHub Issues](https://github.com/ModelTC/lightx2v/issues)