Commit e2778d0d authored by litzh

Initial commit
"""
HunyuanVideo-1.5 text-to-video generation example.
This example demonstrates how to use LightX2V with HunyuanVideo-1.5 4-step distilled model for T2V generation.
"""
from lightx2v import LightX2VPipeline
# Initialize pipeline for HunyuanVideo-1.5
pipe = LightX2VPipeline(
model_path="/path/to/ckpts/hunyuanvideo-1.5/",
model_cls="hunyuan_video_1.5",
transformer_model_name="480p_t2v",
task="t2v",
# 4-step distilled model ckpt
dit_original_ckpt="/path/to/hy1.5_t2v_480p_lightx2v_4step.safetensors",
)
# Alternative: create generator from config JSON file
# pipe.create_generator(config_json="../configs/hunyuan_video_15/hunyuan_video_t2v_720p.json")
# Enable offloading to significantly reduce VRAM usage with minimal speed impact
# Suitable for RTX 30/40/50 consumer GPUs
pipe.enable_offload(
cpu_offload=True,
offload_granularity="block", # For HunyuanVideo-1.5, only "block" is supported
text_encoder_offload=True,
image_encoder_offload=False,
vae_offload=False,
)
# Use LightTAE (lightweight tiny autoencoder) in place of the full VAE decoder
pipe.enable_lightvae(
use_tae=True,
tae_path="/path/to/lighttaehy1_5.safetensors",
use_lightvae=False,
vae_path=None,
)
# Create generator with specified parameters
pipe.create_generator(
attn_mode="sage_attn2",
infer_steps=4,
num_frames=81,
guidance_scale=1,
sample_shift=9.0,
aspect_ratio="16:9",
fps=16,
denoising_step_list=[1000, 750, 500, 250], # timesteps used by the 4-step distilled model
)
# Generation parameters
seed = 123
prompt = "A close-up shot captures a scene on a polished, light-colored granite kitchen counter, illuminated by soft natural light from an unseen window. Initially, the frame focuses on a tall, clear glass filled with golden, translucent apple juice standing next to a single, shiny red apple with a green leaf still attached to its stem. The camera moves horizontally to the right. As the shot progresses, a white ceramic plate smoothly enters the frame, revealing a fresh arrangement of about seven or eight more apples, a mix of vibrant reds and greens, piled neatly upon it. A shallow depth of field keeps the focus sharply on the fruit and glass, while the kitchen backsplash in the background remains softly blurred. The scene is in a realistic style."
negative_prompt = ""
save_result_path = "/data/nvme0/gushiqiao/LightX2V/save_results/output.mp4"
# Generate video
pipe.generate(
seed=seed,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
)
"""
LongCat Image Edit (I2I) generation example.
This example demonstrates how to use LightX2V with LongCat-Image-Edit model for I2I generation.
"""
from lightx2v import LightX2VPipeline
# Initialize pipeline for LongCat-Image-Edit I2I task
pipe = LightX2VPipeline(
model_path="/data/nvme1/models/meituan-longcat/LongCat-Image-Edit",
model_cls="longcat_image",
task="i2i",
)
# Create generator from config JSON file
pipe.create_generator(config_json="/workspace/configs/longcat_image/longcat_image_i2i.json")
# Generation parameters
seed = 43
prompt = "将猫变成狗"
negative_prompt = ""
image_path = "/data/nvme1/models/meituan-longcat/LongCat-Image-Edit/assets/test.png"
save_result_path = "/workspace/save_results/longcat_image_i2i_pipeline.png"
# Generate edited image
pipe.generate(
seed=seed,
image_path=image_path,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
)
"""
LongCat Image text-to-image generation example.
This example demonstrates how to use LightX2V with LongCat-Image model for T2I generation.
"""
from lightx2v import LightX2VPipeline
# Initialize pipeline for LongCat-Image T2I task
pipe = LightX2VPipeline(
model_path="/data/nvme1/models/meituan-longcat/LongCat-Image",
model_cls="longcat_image",
task="t2i",
)
# Enable offloading to reduce VRAM usage (optional)
pipe.enable_offload(
cpu_offload=True,
offload_granularity="block",
text_encoder_offload=True,
vae_offload=False,
)
# Create generator from config JSON file
pipe.create_generator(config_json="/workspace/configs/longcat_image/longcat_image_t2i.json")
# Generation parameters
seed = 42
prompt = "一只小狗躺在沙发上"
negative_prompt = ""
save_result_path = "/workspace/save_results/longcat_image_t2i_pipeline.png"
# Generate image
pipe.generate(
seed=seed,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
)
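"""
LTX-2 image-to-audio-video (I2AV) generation example.
This example demonstrates how to use LightX2V with the LTX-2 19B dev checkpoint to generate video with synchronized audio from an input image.
"""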
from lightx2v import LightX2VPipeline
pipe = LightX2VPipeline(model_path="Lightricks/LTX-2/", model_cls="ltx2", task="i2av", dit_original_ckpt="Lightricks/LTX-2/ltx-2-19b-dev.safetensors")
# pipe.enable_offload(
# cpu_offload=True,
# offload_granularity="block",
# text_encoder_offload=False,
# vae_offload=False,
# )
pipe.create_generator(
attn_mode="sage_attn2",
infer_steps=40,
height=512,
width=768,
num_frames=121,
guidance_scale=4.0,
sample_shift=[2.05, 0.95],
fps=24,
audio_fps=24000,
double_precision_rope=True,
norm_modulate_backend="triton", # "torch"
)
seed = 42
image_path = "/path/to/woman.jpeg" # For multiple images, use comma-separated paths: "path1.jpg,path2.jpg"
image_strength = 1.0 # Scalar: use same strength for all images, or list: [1.0, 0.8] for different strengths
prompt = "A young woman with wavy, shoulder-length light brown hair is singing and dancing joyfully outdoors on a foggy day. She wears a cozy pink turtleneck sweater, swaying gracefully to the music with animated expressions and bright, piercing blue eyes. Her movements are fluid and energetic as she twirls and gestures expressively. A wooden fence and a misty, grassy field fade into the background, creating a dreamy atmosphere for her lively performance."
negative_prompt = "blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio, incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."
save_result_path = "/path/to/save_results/output.mp4"
# Note: image_strength can also be set in config_json
# For scalar: image_strength = 1.0 (all images use same strength)
# For list: image_strength = [1.0, 0.8] (must match number of images)
pipe.generate(
seed=seed,
prompt=prompt,
image_path=image_path,
image_strength=image_strength,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
)
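"""
LTX-2 image-to-audio-video (I2AV) generation example using the FP8-quantized distilled checkpoint.
This example demonstrates how to use LightX2V with the distilled FP8 LTX-2 model for faster I2AV generation.
"""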
from lightx2v import LightX2VPipeline
pipe = LightX2VPipeline(
model_path="Lightricks/LTX-2",
model_cls="ltx2",
task="i2av",
)
pipe.enable_quantize(
dit_quantized=True,
dit_quantized_ckpt="Lightricks/LTX-2/ltx-2-19b-distilled-fp8.safetensors",
quant_scheme="fp8-pertensor",
skip_fp8_block_index=[0, 43, 44, 45, 46, 47],
)
# pipe.enable_offload(
# cpu_offload=True,
# offload_granularity="block",
# text_encoder_offload=False,
# vae_offload=False,
# )
pipe.create_generator(
attn_mode="sage_attn2",
infer_steps=8,
height=512,
width=768,
num_frames=121,
guidance_scale=1.0,
sample_shift=[2.05, 0.95],
fps=24,
audio_fps=24000,
double_precision_rope=True,
norm_modulate_backend="triton", # "torch"
distilled_sigma_values=[1.0, 0.99375, 0.9875, 0.98125, 0.975, 0.909375, 0.725, 0.421875, 0.0],
)
seed = 42
image_path = "/path/to/LightX2V/assets/inputs/imgs/woman.jpeg" # For multiple images, use comma-separated paths: "path1.jpg,path2.jpg"
image_strength = 1.0 # Scalar: use same strength for all images, or list: [1.0, 0.8] for different strengths
prompt = "A young woman with wavy, shoulder-length light brown hair is singing and dancing joyfully outdoors on a foggy day. She wears a cozy pink turtleneck sweater, swaying gracefully to the music with animated expressions and bright, piercing blue eyes. Her movements are fluid and energetic as she twirls and gestures expressively. A wooden fence and a misty, grassy field fade into the background, creating a dreamy atmosphere for her lively performance."
negative_prompt = "blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio, incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."
save_result_path = "/path/to/LightX2V/save_results/output_lightx2v_ltx2_i2av_distilled_fp8.mp4"
# Note: image_strength can also be set in config_json
# For scalar: image_strength = 1.0 (all images use same strength)
# For list: image_strength = [1.0, 0.8] (must match number of images)
pipe.generate(
seed=seed,
prompt=prompt,
image_path=image_path,
image_strength=image_strength,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
)
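"""
LTX-2 text-to-audio-video (T2AV) generation example.
This example demonstrates how to use LightX2V with the LTX-2 19B dev checkpoint to generate video with synchronized audio from a text prompt.
"""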
from lightx2v import LightX2VPipeline
pipe = LightX2VPipeline(model_path="Lightricks/LTX-2/", model_cls="ltx2", task="t2av", dit_original_ckpt="Lightricks/LTX-2/ltx-2-19b-dev.safetensors")
# pipe.enable_offload(
# cpu_offload=True,
# offload_granularity="block",
# text_encoder_offload=False,
# vae_offload=False,
# )
pipe.create_generator(
attn_mode="sage_attn2",
infer_steps=40,
height=512,
width=768,
num_frames=121,
guidance_scale=4.0,
sample_shift=[2.05, 0.95],
fps=24,
audio_fps=24000,
double_precision_rope=True,
norm_modulate_backend="triton", # "torch"
)
seed = 42
prompt = "A beautiful sunset over the ocean"
negative_prompt = "blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio, incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."
save_result_path = "/path/to/save_results/output.mp4"
pipe.generate(
seed=seed,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
)
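"""
LTX-2 text-to-audio-video (T2AV) generation example using the FP8-quantized distilled checkpoint.
This example demonstrates how to use LightX2V with the distilled FP8 LTX-2 model for faster T2AV generation.
"""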
from lightx2v import LightX2VPipeline
pipe = LightX2VPipeline(model_path="Lightricks/LTX-2", model_cls="ltx2", task="t2av")
pipe.enable_quantize(
dit_quantized=True,
dit_quantized_ckpt="Lightricks/LTX-2/ltx-2-19b-distilled-fp8.safetensors",
quant_scheme="fp8-pertensor",
skip_fp8_block_index=[0, 43, 44, 45, 46, 47],
)
# pipe.enable_offload(
# cpu_offload=True,
# offload_granularity="block",
# text_encoder_offload=False,
# vae_offload=False,
# )
pipe.create_generator(
attn_mode="sage_attn2",
infer_steps=8,
height=512,
width=768,
num_frames=121,
guidance_scale=1,
sample_shift=[2.05, 0.95],
fps=24,
audio_fps=24000,
double_precision_rope=True,
norm_modulate_backend="triton", # "torch"
distilled_sigma_values=[1.0, 0.99375, 0.9875, 0.98125, 0.975, 0.909375, 0.725, 0.421875, 0.0],
)
seed = 42
prompt = "A beautiful sunset over the ocean"
negative_prompt = "blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio, incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."
save_result_path = "/path/to/LightX2V/save_results/output_lightx2v_ltx2_t2av_distilled_fp8.mp4"
pipe.generate(
seed=seed,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
)
# Qwen Image Examples
This directory contains usage examples for Qwen Image and Qwen Image Edit models.
## Benchmark Results
DiT inference time comparison (excluding warmup time, data updated on 2025.12.23):
<div align="center">
<img src="../../assets/figs/qwen/qwen-image-edit-2511.png" alt="Qwen-Image-Edit-2511" width="60%">
</div>
## Model Download
Before using the example scripts, you need to download the corresponding models. All models can be downloaded from the links below; a short `huggingface_hub` download sketch follows the lists.
Text-to-Image Models (2512 is the latest model)
- [Qwen-Image-2512](https://huggingface.co/Qwen/Qwen-Image-2512)
- [Qwen-Image-2512-Lightning](https://huggingface.co/lightx2v/Qwen-Image-2512-Lightning)
Image Editing Models (2511 is the latest model)
- [Qwen-Image-Edit-2511](https://huggingface.co/Qwen/Qwen-Image-Edit-2511)
- [Qwen-Image-Edit-2511-Lightning](https://huggingface.co/lightx2v/Qwen-Image-Edit-2511-Lightning)
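For example, the checkpoints can be fetched with `huggingface_hub` (a minimal sketch; the local target directories are placeholders, so adjust the repo IDs and paths to your setup):

```python
from huggingface_hub import snapshot_download

# Base image-editing model (placeholder target directory)
snapshot_download(repo_id="Qwen/Qwen-Image-Edit-2511", local_dir="/path/to/Qwen-Image-Edit-2511")

# Distilled LoRA / FP8 checkpoints
snapshot_download(repo_id="lightx2v/Qwen-Image-Edit-2511-Lightning", local_dir="/path/to/Qwen-Image-Edit-2511-Lightning")
```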
## Usage Method 1: Using Bash Scripts (Highly Recommended)
For environment setup, we recommend using our Docker image; see the [quickstart guide](https://lightx2v-en.readthedocs.io/en/latest/getting_started/quickstart.html).
```bash
git clone https://github.com/ModelTC/LightX2V.git
cd LightX2V/scripts/qwen_image
# Before running the scripts below, you need to replace lightx2v_path and model_path in the scripts with actual paths
# For example: lightx2v_path=/home/user/LightX2V
# For example: model_path=/home/user/models/Qwen-Image-Edit-2511
```
Text-to-Image Models
```bash
# Run the original 2512 text-to-image model (50 steps by default)
bash qwen_image_t2i_2512.sh
# Run the 2512 text-to-image step-distilled model (8 steps by default); download the LoRA model first and update the lora_configs path in the config JSON
bash qwen_image_t2i_2512_distill.sh
# Run the 2512 text-to-image step-distilled + FP8-quantized model (8 steps by default); download the FP8 checkpoint first and update the dit_quantized_ckpt path in the config JSON
bash qwen_image_t2i_2512_distill_fp8.sh
```
Image Editing Models
```bash
# Run the original 2511 image-editing model (40 steps by default)
bash qwen_image_i2i_2511.sh
# Run the 2511 image-editing step-distilled model (8 steps by default); download the LoRA model first and update the lora_configs path in the config JSON
bash qwen_image_i2i_2511_distill.sh
# Run the 2511 image-editing step-distilled + FP8-quantized model (8 steps by default); download the FP8 checkpoint first and update the dit_quantized_ckpt path in the config JSON
bash qwen_image_i2i_2511_distill_fp8.sh
```
## Usage Method 2: Install and Use Python Scripts
For environment setup, we recommend using our Docker image; see the [quickstart guide](https://lightx2v-en.readthedocs.io/en/latest/getting_started/quickstart.html).
First, clone the repository and install dependencies:
```bash
git clone https://github.com/ModelTC/LightX2V.git
cd LightX2V
pip install -v -e .
```
Running Step-Distilled + FP8 Quantized Model
Run the `qwen_2511_fp8.py` script, which uses a model optimized with step distillation and FP8 quantization:
```bash
cd examples/qwen_image/
python qwen_2511_fp8.py
```
This approach cuts the number of inference steps through step distillation and uses FP8 quantization to shrink the model size and memory footprint, yielding faster inference.
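The script boils down to roughly the following pipeline calls (a minimal sketch with placeholder paths; the checkpoint filename and settings mirror the full example later in this repository and may need adjusting):

```python
from lightx2v import LightX2VPipeline

pipe = LightX2VPipeline(
    model_path="/path/to/Qwen-Image-Edit-2511",
    model_cls="qwen-image-edit-2511",
    task="i2i",
)
# Load the FP8 step-distilled DiT weights
pipe.enable_quantize(
    dit_quantized=True,
    dit_quantized_ckpt="/path/to/Qwen-Image-Edit-2511-Lightning/qwen_image_edit_2511_fp8_e4m3fn_scaled_lightning_4steps_v1.0.safetensors",
    quant_scheme="fp8-sgl",
)
pipe.create_generator(attn_mode="flash_attn3", resize_mode="adaptive", infer_steps=8, guidance_scale=1)
pipe.generate(
    seed=42,
    image_path="/path/to/img.png",
    prompt="Replace the polka-dot shirt with a light blue shirt.",
    negative_prompt="",
    save_result_path="/path/to/output.png",
)
```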
Running Qwen-Image-Edit-2511 Model + Distilled LoRA
Run the `qwen_2511_with_distill_lora.py` script, which uses the Qwen-Image-Edit-2511 base model with distilled LoRA:
```bash
cd examples/qwen_image/
python qwen_2511_with_distill_lora.py
```
This approach uses the complete Qwen-Image-Edit-2511 model and accelerates it with a distilled LoRA, improving inference efficiency while preserving model quality.
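Compared with the FP8 script, the base checkpoint is loaded unquantized and the 4-step distilled LoRA is attached instead (again a sketch with placeholder paths, mirroring the full example shipped in this directory):

```python
from lightx2v import LightX2VPipeline

pipe = LightX2VPipeline(
    model_path="/path/to/Qwen-Image-Edit-2511",
    model_cls="qwen-image-edit-2511",
    task="i2i",
)
# Attach the 4-step distilled LoRA on top of the base model
pipe.enable_lora(
    [{"path": "/path/to/Qwen-Image-Edit-2511-Lightning-4steps-V1.0-fp32.safetensors", "strength": 1.0}],
    lora_dynamic_apply=False,  # set True to apply the LoRA on the fly (lower memory, slower)
)
pipe.create_generator(attn_mode="flash_attn3", resize_mode="adaptive", infer_steps=8, guidance_scale=1)
pipe.generate(
    seed=42,
    image_path="/path/to/img.png",
    prompt="Replace the polka-dot shirt with a light blue shirt.",
    negative_prompt="",
    save_result_path="/path/to/output.png",
)
```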
"""
Qwen-Image-Edit image-to-image generation example (FP8 step-distilled model).
This example demonstrates how to use LightX2V with the FP8 step-distilled Qwen-Image-Edit-2511 model for I2I generation.
"""
from lightx2v import LightX2VPipeline
# Initialize pipeline for Qwen-image-edit I2I task
# For Qwen-Image-Edit-2509, use model_cls="qwen-image-edit-2509"
pipe = LightX2VPipeline(
model_path="/path/to/Qwen-Image-Edit-2511",
model_cls="qwen-image-edit-2511",
task="i2i",
)
# Alternative: create generator from config JSON file
# pipe.create_generator(
# config_json="../configs/qwen_image/qwen_image_i2i_2511_distill_fp8.json"
# )
# Enable offloading to significantly reduce VRAM usage with minimal speed impact
# Suitable for RTX 30/40/50 consumer GPUs
# pipe.enable_offload(
# cpu_offload=True,
# offload_granularity="block", #["block", "phase"]
# text_encoder_offload=True,
# vae_offload=False,
# )
# Load FP8 step-distilled DiT weights (optionally also an INT4-quantized Qwen2.5-VL text encoder)
pipe.enable_quantize(
dit_quantized=True,
dit_quantized_ckpt="lightx2v/Qwen-Image-Edit-2511-Lightning/qwen_image_edit_2511_fp8_e4m3fn_scaled_lightning_4steps_v1.0.safetensors",
quant_scheme="fp8-sgl",
# text_encoder_quantized=True,
# text_encoder_quantized_ckpt="lightx2v/Encoders/GPTQModel/Qwen25-VL-4bit-GPTQ",
# text_encoder_quant_scheme="int4"
)
# Create generator manually with specified parameters
pipe.create_generator(
attn_mode="flash_attn3",
resize_mode="adaptive",
infer_steps=8,
guidance_scale=1,
)
# Generation parameters
seed = 42
prompt = "Replace the polka-dot shirt with a light blue shirt."
negative_prompt = ""
image_path = "/path/to/img.png" # or "/path/to/img_0.jpg,/path/to/img_1.jpg"
save_result_path = "/path/to/save_results/output.png"
# Generate the edited image
pipe.generate(
seed=seed,
image_path=image_path,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
)
"""
Qwen-Image-Edit image-to-image generation example (base model + distilled LoRA).
This example demonstrates how to use LightX2V with the Qwen-Image-Edit-2511 model and a distilled LoRA for I2I generation.
"""
from lightx2v import LightX2VPipeline
# Initialize pipeline for Qwen-image-edit I2I task
# For Qwen-Image-Edit-2509, use model_cls="qwen-image-edit-2509"
pipe = LightX2VPipeline(
model_path="/path/to/Qwen-Image-Edit-2511",
model_cls="qwen-image-edit-2511",
task="i2i",
)
# Alternative: create generator from config JSON file
# pipe.create_generator(
# config_json="../configs/qwen_image/qwen_image_i2i_2511_lora.json"
# )
# Enable offloading to significantly reduce VRAM usage with minimal speed impact
# Suitable for RTX 30/40/50 consumer GPUs
# pipe.enable_offload(
# cpu_offload=True,
# offload_granularity="block", #["block", "phase"]
# text_encoder_offload=True,
# vae_offload=False,
# )
# Load distilled LoRA weights
pipe.enable_lora(
[
{"path": "lightx2v/Qwen-Image-Edit-2511-Lightning/Qwen-Image-Edit-2511-Lightning-4steps-V1.0-fp32.safetensors", "strength": 1.0},
],
lora_dynamic_apply=False, # If True, LoRA weights are applied dynamically at inference time (lower memory, slower); default is False
)
# Create generator manually with specified parameters
pipe.create_generator(
attn_mode="flash_attn3",
resize_mode="adaptive",
infer_steps=8,
guidance_scale=1,
)
# Generation parameters
seed = 42
prompt = "Replace the polka-dot shirt with a light blue shirt."
negative_prompt = ""
image_path = "/path/to/img.png" # or "/path/to/img_0.jpg,/path/to/img_1.jpg"
save_result_path = "/path/to/save_results/output.png"
# Generate the edited image
pipe.generate(
seed=seed,
image_path=image_path,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
)
"""
Qwen-Image-Edit image-to-image generation example (FP8 base model + distilled LoRA).
This example demonstrates how to use LightX2V with the FP8-quantized Qwen-Image-Edit-2511 base model and a distilled LoRA for I2I generation.
"""
from lightx2v import LightX2VPipeline
# Initialize pipeline for Qwen-image-edit I2I task
# For Qwen-Image-Edit-2509, use model_cls="qwen-image-edit-2509"
pipe = LightX2VPipeline(
model_path="/path/to/Qwen-Image-Edit-2511",
model_cls="qwen-image-edit-2511",
task="i2i",
)
# Enable offloading to significantly reduce VRAM usage with minimal speed impact
# Suitable for RTX 30/40/50 consumer GPUs
# pipe.enable_offload(
# cpu_offload=True,
# offload_granularity="block", #["block", "phase"]
# text_encoder_offload=True,
# vae_offload=False,
# )
# Load FP8 base DiT weights (optionally also an INT4-quantized Qwen2.5-VL text encoder)
pipe.enable_quantize(
dit_quantized=True,
dit_quantized_ckpt="lightx2v/Qwen-Image-Edit-2511-Lightning/qwen_image_edit_2511_fp8_e4m3fn_scaled.safetensors",
quant_scheme="fp8-sgl",
# text_encoder_quantized=True,
# text_encoder_quantized_ckpt="lightx2v/Encoders/GPTQModel/Qwen25-VL-4bit-GPTQ",
# text_encoder_quant_scheme="int4"
)
# Load distilled LoRA weights
pipe.enable_lora(
[
{
"path": "lightx2v/Qwen-Image-Edit-2511-Lightning/Qwen-Image-Edit-2511-Lightning-4steps-V1.0-fp32.safetensors",
"strength": 1.0,
},
],
lora_dynamic_apply=True, # Apply LoRA weights dynamically at inference time (lower memory, slower); default is False
)
# Create generator manually with specified parameters
pipe.create_generator(
attn_mode="flash_attn3",
resize_mode="adaptive",
infer_steps=8,
guidance_scale=1,
)
# Generation parameters
seed = 42
prompt = "Replace the polka-dot shirt with a light blue shirt."
negative_prompt = ""
image_path = "/path/to/img.png" # or "/path/to/img_0.jpg,/path/to/img_1.jpg"
save_result_path = "/path/to/save_results/output.png"
# Generate the edited image
pipe.generate(
seed=seed,
image_path=image_path,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
)
"""
Qwen-Image-Edit image-to-image generation example (FP8 base model + LoRA switching).
This example demonstrates how to use LightX2V with the FP8-quantized Qwen-Image-Edit-2511 base model, loading one distilled LoRA and then switching to another between generations.
"""
from lightx2v import LightX2VPipeline
# Initialize pipeline for Qwen-image-edit I2I task
# For Qwen-Image-Edit-2509, use model_cls="qwen-image-edit-2509"
pipe = LightX2VPipeline(
model_path="/path/to/Qwen-Image-Edit-2511",
model_cls="qwen-image-edit-2511",
task="i2i",
)
# Enable offloading to significantly reduce VRAM usage with minimal speed impact
# Suitable for RTX 30/40/50 consumer GPUs
# pipe.enable_offload(
# cpu_offload=True,
# offload_granularity="block", #["block", "phase"]
# text_encoder_offload=True,
# vae_offload=False,
# )
# Load FP8 base DiT weights (optionally also an INT4-quantized Qwen2.5-VL text encoder)
pipe.enable_quantize(
dit_quantized=True,
dit_quantized_ckpt="lightx2v/Qwen-Image-Edit-2511-Lightning/qwen_image_edit_2511_fp8_e4m3fn_scaled.safetensors",
quant_scheme="fp8-sgl",
# text_encoder_quantized=True,
# text_encoder_quantized_ckpt="lightx2v/Encoders/GPTQModel/Qwen25-VL-4bit-GPTQ",
# text_encoder_quant_scheme="int4"
)
# Load distilled LoRA weights
pipe.enable_lora(
[
{
"path": "/path/to/lora_1.safetensors",
"strength": 1.0,
},
],
lora_dynamic_apply=True, # Apply LoRA weights dynamically at inference time (lower memory, slower); default is False
)
# Create generator manually with specified parameters
pipe.create_generator(
attn_mode="flash_attn3",
resize_mode="adaptive",
infer_steps=8,
guidance_scale=1,
)
# Generation parameters
seed = 42
prompt = "Replace the polka-dot shirt with a light blue shirt."
negative_prompt = ""
image_path = "/path/to/img.png" # or "/path/to/img_0.jpg,/path/to/img_1.jpg"
save_result_path = "/path/to/save_results/output.png"
# Generate the edited image
pipe.generate(
seed=seed,
image_path=image_path,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
)
# Switch LoRA
pipe.switch_lora(lora_path="/path/to/lora_2.safetensors", strength=1.0)
# Generation parameters
seed = 42
prompt = "Replace the polka-dot shirt with a light blue shirt."
negative_prompt = ""
image_path = "/path/to/img.png" # or "/path/to/img_0.jpg,/path/to/img_1.jpg"
save_result_path = "/path/to/save_results/output.png"
# Generate the edited image again with the second LoRA
pipe.generate(
seed=seed,
image_path=image_path,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
)
"""
Qwen-Image-2512 text-to-image generation example (FP8 step-distilled model).
This example demonstrates how to use LightX2V with the FP8 step-distilled Qwen-Image-2512 model for T2I generation.
"""
from lightx2v import LightX2VPipeline
# Initialize pipeline for Qwen-Image-2512 T2I task
pipe = LightX2VPipeline(
model_path="/path/to/Qwen/Qwen-Image-2512",
model_cls="qwen-image-2512",
task="t2i",
)
# Alternative: create generator from config JSON file
# pipe.create_generator(
# config_json="../configs/qwen_image/qwen_image_t2i_2512_distill_fp8.json"
# )
# Enable offloading to significantly reduce VRAM usage with minimal speed impact
# Suitable for RTX 30/40/50 consumer GPUs
# pipe.enable_offload(
# cpu_offload=True,
# offload_granularity="block", #["block", "phase"]
# text_encoder_offload=True,
# vae_offload=False,
# )
# Load FP8 step-distilled DiT weights (optionally also an INT4-quantized Qwen2.5-VL text encoder)
pipe.enable_quantize(
dit_quantized=True,
dit_quantized_ckpt="lightx2v/Qwen-Image-2512-Lightning/qwen_image_2512_fp8_e4m3fn_scaled_4steps_v1.0.safetensors",
quant_scheme="fp8-sgl",
# text_encoder_quantized=True,
# text_encoder_quantized_ckpt="lightx2v/Encoders/GPTQModel/Qwen25-VL-4bit-GPTQ",
# text_encoder_quant_scheme="int4"
)
# Create generator manually with specified parameters
pipe.create_generator(
attn_mode="flash_attn3",
aspect_ratio="16:9",
infer_steps=8,
guidance_scale=1,
)
# Generation parameters
seed = 42
prompt = 'A coffee shop entrance features a chalkboard sign reading "Qwen Coffee 😊 $2 per cup," with a neon light beside it displaying "通义千问". Next to it hangs a poster showing a beautiful Chinese woman, and beneath the poster is written "π≈3.1415926-53589793-23846264-33832795-02384197". Ultra HD, 4K, cinematic composition, Ultra HD, 4K, cinematic composition.'
negative_prompt = ""
save_result_path = "/path/to/save_results/output.png"
# Generate image
pipe.generate(
seed=seed,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
)
"""
Qwen-Image-2512 text-to-image generation example (base model + distilled LoRA).
This example demonstrates how to use LightX2V with the Qwen-Image-2512 model and a distilled LoRA for T2I generation.
"""
from lightx2v import LightX2VPipeline
# Initialize pipeline for Qwen-image-2512 T2I task
pipe = LightX2VPipeline(
model_path="/path/to/Qwen/Qwen-Image-2512",
model_cls="qwen-image-2512",
task="t2i",
)
# Alternative: create generator from config JSON file
# pipe.create_generator(
# config_json="../configs/qwen_image/qwen_image_t2i_2512_lora.json"
# )
# Enable offloading to significantly reduce VRAM usage with minimal speed impact
# Suitable for RTX 30/40/50 consumer GPUs
# pipe.enable_offload(
# cpu_offload=True,
# offload_granularity="block", #["block", "phase"]
# text_encoder_offload=True,
# vae_offload=False,
# )
# Load distilled LoRA weights
pipe.enable_lora(
[
{"path": "lightx2v/Qwen-Image-2512-Lightning/Qwen-Image-2512-Lightning-4steps-V1.0-fp32.safetensors", "strength": 1.0},
],
lora_dynamic_apply=False, # If True, LoRA weights are applied dynamically at inference time (lower memory, slower); default is False
)
# Create generator manually with specified parameters
pipe.create_generator(
attn_mode="flash_attn3",
aspect_ratio="16:9",
infer_steps=8,
guidance_scale=1,
)
# Generation parameters
seed = 42
prompt = 'A coffee shop entrance features a chalkboard sign reading "Qwen Coffee 😊 $2 per cup," with a neon light beside it displaying "通义千问". Next to it hangs a poster showing a beautiful Chinese woman, and beneath the poster is written "π≈3.1415926-53589793-23846264-33832795-02384197". Ultra HD, 4K, cinematic composition, Ultra HD, 4K, cinematic composition.'
negative_prompt = ""
save_result_path = "/path/to/save_results/output.png"
# Generate image
pipe.generate(
seed=seed,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
)
"""
Qwen-Image-2512 text-to-image generation example (FP8 base model + distilled LoRA).
This example demonstrates how to use LightX2V with the FP8-quantized Qwen-Image-2512 base model and a distilled LoRA for T2I generation.
"""
from lightx2v import LightX2VPipeline
# Initialize pipeline for Qwen-image-2512 T2I task
pipe = LightX2VPipeline(
model_path="/path/to/Qwen/Qwen-Image-2512",
model_cls="qwen-image-2512",
task="t2i",
)
# Enable offloading to significantly reduce VRAM usage with minimal speed impact
# Suitable for RTX 30/40/50 consumer GPUs
# pipe.enable_offload(
# cpu_offload=True,
# offload_granularity="block", #["block", "phase"]
# text_encoder_offload=True,
# vae_offload=False,
# )
# Load FP8 base DiT weights (optionally also an INT4-quantized Qwen2.5-VL text encoder)
pipe.enable_quantize(
dit_quantized=True,
dit_quantized_ckpt="lightx2v/Qwen-Image-2512-Lightning/qwen_image_2512_fp8_e4m3fn_scaled.safetensors",
quant_scheme="fp8-sgl",
# text_encoder_quantized=True,
# text_encoder_quantized_ckpt="lightx2v/Encoders/GPTQModel/Qwen25-VL-4bit-GPTQ",
# text_encoder_quant_scheme="int4"
)
# Load distilled LoRA weights
pipe.enable_lora(
[
{
"path": "lightx2v/Qwen-Image-2512-Lightning/Qwen-Image-2512-Lightning-4steps-V1.0-fp32.safetensors",
"strength": 1.0,
},
],
lora_dynamic_apply=True, # Apply LoRA weights dynamically at inference time (lower memory, slower); default is False
)
# Create generator manually with specified parameters
pipe.create_generator(
attn_mode="flash_attn3",
aspect_ratio="16:9",
infer_steps=4,
guidance_scale=1,
)
# Generation parameters
seed = 42
prompt = 'A coffee shop entrance features a chalkboard sign reading "Qwen Coffee 😊 $2 per cup," with a neon light beside it displaying "通义千问". Next to it hangs a poster showing a beautiful Chinese woman, and beneath the poster is written "π≈3.1415926-53589793-23846264-33832795-02384197". Ultra HD, 4K, cinematic composition, Ultra HD, 4K, cinematic composition.'
negative_prompt = ""
save_result_path = "/path/to/save_results/output.png"
# Generate image
pipe.generate(
seed=seed,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
)
"""
Qwen-Image text-to-image generation example (base model + distilled LoRA).
This example demonstrates how to use LightX2V with the Qwen-Image model and a distilled Lightning LoRA for T2I generation.
"""
from lightx2v import LightX2VPipeline
# Initialize pipeline for Qwen-image T2I task
pipe = LightX2VPipeline(
model_path="QWen/Qwen-Image",
model_cls="qwen-image",
task="t2i",
)
# Alternative: create generator from config JSON file
# pipe.create_generator(
# config_json="../configs/qwen_image/qwen_image_52i_2512_lora.json"
# )
# Enable offloading to significantly reduce VRAM usage with minimal speed impact
# Suitable for RTX 30/40/50 consumer GPUs
# pipe.enable_offload(
# cpu_offload=True,
# offload_granularity="block", #["block", "phase"]
# text_encoder_offload=True,
# vae_offload=False,
# )
# Load distilled LoRA weights
pipe.enable_lora(
[
{
"path": "lightx2v/Qwen-Image-Lightning/Qwen-Image/Qwen-Image-fp8-e4m3fn-Lightning-4steps-V1.0-fp32.safetensors",
"strength": 1.0,
},
],
lora_dynamic_apply=True, # Apply LoRA weights dynamically at inference time (lower memory, slower); default is False
)
# Create generator manually with specified parameters
pipe.create_generator(
attn_mode="flash_attn3",
aspect_ratio="16:9",
infer_steps=4,
guidance_scale=1,
)
# Generation parameters
seed = 42
prompt = 'A coffee shop entrance features a chalkboard sign reading "Qwen Coffee 😊 $2 per cup," with a neon light beside it displaying "通义千问". Next to it hangs a poster showing a beautiful Chinese woman, and beneath the poster is written "π≈3.1415926-53589793-23846264-33832795-02384197". Ultra HD, 4K, cinematic composition, Ultra HD, 4K, cinematic composition.'
negative_prompt = ""
save_result_path = "/path/to/save_results/output.png"
# Generate image
pipe.generate(
seed=seed,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
)
"""
Wan2.2 animate video generation example.
This example demonstrates how to use LightX2V with Wan2.2 model for animate video generation.
First, run preprocessing:
1. Set up environment: pip install -r ../requirements_animate.txt
2. For animate mode:
python ../tools/preprocess/preprocess_data.py \
--ckpt_path /path/to/Wan2.2-Animate-14B/process_checkpoint \
--video_path /path/to/video \
--refer_path /path/to/ref_img \
--save_path ../save_results/animate/process_results \
--resolution_area 1280 720 \
--retarget_flag
3. For replace mode:
python ../tools/preprocess/preprocess_data.py \
--ckpt_path /path/to/Wan2.2-Animate-14B/process_checkpoint \
--video_path /path/to/video \
--refer_path /path/to/ref_img \
--save_path ../save_results/replace/process_results \
--resolution_area 1280 720 \
--iterations 3 \
--k 7 \
--w_len 1 \
--h_len 1 \
--replace_flag
"""
from lightx2v import LightX2VPipeline
# Initialize pipeline for animate task
pipe = LightX2VPipeline(
model_path="/path/to/Wan2.1-FLF2V-14B-720P",
model_cls="wan2.2_animate",
task="animate",
)
pipe.replace_flag = True # Set to True for replace mode, False for animate mode
# Alternative: create generator from config JSON file
# pipe.create_generator(
# config_json="../configs/wan/wan_animate_replace.json"
# )
# Create generator with specified parameters
pipe.create_generator(
attn_mode="sage_attn2",
infer_steps=20,
height=480, # Can be set to 720 for higher resolution
width=832, # Can be set to 1280 for higher resolution
num_frames=77,
guidance_scale=1,
sample_shift=5.0,
fps=30,
)
seed = 42
prompt = "视频中的人在做动作"
negative_prompt = "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
src_pose_path = "../save_results/animate/process_results/src_pose.mp4"
src_face_path = "../save_results/animate/process_results/src_face.mp4"
src_ref_images = "../save_results/animate/process_results/src_ref.png"
save_result_path = "/path/to/save_results/output.mp4"
pipe.generate(
seed=seed,
src_pose_path=src_pose_path,
src_face_path=src_face_path,
src_ref_images=src_ref_images,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
)
"""
Wan2.1 first-last-frame-to-video generation example.
This example demonstrates how to use LightX2V with Wan2.1 model for FLF2V generation.
"""
from lightx2v import LightX2VPipeline
# Initialize pipeline for FLF2V task
pipe = LightX2VPipeline(
model_path="/path/to/Wan2.1-FLF2V-14B-720P",
model_cls="wan2.1",
task="flf2v",
)
# Alternative: create generator from config JSON file
# pipe.create_generator(
# config_json="../configs/wan/wan_flf2v.json"
# )
# Optional: enable offloading to significantly reduce VRAM usage
# Suitable for RTX 30/40/50 consumer GPUs
# pipe.enable_offload(
# cpu_offload=True,
# offload_granularity="block",
# text_encoder_offload=True,
# image_encoder_offload=False,
# vae_offload=False,
# )
# Create generator with specified parameters
pipe.create_generator(
attn_mode="sage_attn2",
infer_steps=40,
height=480, # Can be set to 720 for higher resolution
width=832, # Can be set to 1280 for higher resolution
num_frames=81,
guidance_scale=5,
sample_shift=5.0,
)
seed = 42
prompt = "CG animation style, a small blue bird takes off from the ground, flapping its wings. The bird’s feathers are delicate, with a unique pattern on its chest. The background shows a blue sky with white clouds under bright sunshine. The camera follows the bird upward, capturing its flight and the vastness of the sky from a close-up, low-angle perspective."
negative_prompt = "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
image_path = "../assets/inputs/imgs/flf2v_input_first_frame-fs8.png"
last_frame_path = "../assets/inputs/imgs/flf2v_input_last_frame-fs8.png"
save_result_path = "/path/to/save_results/output.mp4"
pipe.generate(
image_path=image_path,
last_frame_path=last_frame_path,
seed=seed,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
)
"""
Wan2.2 image-to-video generation example.
This example demonstrates how to use LightX2V with Wan2.2 model for I2V generation.
"""
from lightx2v import LightX2VPipeline
# Initialize pipeline for Wan2.2 I2V task
# For wan2.1, use model_cls="wan2.1"
pipe = LightX2VPipeline(
model_path="/path/to/Wan2.2-I2V-A14B",
model_cls="wan2.2_moe",
task="i2v",
)
# Alternative: create generator from config JSON file
# pipe.create_generator(
# config_json="../configs/wan22/wan_moe_i2v.json"
# )
# Enable offloading to significantly reduce VRAM usage with minimal speed impact
# Suitable for RTX 30/40/50 consumer GPUs
pipe.enable_offload(
cpu_offload=True,
offload_granularity="block", # For Wan models, supports both "block" and "phase"
text_encoder_offload=True,
image_encoder_offload=False,
vae_offload=False,
)
# Create generator manually with specified parameters
pipe.create_generator(
attn_mode="sage_attn2",
infer_steps=40,
height=480, # Can be set to 720 for higher resolution
width=832, # Can be set to 1280 for higher resolution
num_frames=81,
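# With the Wan2.2 MoE model, a two-element list supplies a separate guidance scale for each of the two denoising stages (high-noise and low-noise experts)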
guidance_scale=[3.5, 3.5], # For wan2.1, guidance_scale is a scalar (e.g., 5.0)
sample_shift=5.0,
)
# Generation parameters
seed = 42
prompt = "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside."
negative_prompt = "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
image_path = "/path/to/img_0.jpg"
save_result_path = "/path/to/save_results/output.mp4"
# Generate video
pipe.generate(
seed=seed,
image_path=image_path,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
)