Unverified Commit 5b20c67a authored by gushiqiao's avatar gushiqiao Committed by GitHub
Browse files

update readme (#501)

parent f7a67d0e
......@@ -42,39 +42,65 @@ uv pip install -v . # pip install -v .
For attention operators installation, please refer to our documentation: **[English Docs](https://lightx2v-en.readthedocs.io/en/latest/getting_started/quickstart.html#step-4-install-attention-operators) | [中文文档](https://lightx2v-zhcn.readthedocs.io/zh-cn/latest/getting_started/quickstart.html#id9)**
### Quick Start
```python
# examples/hunyuan_video/hunyuan_t2v.py
# examples/wan/wan_i2v.py
"""
Wan2.2 image-to-video generation example.
This example demonstrates how to use LightX2V with Wan2.2 model for I2V generation.
"""
from lightx2v import LightX2VPipeline
# Initialize pipeline for Wan2.2 I2V task
# For wan2.1, use model_cls="wan2.1"
pipe = LightX2VPipeline(
model_path="/path/to/ckpts/hunyuanvideo-1.5/",
model_cls="hunyuan_video_1.5",
transformer_model_name="720p_t2v",
task="t2v",
model_path="/path/to/Wan2.2-I2V-A14B",
model_cls="wan2.2_moe",
task="i2v",
)
# Alternative: create generator from config JSON file
# pipe.create_generator(
# config_json="configs/wan22/wan_moe_i2v.json"
# )
# Enable offloading to significantly reduce VRAM usage with minimal speed impact
# Suitable for RTX 30/40/50 consumer GPUs
pipe.enable_offload(
cpu_offload=True,
offload_granularity="block", # For Wan models, supports both "block" and "phase"
text_encoder_offload=True,
image_encoder_offload=False,
vae_offload=False,
)
# Create generator manually with specified parameters
pipe.create_generator(
attn_mode="sage_attn2",
infer_steps=50,
num_frames=121,
guidance_scale=6.0,
sample_shift=9.0,
aspect_ratio="16:9",
fps=24,
infer_steps=40,
height=480, # Can be set to 720 for higher resolution
width=832, # Can be set to 1280 for higher resolution
num_frames=81,
guidance_scale=[3.5, 3.5], # For wan2.1, guidance_scale is a scalar (e.g., 5.0)
sample_shift=5.0,
)
seed = 123
prompt = "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage."
negative_prompt = ""
save_result_path="/path/to/save_results/output.mp4"
# Generation parameters
seed = 42
prompt = "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside."
negative_prompt = "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
image_path="/path/to/img_0.jpg"
save_result_path = "/path/to/save_results/output.mp4"
# Generate video
pipe.generate(
seed=seed,
image_path=image_path,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
)
```
> 💡 **More Examples**: For more usage examples including quantization, offloading, caching, and other advanced configurations, please refer to the [examples directory](https://github.com/ModelTC/LightX2V/tree/main/examples).
......@@ -98,13 +124,14 @@ pipe.generate(
### Lightweight Autoencoder Models (**🚀 Recommended: fast inference & low memory usage**)
- [Autoencoders](https://huggingface.co/lightx2v/Autoencoders)
🔔 Follow our [HuggingFace page](https://huggingface.co/lightx2v) for the latest model releases from our team.
### Autoregressive Models
- [Wan2.1-T2V-CausVid](https://huggingface.co/lightx2v/Wan2.1-T2V-14B-CausVid)
- [Self-Forcing](https://github.com/guandeh17/Self-Forcing)
- [Matrix-Game-2.0](https://huggingface.co/Skywork/Matrix-Game-2.0)
🔔 Follow our [HuggingFace page](https://huggingface.co/lightx2v) for the latest model releases from our team.
💡 Refer to the [Model Structure Documentation](https://lightx2v-en.readthedocs.io/en/latest/getting_started/model_structure.html) to quickly get started with LightX2V.
## 🚀 Frontend Interfaces
......
......@@ -44,33 +44,59 @@ uv pip install -v . # pip install -v .
### 快速开始
```python
# examples/hunyuan_video/hunyuan_t2v.py
# examples/wan/wan_i2v.py
"""
Wan2.2 image-to-video generation example.
This example demonstrates how to use LightX2V with Wan2.2 model for I2V generation.
"""
from lightx2v import LightX2VPipeline
# Initialize pipeline for Wan2.2 I2V task
# For wan2.1, use model_cls="wan2.1"
pipe = LightX2VPipeline(
model_path="/path/to/ckpts/hunyuanvideo-1.5/",
model_cls="hunyuan_video_1.5",
transformer_model_name="720p_t2v",
task="t2v",
model_path="/path/to/Wan2.2-I2V-A14B",
model_cls="wan2.2_moe",
task="i2v",
)
# Alternative: create generator from config JSON file
# pipe.create_generator(
# config_json="configs/wan22/wan_moe_i2v.json"
# )
# Enable offloading to significantly reduce VRAM usage with minimal speed impact
# Suitable for RTX 30/40/50 consumer GPUs
pipe.enable_offload(
cpu_offload=True,
offload_granularity="block", # For Wan models, supports both "block" and "phase"
text_encoder_offload=True,
image_encoder_offload=False,
vae_offload=False,
)
# Create generator manually with specified parameters
pipe.create_generator(
attn_mode="sage_attn2",
infer_steps=50,
num_frames=121,
guidance_scale=6.0,
sample_shift=9.0,
aspect_ratio="16:9",
fps=24,
infer_steps=40,
height=480, # Can be set to 720 for higher resolution
width=832, # Can be set to 1280 for higher resolution
num_frames=81,
guidance_scale=[3.5, 3.5], # For wan2.1, guidance_scale is a scalar (e.g., 5.0)
sample_shift=5.0,
)
seed = 123
prompt = "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage."
negative_prompt = ""
save_result_path="/path/to/save_results/output.mp4"
# Generation parameters
seed = 42
prompt = "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside."
negative_prompt = "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
image_path="/path/to/img_0.jpg"
save_result_path = "/path/to/save_results/output.mp4"
# Generate video
pipe.generate(
seed=seed,
image_path=image_path,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
......@@ -97,13 +123,14 @@ pipe.generate(
### 轻量级自编码器模型(**🚀 推荐:推理快速 + 内存占用低**)
- [Autoencoders](https://huggingface.co/lightx2v/Autoencoders)
🔔 可以关注我们的[HuggingFace主页](https://huggingface.co/lightx2v),及时获取我们团队的模型。
### 自回归模型
- [Wan2.1-T2V-CausVid](https://huggingface.co/lightx2v/Wan2.1-T2V-14B-CausVid)
- [Self-Forcing](https://github.com/guandeh17/Self-Forcing)
- [Matrix-Game-2.0](https://huggingface.co/Skywork/Matrix-Game-2.0)
🔔 可以关注我们的[HuggingFace主页](https://huggingface.co/lightx2v),及时获取我们团队的模型。
💡 参考[模型结构文档](https://lightx2v-zhcn.readthedocs.io/zh-cn/latest/getting_started/model_structure.html)快速上手 LightX2V
## 🚀 前端展示
......
......@@ -63,7 +63,6 @@ Pass the model path to `LightX2VPipeline`:
```python
pipe = LightX2VPipeline(
image_path="/path/to/img_0.jpg", # Required for I2V tasks
model_path="/path/to/Wan2.2-I2V-A14B",
model_cls="wan2.2_moe", # For wan2.1, use "wan2.1"
task="i2v",
......
......@@ -63,7 +63,6 @@ pipe.generate(
```python
pipe = LightX2VPipeline(
image_path="/path/to/img_0.jpg", # I2V 任务需要
model_path="/path/to/Wan2.2-I2V-A14B",
model_cls="wan2.2_moe", # 对于 wan2.1,使用 "wan2.1"
task="i2v",
......
......@@ -8,7 +8,6 @@ from lightx2v import LightX2VPipeline
# Initialize pipeline for HunyuanVideo-1.5 I2V task
pipe = LightX2VPipeline(
image_path="/path/to/assets/inputs/imgs/img_0.jpg",
model_path="/path/to/ckpts/hunyuanvideo-1.5/",
model_cls="hunyuan_video_1.5",
transformer_model_name="720p_i2v",
......
......@@ -31,9 +31,6 @@ from lightx2v import LightX2VPipeline
# Initialize pipeline for animate task
pipe = LightX2VPipeline(
model_path="/path/to/Wan2.1-FLF2V-14B-720P",
src_pose_path="../save_results/animate/process_results/src_pose.mp4",
src_face_path="../save_results/animate/process_results/src_face.mp4",
src_ref_images="../save_results/animate/process_results/src_ref.png",
model_cls="wan2.2_animate",
task="animate",
)
......@@ -59,10 +56,16 @@ pipe.create_generator(
seed = 42
prompt = "视频中的人在做动作"
negative_prompt = "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
src_pose_path = "../save_results/animate/process_results/src_pose.mp4"
src_face_path = "../save_results/animate/process_results/src_face.mp4"
src_ref_images = "../save_results/animate/process_results/src_ref.png"
save_result_path = "/path/to/save_results/output.mp4"
pipe.generate(
seed=seed,
src_pose_path=src_pose_path,
src_face_path=src_face_path,
src_ref_images=src_ref_images,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
......
......@@ -8,8 +8,6 @@ from lightx2v import LightX2VPipeline
# Initialize pipeline for FLF2V task
pipe = LightX2VPipeline(
model_path="/path/to/Wan2.1-FLF2V-14B-720P",
image_path="../assets/inputs/imgs/flf2v_input_first_frame-fs8.png",
last_frame_path="../assets/inputs/imgs/flf2v_input_last_frame-fs8.png",
model_cls="wan2.1",
task="flf2v",
)
......@@ -43,9 +41,13 @@ pipe.create_generator(
seed = 42
prompt = "CG animation style, a small blue bird takes off from the ground, flapping its wings. The bird’s feathers are delicate, with a unique pattern on its chest. The background shows a blue sky with white clouds under bright sunshine. The camera follows the bird upward, capturing its flight and the vastness of the sky from a close-up, low-angle perspective."
negative_prompt = "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
image_path = "../assets/inputs/imgs/flf2v_input_first_frame-fs8.png"
last_frame_path = "../assets/inputs/imgs/flf2v_input_last_frame-fs8.png"
save_result_path = "/path/to/save_results/output.mp4"
pipe.generate(
image_path=image_path,
last_frame_path=last_frame_path,
seed=seed,
prompt=prompt,
negative_prompt=negative_prompt,
......
......@@ -8,7 +8,6 @@ from lightx2v import LightX2VPipeline
# Initialize pipeline for Wan2.2 I2V task
# For wan2.1, use model_cls="wan2.1"
pipe = LightX2VPipeline(
image_path="/path/to/img_0.jpg",
model_path="/path/to/Wan2.2-I2V-A14B",
model_cls="wan2.2_moe",
task="i2v",
......@@ -44,11 +43,13 @@ pipe.create_generator(
seed = 42
prompt = "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside."
negative_prompt = "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
image_path = "/path/to/img_0.jpg"
save_result_path = "/path/to/save_results/output.mp4"
# Generate video
pipe.generate(
seed=seed,
image_path=image_path,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
......
......@@ -8,7 +8,6 @@ from lightx2v import LightX2VPipeline
# Initialize pipeline for Wan2.2 distilled I2V task
# For wan2.1, use model_cls="wan2.1_distill"
pipe = LightX2VPipeline(
image_path="/path/to/img_0.jpg",
model_path="/path/to/wan2.2/Wan2.2-I2V-A14B",
model_cls="wan2.2_moe_distill",
task="i2v",
......@@ -47,9 +46,11 @@ seed = 42
prompt = "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside."
negative_prompt = "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
save_result_path = "/path/to/save_results/output.mp4"
image_path = "/path/to/img_0.jpg"
pipe.generate(
seed=seed,
image_path=image_path,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
......
......@@ -8,7 +8,6 @@ from lightx2v import LightX2VPipeline
# Initialize pipeline for Wan2.2 distilled I2V task with LoRA
# For wan2.1, use model_cls="wan2.1_distill"
pipe = LightX2VPipeline(
image_path="/path/to/img_0.jpg",
model_path="/path/to/wan2.2/Wan2.2-I2V-A14B",
model_cls="wan2.2_moe_distill",
task="i2v",
......@@ -52,9 +51,11 @@ seed = 42
prompt = "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside."
negative_prompt = "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
save_result_path = "/path/to/save_results/output.mp4"
image_path = "/path/to/img_0.jpg"
pipe.generate(
seed=seed,
image_path=image_path,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
......
......@@ -12,6 +12,13 @@ import torch
import torch.distributed as dist
from loguru import logger
from lightx2v.models.runners.hunyuan_video.hunyuan_video_15_runner import HunyuanVideo15Runner # noqa: F401
from lightx2v.models.runners.qwen_image.qwen_image_runner import QwenImageRunner # noqa: F401
from lightx2v.models.runners.wan.wan_animate_runner import WanAnimateRunner # noqa: F401
from lightx2v.models.runners.wan.wan_audio_runner import Wan22AudioRunner, WanAudioRunner # noqa: F401
from lightx2v.models.runners.wan.wan_distill_runner import WanDistillRunner # noqa: F401
from lightx2v.models.runners.wan.wan_matrix_game2_runner import WanSFMtxg2Runner # noqa: F401
from lightx2v.models.runners.wan.wan_runner import Wan22MoeRunner, WanRunner # noqa: F401
from lightx2v.models.runners.wan.wan_sf_runner import WanSFRunner # noqa: F401
from lightx2v.models.runners.wan.wan_vace_runner import WanVaceRunner # noqa: F401
from lightx2v.utils.input_info import set_input_info
......@@ -53,13 +60,7 @@ class LightX2VPipeline:
task,
model_path,
model_cls,
image_path=None,
last_frame_path=None,
audio_path=None,
sf_model_path=None,
src_ref_images=None,
src_video=None,
src_mask=None,
dit_original_ckpt=None,
low_noise_original_ckpt=None,
high_noise_original_ckpt=None,
......@@ -69,13 +70,6 @@ class LightX2VPipeline:
self.model_path = model_path
self.model_cls = model_cls
self.sf_model_path = sf_model_path
self.image_path = image_path
self.last_frame_path = last_frame_path
self.audio_path = audio_path
self.src_ref_images = src_ref_images
self.src_video = src_video
self.src_mask = src_mask
self.dit_original_ckpt = dit_original_ckpt
self.low_noise_original_ckpt = low_noise_original_ckpt
self.high_noise_original_ckpt = high_noise_original_ckpt
......@@ -326,10 +320,23 @@ class LightX2VPipeline:
prompt,
negative_prompt,
save_result_path,
image_path=None,
last_frame_path=None,
audio_path=None,
src_ref_images=None,
src_video=None,
src_mask=None,
return_result_tensor=False,
):
# Run inference (following LightX2V pattern)
self.seed = seed
self.image_path = image_path
self.last_frame_path = last_frame_path
self.audio_path = audio_path
self.src_ref_images = src_ref_images
self.src_video = src_video
self.src_mask = src_mask
self.prompt = prompt
self.negative_prompt = negative_prompt
self.save_result_path = save_result_path
......
......@@ -17,7 +17,9 @@ docker run --gpus all -itd --ipc=host --name [container_name] -v [mount_settings
Please follow the instructions in [HunyuanVideo1.5 Github](https://github.com/Tencent-Hunyuan/HunyuanVideo-1.5/blob/main/checkpoints-download.md) to download and place the model files.
4. Run the script
4. Running
Run with the bash script:
```bash
# enter the docker container
......@@ -28,6 +30,70 @@ cd LightX2V/scripts/hunyuan_video_15
bash run_hy15_t2v_480p.sh
```
Run with Python code:
```python
"""
HunyuanVideo-1.5 text-to-video generation example.
This example demonstrates how to use LightX2V with HunyuanVideo-1.5 model for T2V generation.
"""
from lightx2v import LightX2VPipeline
# Initialize the pipeline for the HunyuanVideo-1.5 text-to-video (task="t2v") example
pipe = LightX2VPipeline(
model_path="/path/to/ckpts/hunyuanvideo-1.5/",
model_cls="hunyuan_video_1.5",
transformer_model_name="720p_t2v",
task="t2v",
)
# Alternative: create the generator from a config JSON file instead of passing
# the individual parameters to create_generator() further below
# pipe.create_generator(config_json="configs/hunyuan_video_15/hunyuan_video_t2v_720p.json")
# Enable offloading to significantly reduce VRAM usage with minimal speed impact
# Suitable for RTX 30/40/50 consumer GPUs
pipe.enable_offload(
cpu_offload=True,
offload_granularity="block", # For HunyuanVideo-1.5, only "block" is supported
text_encoder_offload=True,
image_encoder_offload=False,
vae_offload=False,
)
# Enable the TAE autoencoder (use_tae=True, weights loaded from tae_path);
# LightVAE is left disabled here (use_lightvae=False, vae_path=None)
pipe.enable_lightvae(
use_tae=True,
tae_path="/path/to/lighttaehy1_5.safetensors",
use_lightvae=False,
vae_path=None,
)
# Create the generator manually with explicitly specified parameters
pipe.create_generator(
attn_mode="sage_attn2",
infer_steps=50,
num_frames=121,
guidance_scale=6.0,
sample_shift=9.0,
aspect_ratio="16:9",
fps=24,
)
# Generation parameters passed to pipe.generate() below
seed = 123
prompt = "A close-up shot captures a scene on a polished, light-colored granite kitchen counter, illuminated by soft natural light from an unseen window. Initially, the frame focuses on a tall, clear glass filled with golden, translucent apple juice standing next to a single, shiny red apple with a green leaf still attached to its stem. The camera moves horizontally to the right. As the shot progresses, a white ceramic plate smoothly enters the frame, revealing a fresh arrangement of about seven or eight more apples, a mix of vibrant reds and greens, piled neatly upon it. A shallow depth of field keeps the focus sharply on the fruit and glass, while the kitchen backsplash in the background remains softly blurred. The scene is in a realistic style."
negative_prompt = ""
save_result_path = "/path/to/save_results/output.mp4"
# Run generation with the parameters defined above
pipe.generate(
seed=seed,
prompt=prompt,
negative_prompt=negative_prompt,
save_result_path=save_result_path,
)
```
5. Check results
You can find the generated video files in the `save_results` folder.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment