Commit e08c4f90 authored by sandy, committed by GitHub

Merge branch 'main' into audio_r2v

parents 12bfd120 6d07a72e
......@@ -46,12 +46,26 @@ def generate_task_id():
def post_all_tasks(urls, messages):
    msg_num = len(messages)
    msg_index = 0
+    available_urls = []
+    for url in urls:
+        try:
+            _ = requests.get(f"{url}/v1/service/status").json()
+        except Exception as e:
+            continue
+        available_urls.append(url)
+    if not available_urls:
+        logger.error("No available urls.")
+        return
+    logger.info(f"available_urls: {available_urls}")
    while True:
-        for url in urls:
-            response = requests.get(f"{url}/v1/local/video/generate/service_status").json()
+        for url in available_urls:
+            response = requests.get(f"{url}/v1/service/status").json()
            if response["service_status"] == "idle":
                logger.info(f"{url} service is idle, start task...")
-                response = requests.post(f"{url}/v1/local/video/generate", json=messages[msg_index])
+                response = requests.post(f"{url}/v1/tasks/", json=messages[msg_index])
                logger.info(f"response: {response.json()}")
                msg_index += 1
                if msg_index == msg_num:
......
from tqdm import tqdm
import argparse
import glob
import os
import requests
import time


def post_i2v(image_path, output_path):
    url = "http://localhost:8000"
    # The image file name (without extension) doubles as the prompt and the output name.
    file_name = os.path.basename(image_path)
    prompt = os.path.splitext(file_name)[0]
    save_video_path = os.path.join(output_path, f"{prompt}.mp4")
    message = {
        "prompt": prompt,
        "negative_prompt": "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走",
        "image_path": image_path,
        "save_video_path": save_video_path,
    }
    # Poll until the service reports idle, then submit the task.
    while True:
        response = requests.get(f"{url}/v1/service/status").json()
        if response["service_status"] == "idle":
            response = requests.post(f"{url}/v1/tasks/", json=message)
            return
        time.sleep(3)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_path", type=str, required=True, help="path to img files.")
    parser.add_argument("--output_path", type=str, default="./vbench_i2v", help="output video path.")
    args = parser.parse_args()
    if os.path.exists(args.data_path):
        img_files = glob.glob(os.path.join(args.data_path, "*.jpg"))
        print(f"Found {len(img_files)} image files.")
        with tqdm(total=len(img_files)) as progress_bar:
            for img_path in img_files:
                post_i2v(img_path, args.output_path)
                progress_bar.update()
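Note that `post_i2v` blocks indefinitely if the service never returns to idle. Below is a minimal sketch of a bounded variant of the same polling loop; `wait_until_idle`, its `timeout`, and the per-request timeout are illustrative additions, not part of this repository:

```python
import time

import requests


def wait_until_idle(url, poll_interval=3.0, timeout=600.0):
    """Illustrative helper: block until the service reports idle, or raise on timeout."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            status = requests.get(f"{url}/v1/service/status", timeout=5).json()
            if status.get("service_status") == "idle":
                return
        except requests.RequestException:
            pass  # transient network error: keep polling
        time.sleep(poll_interval)
    raise TimeoutError(f"{url} did not become idle within {timeout}s")
```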
......@@ -42,7 +42,6 @@ python -m lightx2v.api_server \
    --model_path $model_path \
    --config_json ${lightx2v_path}/configs/wan/wan_i2v_dist.json \
    --port 8000 \
-    --start_inference \
    --nproc_per_node 1
echo "Service stopped"
#!/bin/bash
# set paths first
-lightx2v_path="/mnt/Text2Video/wangshankun/lightx2v"
-model_path="/mnt/Text2Video/wangshankun/HF_Cache/Wan2.1-R2V-Audio-14B-720P/"
-#lora_path="/mnt/Text2Video/wuzhuguanyu/Wan21_I2V_14B_lightx2v_cfg_step_distill_lora_rank64.safetensors"
-#lora_path="/mnt/aigc/qiuzesong/Distill/DMD2/0716lightx2v/LightX2V/tools/extract/wan_r2v_V2_14B_lora_ran32.safetensors"
+lightx2v_path=
+model_path=
+lora_path=
# check section
if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
    cuda_devices=0
......
#!/bin/bash
# set paths first
-lightx2v_path="/mnt/Text2Video/wangshankun/lightx2v/"
-model_path="/mnt/Text2Video/wangshankun/HF_Cache/Wan2.1-I2V-14B-CausVid/"
+lightx2v_path=
+model_path=
# check section
if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
    cuda_devices=0
......
#!/bin/bash
# set paths first
-lightx2v_path="/mnt/Text2Video/wangshankun/tmp_code/lightx2v/"
-model_path="/mnt/Text2Video/wangshankun/HF_Cache/hub/models--Skywork--SkyReels-V2-DF-14B-540P/snapshots/7ff972ba7b6a33d2f6e6c976dd3cf2d36984eee4/"
+lightx2v_path=
+model_path=
# check section
if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
......
......@@ -32,7 +32,7 @@ python -m lightx2v.infer \
    --model_cls wan2.1 \
    --task t2v \
    --model_path $model_path \
-    --config_json ${lightx2v_path}/configs/caching/adacache/wan_t2v_ada.json \
+    --config_json ${lightx2v_path}/configs/changing_resolution/wan_t2v.json \
    --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
    --negative_prompt "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" \
-    --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v_ada.mp4
+    --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v_changing_resolution.mp4
......@@ -10,6 +10,7 @@ from safetensors import safe_open, torch as st
from loguru import logger
from tqdm import tqdm
from collections import defaultdict
+from qtorch.quant import float_quantize


def get_key_mapping_rules(direction, model_type):
......@@ -314,7 +315,8 @@ def quantize_tensor(w, w_bit=8, dtype=torch.int8):
    max_val = w.abs().amax(dim=1, keepdim=True).clamp(min=1e-5)
    if dtype == torch.float8_e4m3fn:
-        qmin, qmax = -448, 448
+        finfo = torch.finfo(dtype)
+        qmin, qmax = finfo.min, finfo.max
    elif dtype == torch.int8:
        qmin, qmax = -128, 127
......@@ -322,7 +324,9 @@ def quantize_tensor(w, w_bit=8, dtype=torch.int8):
    scales = max_val / qmax
    if dtype == torch.float8_e4m3fn:
-        w_q = torch.clamp(w / scales, qmin, qmax).to(dtype)
+        scaled_tensor = w / scales
+        scaled_tensor = torch.clip(scaled_tensor, qmin, qmax)
+        w_q = float_quantize(scaled_tensor.float(), 4, 3, rounding="nearest").to(dtype)
    else:
        w_q = torch.clamp(torch.round(w / scales), qmin, qmax).to(dtype)
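For intuition, here is a minimal sketch of the symmetric per-channel scheme implemented above: scale each row by its absolute maximum, round into the integer grid, and dequantize by multiplying the scales back. The toy tensor values are illustrative:

```python
import torch

# Toy 2-D weight; one scale per output row, matching the dim=1 reduction above.
w = torch.tensor([[0.5, -1.0], [2.0, 0.25]])
max_val = w.abs().amax(dim=1, keepdim=True).clamp(min=1e-5)   # [[1.0], [2.0]]
scales = max_val / 127                                        # int8 qmax
w_q = torch.clamp(torch.round(w / scales), -128, 127).to(torch.int8)
w_deq = w_q.float() * scales                                  # approximate reconstruction
print(w_q)    # [[64, -127], [127, 16]]
print(w_deq)  # close to w: [[0.5039, -1.0], [2.0, 0.2520]]
```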
......@@ -341,7 +345,8 @@ def quantize_model(
target_keys=["attn", "ffn"],
key_idx=2,
ignore_key=None,
dtype=torch.int8,
linear_dtype=torch.int8,
non_linear_dtype=torch.float,
):
"""
Quantize model weights in-place
......@@ -370,16 +375,20 @@ def quantize_model(
        # Skip non-tensors, small tensors, and non-2D tensors
        if not isinstance(tensor, torch.Tensor) or tensor.dim() != 2:
+            if isinstance(tensor, torch.Tensor) and tensor.dtype != non_linear_dtype:
+                weights[key] = tensor.to(non_linear_dtype)
            continue
        # Check if key matches target modules
        parts = key.split(".")
        if len(parts) < key_idx + 1 or parts[key_idx] not in target_keys:
+            if tensor.dtype != non_linear_dtype:
+                weights[key] = tensor.to(non_linear_dtype)
            continue
        try:
            # Quantize tensor and store results
-            w_q, scales = quantize_tensor(tensor, w_bit, dtype)
+            w_q, scales = quantize_tensor(tensor, w_bit, linear_dtype)
            # Replace original tensor and store scales
            weights[key] = w_q
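To see what the filter above does: a 2-D tensor is quantized only when the dot-separated path segment at `key_idx` names a target module; everything else is merely cast to `non_linear_dtype`. A toy check with illustrative key names (real checkpoint keys may differ):

```python
target_keys = ["attn", "ffn"]
key_idx = 2

# Illustrative parameter names, not verified checkpoint keys.
for key in ["blocks.0.attn.q.weight", "blocks.0.norm1.weight"]:
    parts = key.split(".")
    matches = len(parts) >= key_idx + 1 and parts[key_idx] in target_keys
    print(key, "-> quantize" if matches else "-> cast to non_linear_dtype")
```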
......@@ -440,9 +449,11 @@ def load_loras(lora_path, weight_dict, alpha):
        elif name in lora_diffs:
            name_diff = lora_diffs[name]
            lora_diff = lora_weights[name_diff].to(param.device, param.dtype)
-            param += lora_diff * alpha
-            applied_count += 1
+            try:
+                param += lora_diff * alpha
+                applied_count += 1
+            except Exception as e:
+                continue
    logger.info(f"Applied {applied_count} LoRA weight adjustments")
......@@ -500,7 +511,8 @@ def convert_weights(args):
        target_keys=args.target_keys,
        key_idx=args.key_idx,
        ignore_key=args.ignore_key,
-        dtype=args.dtype,
+        linear_dtype=args.linear_dtype,
+        non_linear_dtype=args.non_linear_dtype,
    )
    os.makedirs(args.output, exist_ok=True)
......@@ -637,10 +649,17 @@ def main():
help="Device to use for quantization (cpu/cuda)",
)
parser.add_argument(
"--dtype",
"--linear_dtype",
type=str,
choices=["torch.int8", "torch.float8_e4m3fn"],
help="Data type for quantization",
help="Data type for linear",
)
parser.add_argument(
"--non_linear_dtype",
type=str,
default="torch.float32",
choices=["torch.bfloat16", "torch.float16"],
help="Data type for non-linear",
)
parser.add_argument("--lora_path", type=str, nargs="*", help="Path(s) to LoRA file(s). Can specify multiple paths separated by spaces.")
parser.add_argument(
......@@ -654,12 +673,8 @@ def main():
    args = parser.parse_args()
    if args.quantized:
-        if args.dtype == "torch.int8":
-            args.dtype = torch.int8
-        elif args.dtype == "torch.float8_e4m3fn":
-            args.dtype = torch.float8_e4m3fn
-        else:
-            raise ValueError(f"Not support dtype :{args.dtype}")
+        args.linear_dtype = eval(args.linear_dtype)
+        args.non_linear_dtype = eval(args.non_linear_dtype)
    model_type_keys_map = {
        "wan_dit": {
......
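The `eval` on the CLI strings is safe here only because argparse `choices` restricts the input. A minimal eval-free sketch is shown below; the `DTYPE_MAP` and `parse_dtype` names are illustrative, not repo code:

```python
import torch

# Illustrative replacement for eval(): map the CLI strings accepted by
# --linear_dtype / --non_linear_dtype to their torch dtypes explicitly.
DTYPE_MAP = {
    "torch.int8": torch.int8,
    "torch.float8_e4m3fn": torch.float8_e4m3fn,
    "torch.float32": torch.float32,
    "torch.bfloat16": torch.bfloat16,
    "torch.float16": torch.float16,
}

def parse_dtype(name: str) -> torch.dtype:
    if name not in DTYPE_MAP:
        raise ValueError(f"Unsupported dtype: {name}")
    return DTYPE_MAP[name]
```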
-# 模型转换工具
+# Model Conversion Tool
-A powerful utility for converting model weights between different formats and performing quantization tasks.
+This converter tool can convert model weights between different formats.
-## Diffusers
-Facilitates mutual conversion between the Diffusers architecture and the LightX2V architecture.
-### Lightx2v->Diffusers
-```bash
-python converter.py \
-    --source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P \
-    --output /Path/To/Wan2.1-I2V-14B-480P-Diffusers \
-    --direction forward \
-    --save_by_block
-```
-### Diffusers->Lightx2v
-```bash
-python converter.py \
-    --source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P-Diffusers \
-    --output /Path/To/Wan2.1-I2V-14B-480P \
-    --direction backward \
-    --save_by_block
-```
-## Quantization
-This tool supports converting fp32/fp16/bf16 model weights to INT8, FP8 type.
+## Feature 1: Convert Quantized Models
+This tool supports converting **FP32/FP16/BF16** model weights to **INT8, FP8** types.
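The representable ranges the converter clamps to can be sanity-checked directly in PyTorch; this snippet is a quick check, not part of the converter:

```python
import torch

print(torch.iinfo(torch.int8).min, torch.iinfo(torch.int8).max)  # -128 127
finfo = torch.finfo(torch.float8_e4m3fn)
print(finfo.min, finfo.max)                                      # -448.0 448.0
```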
### Wan DIT
......@@ -36,7 +14,7 @@ python converter.py \
    --output /Path/To/output \
    --output_ext .safetensors \
    --output_name wan_int8 \
-    --dtype torch.int8 \
+    --linear_dtype torch.int8 \
    --model_type wan_dit \
    --quantized \
    --save_by_block
......@@ -48,7 +26,7 @@ python converter.py \
    --output /Path/To/output \
    --output_ext .safetensors \
    --output_name wan_fp8 \
-    --dtype torch.float8_e4m3fn \
+    --linear_dtype torch.float8_e4m3fn \
    --model_type wan_dit \
    --quantized \
    --save_by_block
......@@ -62,7 +40,7 @@ python converter.py \
    --output /Path/To/output \
    --output_ext .safetensors \
    --output_name wan_int8 \
-    --dtype torch.int8 \
+    --linear_dtype torch.int8 \
    --model_type wan_dit \
    --lora_path /Path/To/LoRA1/ /Path/To/LoRA2/ \
    --lora_alpha 1.0 1.0 \
......@@ -78,7 +56,7 @@ python converter.py \
    --output /Path/To/output \
    --output_ext .safetensors \
    --output_name hunyuan_int8 \
-    --dtype torch.int8 \
+    --linear_dtype torch.int8 \
    --model_type hunyuan_dit \
    --quantized
```
......@@ -89,7 +67,7 @@ python converter.py \
    --output /Path/To/output \
    --output_ext .safetensors \
    --output_name hunyuan_fp8 \
-    --dtype torch.float8_e4m3fn \
+    --linear_dtype torch.float8_e4m3fn \
    --model_type hunyuan_dit \
    --quantized
```
......@@ -103,7 +81,8 @@ python converter.py \
    --output /Path/To/output \
    --output_ext .pth \
    --output_name models_t5_umt5-xxl-enc-int8 \
-    --dtype torch.int8 \
+    --linear_dtype torch.int8 \
+    --non_linear_dtype torch.bfloat16 \
    --model_type wan_t5 \
    --quantized
```
......@@ -111,10 +90,11 @@ python converter.py \
```bash
python converter.py \
    --source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P/models_t5_umt5-xxl-enc-bf16.pth \
-    --output /Path/To/output \
+    --output /Path/To/Wan-AI/Wan2.1-I2V-14B-480P/fp8 \
    --output_ext .pth \
    --output_name models_t5_umt5-xxl-enc-fp8 \
-    --dtype torch.float8_e4m3fn \
+    --linear_dtype torch.float8_e4m3fn \
+    --non_linear_dtype torch.bfloat16 \
    --model_type wan_t5 \
    --quantized
```
......@@ -128,7 +108,8 @@ python converter.py \
    --output /Path/To/output \
    --output_ext .pth \
    --output_name clip-int8 \
-    --dtype torch.int8 \
+    --linear_dtype torch.int8 \
+    --non_linear_dtype torch.float16 \
    --model_type wan_clip \
    --quantized
......@@ -136,10 +117,33 @@ python converter.py \
```bash
python converter.py \
    --source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P/models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth \
-    --output /Path/To/output \
+    --output ./output \
    --output_ext .pth \
    --output_name clip-fp8 \
-    --dtype torch.float8_e4m3fn \
+    --linear_dtype torch.float8_e4m3fn \
+    --non_linear_dtype torch.float16 \
    --model_type wan_clip \
    --quantized
```
+## Feature 2: Format Conversion Between Diffusers and Lightx2v
+Supports mutual conversion between the Diffusers architecture and the LightX2V architecture.
+### Lightx2v->Diffusers
+```bash
+python converter.py \
+    --source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P \
+    --output /Path/To/Wan2.1-I2V-14B-480P-Diffusers \
+    --direction forward \
+    --save_by_block
+```
+### Diffusers->Lightx2v
+```bash
+python converter.py \
+    --source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P-Diffusers \
+    --output /Path/To/Wan2.1-I2V-14B-480P \
+    --direction backward \
+    --save_by_block
+```
# Model Conversion Tool
A powerful utility for converting model weights between different formats and performing quantization tasks.
-## Diffusers
-Supports mutual conversion between the Diffusers architecture and the LightX2V architecture.
-### Lightx2v->Diffusers
-```bash
-python converter.py \
-    --source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P \
-    --output /Path/To/Wan2.1-I2V-14B-480P-Diffusers \
-    --direction forward \
-    --save_by_block
-```
-### Diffusers->Lightx2v
-```bash
-python converter.py \
-    --source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P-Diffusers \
-    --output /Path/To/Wan2.1-I2V-14B-480P \
-    --direction backward \
-    --save_by_block
-```
This converter tool can convert model weights between different formats.
-## Quantization
+## Feature 1: Convert Quantized Models
This tool supports converting **FP32/FP16/BF16** model weights to **INT8, FP8** types.
......@@ -36,7 +14,7 @@ python converter.py \
    --output /Path/To/output \
    --output_ext .safetensors \
    --output_name wan_int8 \
-    --dtype torch.int8 \
+    --linear_dtype torch.int8 \
    --model_type wan_dit \
    --quantized \
    --save_by_block
......@@ -48,7 +26,7 @@ python converter.py \
    --output /Path/To/output \
    --output_ext .safetensors \
    --output_name wan_fp8 \
-    --dtype torch.float8_e4m3fn \
+    --linear_dtype torch.float8_e4m3fn \
    --model_type wan_dit \
    --quantized \
    --save_by_block
......@@ -62,7 +40,7 @@ python converter.py \
    --output /Path/To/output \
    --output_ext .safetensors \
    --output_name wan_int8 \
-    --dtype torch.int8 \
+    --linear_dtype torch.int8 \
    --model_type wan_dit \
    --lora_path /Path/To/LoRA1/ /Path/To/LoRA2/ \
    --lora_alpha 1.0 1.0 \
......@@ -78,7 +56,7 @@ python converter.py \
    --output /Path/To/output \
    --output_ext .safetensors \
    --output_name hunyuan_int8 \
-    --dtype torch.int8 \
+    --linear_dtype torch.int8 \
    --model_type hunyuan_dit \
    --quantized
```
......@@ -89,7 +67,7 @@ python converter.py \
    --output /Path/To/output \
    --output_ext .safetensors \
    --output_name hunyuan_fp8 \
-    --dtype torch.float8_e4m3fn \
+    --linear_dtype torch.float8_e4m3fn \
    --model_type hunyuan_dit \
    --quantized
```
......@@ -103,7 +81,8 @@ python converter.py \
    --output /Path/To/output \
    --output_ext .pth \
    --output_name models_t5_umt5-xxl-enc-int8 \
-    --dtype torch.int8 \
+    --linear_dtype torch.int8 \
+    --non_linear_dtype torch.bfloat16 \
    --model_type wan_t5 \
    --quantized
```
......@@ -111,10 +90,11 @@ python converter.py \
```bash
python converter.py \
    --source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P/models_t5_umt5-xxl-enc-bf16.pth \
-    --output /Path/To/output \
+    --output /Path/To/Wan-AI/Wan2.1-I2V-14B-480P/fp8 \
    --output_ext .pth \
    --output_name models_t5_umt5-xxl-enc-fp8 \
-    --dtype torch.float8_e4m3fn \
+    --linear_dtype torch.float8_e4m3fn \
+    --non_linear_dtype torch.bfloat16 \
    --model_type wan_t5 \
    --quantized
```
......@@ -128,7 +108,8 @@ python converter.py \
    --output /Path/To/output \
    --output_ext .pth \
    --output_name clip-int8 \
-    --dtype torch.int8 \
+    --linear_dtype torch.int8 \
+    --non_linear_dtype torch.float16 \
    --model_type wan_clip \
    --quantized
......@@ -136,10 +117,33 @@ python converter.py \
```bash
python converter.py \
    --source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P/models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth \
-    --output /Path/To/output \
+    --output ./output \
    --output_ext .pth \
    --output_name clip-fp8 \
-    --dtype torch.float8_e4m3fn \
+    --linear_dtype torch.float8_e4m3fn \
+    --non_linear_dtype torch.float16 \
    --model_type wan_clip \
    --quantized
```
+## Feature 2: Format Conversion Between Diffusers and Lightx2v
+Supports mutual conversion between the Diffusers architecture and the LightX2V architecture.
+### Lightx2v->Diffusers
+```bash
+python converter.py \
+    --source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P \
+    --output /Path/To/Wan2.1-I2V-14B-480P-Diffusers \
+    --direction forward \
+    --save_by_block
+```
+### Diffusers->Lightx2v
+```bash
+python converter.py \
+    --source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P-Diffusers \
+    --output /Path/To/Wan2.1-I2V-14B-480P \
+    --direction backward \
+    --save_by_block
+```