Commit ae089db4 authored by GoatWu

Merge branch 'main' of github.com:ModelTC/lightx2v into dev-debug-distill

parents 8b213df0 4796fc6e
#!/bin/bash
# set paths first
lightx2v_path=
model_path=

# check section
if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
    cuda_devices=0
    echo "Warning: CUDA_VISIBLE_DEVICES is not set; using default value ${cuda_devices}. Change it in this script or set the environment variable."
    export CUDA_VISIBLE_DEVICES=${cuda_devices}
fi

if [ -z "${lightx2v_path}" ]; then
    echo "Error: lightx2v_path is not set. Please set this variable first."
    exit 1
fi

if [ -z "${model_path}" ]; then
    echo "Error: model_path is not set. Please set this variable first."
    exit 1
fi
export TOKENIZERS_PARALLELISM=false
export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
export DTYPE=BF16
export ENABLE_PROFILING_DEBUG=true
export ENABLE_GRAPH_MODE=false
python -m lightx2v.infer \
    --model_cls wan2.1 \
    --task i2v \
    --model_path $model_path \
    --config_json ${lightx2v_path}/configs/caching/adacache/wan_i2v_ada.json \
    --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
    --image_path ${lightx2v_path}/assets/inputs/imgs/img_0.jpg \
    --negative_prompt "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" \
    --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_i2v_ada.mp4
#!/bin/bash
# set paths first
lightx2v_path=
model_path=

# check section
if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
    cuda_devices=0
    echo "Warning: CUDA_VISIBLE_DEVICES is not set; using default value ${cuda_devices}. Change it in this script or set the environment variable."
    export CUDA_VISIBLE_DEVICES=${cuda_devices}
fi

if [ -z "${lightx2v_path}" ]; then
    echo "Error: lightx2v_path is not set. Please set this variable first."
    exit 1
fi

if [ -z "${model_path}" ]; then
    echo "Error: model_path is not set. Please set this variable first."
    exit 1
fi
export TOKENIZERS_PARALLELISM=false
export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
export DTYPE=BF16
export ENABLE_PROFILING_DEBUG=true
export ENABLE_GRAPH_MODE=false
python -m lightx2v.infer \
    --model_cls wan2.1 \
    --task i2v \
    --model_path $model_path \
    --config_json ${lightx2v_path}/configs/caching/custom/wan_i2v_custom_480p.json \
    --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
    --image_path ${lightx2v_path}/assets/inputs/imgs/img_0.jpg \
    --negative_prompt "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" \
    --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_i2v_custom.mp4
#!/bin/bash
# set paths first
lightx2v_path=
model_path=

# check section
if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
    cuda_devices=0
    echo "Warning: CUDA_VISIBLE_DEVICES is not set; using default value ${cuda_devices}. Change it in this script or set the environment variable."
    export CUDA_VISIBLE_DEVICES=${cuda_devices}
fi

if [ -z "${lightx2v_path}" ]; then
    echo "Error: lightx2v_path is not set. Please set this variable first."
    exit 1
fi

if [ -z "${model_path}" ]; then
    echo "Error: model_path is not set. Please set this variable first."
    exit 1
fi
export TOKENIZERS_PARALLELISM=false
export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
export DTYPE=BF16
export ENABLE_PROFILING_DEBUG=true
export ENABLE_GRAPH_MODE=false
python -m lightx2v.infer \
    --model_cls wan2.1 \
    --task i2v \
    --model_path $model_path \
    --config_json ${lightx2v_path}/configs/caching/taylorseer/wan_i2v_tea_480p.json \
    --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
    --negative_prompt "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" \
    --image_path ${lightx2v_path}/assets/inputs/imgs/img_0.jpg \
    --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_i2v_taylor.mp4
#!/bin/bash
# set paths first
lightx2v_path=
model_path=

# check section
if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
    cuda_devices=0
    echo "Warning: CUDA_VISIBLE_DEVICES is not set; using default value ${cuda_devices}. Change it in this script or set the environment variable."
    export CUDA_VISIBLE_DEVICES=${cuda_devices}
fi

if [ -z "${lightx2v_path}" ]; then
    echo "Error: lightx2v_path is not set. Please set this variable first."
    exit 1
fi

if [ -z "${model_path}" ]; then
    echo "Error: model_path is not set. Please set this variable first."
    exit 1
fi
export TOKENIZERS_PARALLELISM=false
export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
export DTYPE=BF16
export ENABLE_PROFILING_DEBUG=true
export ENABLE_GRAPH_MODE=false
python -m lightx2v.infer \
    --model_cls wan2.1 \
    --task t2v \
    --model_path $model_path \
    --config_json ${lightx2v_path}/configs/caching/adacache/wan_t2v_ada.json \
    --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
    --negative_prompt "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" \
    --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v_ada.mp4
#!/bin/bash
# set paths first
lightx2v_path=
model_path=

# check section
if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
    cuda_devices=0
    echo "Warning: CUDA_VISIBLE_DEVICES is not set; using default value ${cuda_devices}. Change it in this script or set the environment variable."
    export CUDA_VISIBLE_DEVICES=${cuda_devices}
fi

if [ -z "${lightx2v_path}" ]; then
    echo "Error: lightx2v_path is not set. Please set this variable first."
    exit 1
fi

if [ -z "${model_path}" ]; then
    echo "Error: model_path is not set. Please set this variable first."
    exit 1
fi
export TOKENIZERS_PARALLELISM=false
export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
export DTYPE=BF16
export ENABLE_PROFILING_DEBUG=true
export ENABLE_GRAPH_MODE=false
python -m lightx2v.infer \
    --model_cls wan2.1 \
    --task t2v \
    --model_path $model_path \
    --config_json ${lightx2v_path}/configs/caching/custom/wan_t2v_custom_1_3b.json \
    --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
    --negative_prompt "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" \
    --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v_custom.mp4
#!/bin/bash
# set paths first
lightx2v_path=
model_path=

# check section
if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
    cuda_devices=0
    echo "Warning: CUDA_VISIBLE_DEVICES is not set; using default value ${cuda_devices}. Change it in this script or set the environment variable."
    export CUDA_VISIBLE_DEVICES=${cuda_devices}
fi

if [ -z "${lightx2v_path}" ]; then
    echo "Error: lightx2v_path is not set. Please set this variable first."
    exit 1
fi

if [ -z "${model_path}" ]; then
    echo "Error: model_path is not set. Please set this variable first."
    exit 1
fi
export TOKENIZERS_PARALLELISM=false
export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
export DTYPE=BF16
export ENABLE_PROFILING_DEBUG=true
export ENABLE_GRAPH_MODE=false
python -m lightx2v.infer \
    --model_cls wan2.1 \
    --task t2v \
    --model_path $model_path \
    --config_json ${lightx2v_path}/configs/caching/taylorseer/wan_t2v_taylorseer.json \
    --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
    --negative_prompt "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" \
    --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v_taylor.mp4
@@ -32,7 +32,7 @@ python -m lightx2v.infer \
     --model_cls wan2.1 \
     --task t2v \
     --model_path $model_path \
-    --config_json ${lightx2v_path}/configs/caching/teacache/wan_t2v_1_3b.json \
+    --config_json ${lightx2v_path}/configs/caching/teacache/wan_t2v_1_3b_tea_480p.json \
     --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
     --negative_prompt "镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" \
     --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v_tea.mp4
@@ -10,6 +10,7 @@ from safetensors import safe_open, torch as st
 from loguru import logger
 from tqdm import tqdm
 from collections import defaultdict
+from qtorch.quant import float_quantize


 def get_key_mapping_rules(direction, model_type):
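For context, `qtorch.quant.float_quantize` simulates low-precision floating-point rounding on ordinary float tensors. A minimal sketch of what the new import does, assuming `qtorch` (QPyTorch) is installed:

```python
# Sketch: round a float32 tensor to the nearest value representable with
# 4 exponent bits and 3 mantissa bits, i.e. the float8_e4m3 grid.
import torch
from qtorch.quant import float_quantize

x = torch.randn(4, 4)
x_e4m3 = float_quantize(x, 4, 3, rounding="nearest")  # storage stays float32
print((x - x_e4m3).abs().max())  # error introduced by snapping to the fp8 grid
```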
@@ -314,7 +315,8 @@ def quantize_tensor(w, w_bit=8, dtype=torch.int8):
     max_val = w.abs().amax(dim=1, keepdim=True).clamp(min=1e-5)
     if dtype == torch.float8_e4m3fn:
-        qmin, qmax = -448, 448
+        finfo = torch.finfo(dtype)
+        qmin, qmax = finfo.min, finfo.max
     elif dtype == torch.int8:
         qmin, qmax = -128, 127
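The hard-coded ±448 bounds were correct for `float8_e4m3fn`, but deriving them from `torch.finfo` documents where they come from and would adapt to other fp8 variants. A quick check, relying only on PyTorch's published fp8 ranges:

```python
import torch

# The finfo-based bounds match the old hard-coded constants for e4m3 ...
finfo = torch.finfo(torch.float8_e4m3fn)
assert (finfo.min, finfo.max) == (-448.0, 448.0)
# ... and would differ for other fp8 formats, e.g. e5m2.
print(torch.finfo(torch.float8_e5m2).max)  # 57344.0
```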
@@ -322,7 +324,9 @@ def quantize_tensor(w, w_bit=8, dtype=torch.int8):
     scales = max_val / qmax
     if dtype == torch.float8_e4m3fn:
-        w_q = torch.clamp(w / scales, qmin, qmax).to(dtype)
+        scaled_tensor = w / scales
+        scaled_tensor = torch.clip(scaled_tensor, qmin, qmax)
+        w_q = float_quantize(scaled_tensor.float(), 4, 3, rounding="nearest").to(dtype)
     else:
         w_q = torch.clamp(torch.round(w / scales), qmin, qmax).to(dtype)
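Taken together, these two hunks change the fp8 path from a plain cast to an explicit round-to-nearest on the e4m3 grid before the cast. A self-contained sketch of the resulting per-output-channel flow (function name is illustrative, not the repository's API):

```python
import torch
from qtorch.quant import float_quantize

def quantize_fp8_sketch(w: torch.Tensor):
    """Per-channel symmetric fp8 quantization, mirroring the diff above."""
    max_val = w.abs().amax(dim=1, keepdim=True).clamp(min=1e-5)
    finfo = torch.finfo(torch.float8_e4m3fn)
    scales = max_val / finfo.max                         # one scale per row
    scaled = torch.clip(w / scales, finfo.min, finfo.max)
    # Round onto the e4m3 grid first, then reinterpret in the storage dtype.
    w_q = float_quantize(scaled.float(), 4, 3, rounding="nearest").to(torch.float8_e4m3fn)
    return w_q, scales                                   # dequant: w_q.float() * scales

w_q, scales = quantize_fp8_sketch(torch.randn(16, 32))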
@@ -341,7 +345,8 @@ def quantize_model(
     target_keys=["attn", "ffn"],
     key_idx=2,
     ignore_key=None,
-    dtype=torch.int8,
+    linear_dtype=torch.int8,
+    non_linear_dtype=torch.float,
 ):
     """
     Quantize model weights in-place
@@ -370,16 +375,20 @@ def quantize_model(
         # Skip non-tensors, small tensors, and non-2D tensors
         if not isinstance(tensor, torch.Tensor) or tensor.dim() != 2:
+            if tensor.dtype != non_linear_dtype:
+                weights[key] = tensor.to(non_linear_dtype)
             continue

         # Check if key matches target modules
         parts = key.split(".")
         if len(parts) < key_idx + 1 or parts[key_idx] not in target_keys:
+            if tensor.dtype != non_linear_dtype:
+                weights[key] = tensor.to(non_linear_dtype)
             continue

         try:
             # Quantize tensor and store results
-            w_q, scales = quantize_tensor(tensor, w_bit, dtype)
+            w_q, scales = quantize_tensor(tensor, w_bit, linear_dtype)

             # Replace original tensor and store scales
             weights[key] = w_q
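The effect of the new `non_linear_dtype` parameter: 2-D tensors under the target keys are quantized to `linear_dtype`, while everything that falls through the two early-`continue` branches (biases, norms, embeddings, non-matching keys) is now cast instead of being left untouched. A toy illustration of that split, with hypothetical keys not taken from the repository:

```python
import torch

weights = {
    "blocks.0.attn.q.weight": torch.randn(8, 8),  # 2-D, matches "attn": quantized
    "blocks.0.attn.q.bias": torch.randn(8),       # 1-D: only cast
    "blocks.0.norm.weight": torch.randn(8),       # 1-D: only cast
}
non_linear_dtype = torch.bfloat16
for key, tensor in list(weights.items()):
    if tensor.dim() != 2:
        weights[key] = tensor.to(non_linear_dtype)  # the new cast-through path
```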
@@ -500,7 +509,8 @@ def convert_weights(args):
         target_keys=args.target_keys,
         key_idx=args.key_idx,
         ignore_key=args.ignore_key,
-        dtype=args.dtype,
+        linear_dtype=args.linear_dtype,
+        non_linear_dtype=args.non_linear_dtype,
     )

     os.makedirs(args.output, exist_ok=True)
@@ -637,10 +647,17 @@ def main():
         help="Device to use for quantization (cpu/cuda)",
     )
     parser.add_argument(
-        "--dtype",
+        "--linear_dtype",
         type=str,
         choices=["torch.int8", "torch.float8_e4m3fn"],
-        help="Data type for quantization",
+        help="Data type for linear layers",
     )
+    parser.add_argument(
+        "--non_linear_dtype",
+        type=str,
+        default="torch.float32",
+        choices=["torch.float32", "torch.bfloat16", "torch.float16"],
+        help="Data type for non-linear layers",
+    )
     parser.add_argument("--lora_path", type=str, nargs="*", help="Path(s) to LoRA file(s). Can specify multiple paths separated by spaces.")
     parser.add_argument(
@@ -654,12 +671,8 @@ def main():
     args = parser.parse_args()

     if args.quantized:
-        if args.dtype == "torch.int8":
-            args.dtype = torch.int8
-        elif args.dtype == "torch.float8_e4m3fn":
-            args.dtype = torch.float8_e4m3fn
-        else:
-            raise ValueError(f"Not support dtype :{args.dtype}")
+        args.linear_dtype = eval(args.linear_dtype)
+        args.non_linear_dtype = eval(args.non_linear_dtype)

     model_type_keys_map = {
         "wan_dit": {
......
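One note on the new parsing: `eval("torch.float8_e4m3fn")` works here because `torch` is imported, but it executes arbitrary strings. A safer equivalent is plain attribute lookup; this is a sketch of an alternative, not the repository's code:

```python
import torch

def parse_torch_dtype(spec: str) -> torch.dtype:
    """Resolve strings like "torch.float8_e4m3fn" without eval()."""
    prefix, _, name = spec.partition(".")
    dtype = getattr(torch, name, None)
    if prefix != "torch" or not isinstance(dtype, torch.dtype):
        raise ValueError(f"Unknown torch dtype: {spec!r}")
    return dtype

assert parse_torch_dtype("torch.int8") is torch.int8
```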
@@ -36,7 +36,7 @@ python converter.py \
     --output /Path/To/output \
     --output_ext .safetensors \
     --output_name wan_int8 \
-    --dtype torch.int8 \
+    --linear_dtype torch.int8 \
     --model_type wan_dit \
     --quantized \
     --save_by_block
@@ -48,7 +48,7 @@ python converter.py \
     --output /Path/To/output \
     --output_ext .safetensors \
     --output_name wan_fp8 \
-    --dtype torch.float8_e4m3fn \
+    --linear_dtype torch.float8_e4m3fn \
     --model_type wan_dit \
     --quantized \
     --save_by_block
@@ -62,7 +62,7 @@ python converter.py \
     --output /Path/To/output \
     --output_ext .safetensors \
     --output_name wan_int8 \
-    --dtype torch.int8 \
+    --linear_dtype torch.int8 \
     --model_type wan_dit \
     --lora_path /Path/To/LoRA1/ /Path/To/LoRA2/ \
     --lora_alpha 1.0 1.0 \
@@ -78,7 +78,7 @@ python converter.py \
     --output /Path/To/output \
     --output_ext .safetensors \
     --output_name hunyuan_int8 \
-    --dtype torch.int8 \
+    --linear_dtype torch.int8 \
     --model_type hunyuan_dit \
     --quantized
 ```
@@ -89,7 +89,7 @@ python converter.py \
     --output /Path/To/output \
     --output_ext .safetensors \
     --output_name hunyuan_fp8 \
-    --dtype torch.float8_e4m3fn \
+    --linear_dtype torch.float8_e4m3fn \
     --model_type hunyuan_dit \
     --quantized
 ```
@@ -103,7 +103,8 @@ python converter.py \
     --output /Path/To/output \
     --output_ext .pth \
     --output_name models_t5_umt5-xxl-enc-int8 \
-    --dtype torch.int8 \
+    --linear_dtype torch.int8 \
+    --non_linear_dtype torch.bfloat16 \
     --model_type wan_t5 \
     --quantized
 ```
@@ -111,10 +112,11 @@ python converter.py \
 ```bash
 python converter.py \
     --source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P/models_t5_umt5-xxl-enc-bf16.pth \
-    --output /Path/To/output \
+    --output /Path/To/Wan-AI/Wan2.1-I2V-14B-480P/fp8 \
     --output_ext .pth \
     --output_name models_t5_umt5-xxl-enc-fp8 \
-    --dtype torch.float8_e4m3fn \
+    --linear_dtype torch.float8_e4m3fn \
+    --non_linear_dtype torch.bfloat16 \
     --model_type wan_t5 \
     --quantized
 ```
@@ -128,7 +130,8 @@ python converter.py \
     --output /Path/To/output \
     --output_ext .pth \
     --output_name clip-int8 \
-    --dtype torch.int8 \
+    --linear_dtype torch.int8 \
+    --non_linear_dtype torch.float16 \
     --model_type wan_clip \
     --quantized
@@ -136,10 +139,11 @@ python converter.py \
 ```bash
 python converter.py \
     --source /Path/To/Wan-AI/Wan2.1-I2V-14B-480P/models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth \
-    --output /Path/To/output \
+    --output ./output \
     --output_ext .pth \
     --output_name clip-fp8 \
-    --dtype torch.float8_e4m3fn \
+    --linear_dtype torch.float8_e4m3fn \
+    --non_linear_dtype torch.float16 \
     --model_type wan_clip \
     --quantized
 ```