Commit bd1e469c authored by Xinchi Huang, committed by GitHub
Browse files

adding a padding strategy for SP (#37)



adding a padding strategy for SP

---------
Co-authored-by: de1star <843414674@qq.com>
parent 7c7c8266
{
"infer_steps": 40,
"target_video_length": 81,
"target_height": 480,
"target_width": 832,
"attention_type": "flash_attn3",
"seed": 42,
"sample_guide_scale": 5,
"sample_shift": 5,
"enable_cfg": true,
"cpu_offload": false,
"parallel_attn_type": "ulysses",
"parallel_vae": true
}
{
"infer_steps": 50,
"target_video_length": 81,
"text_len": 512,
"target_height": 480,
"target_width": 832,
"attention_type": "flash_attn3",
"seed": 42,
"sample_guide_scale": 6,
"sample_shift": 8,
"enable_cfg": true,
"cpu_offload": false,
"parallel_attn_type": "ulysses",
"parallel_vae": true
}
...@@ -58,13 +58,13 @@ def ulysses_attn(q, k, v, img_qkv_len, cu_seqlens_qkv, attention_type="flash_att ...@@ -58,13 +58,13 @@ def ulysses_attn(q, k, v, img_qkv_len, cu_seqlens_qkv, attention_type="flash_att
v = torch.cat((img_v, txt_v), dim=0) v = torch.cat((img_v, txt_v), dim=0)
# 初始化累积序列长度张量 # 初始化累积序列长度张量
cu_seqlens_qkv = torch.zeros([3], dtype=torch.int32, device="cuda") cu_seqlens_qkv = torch.zeros([2], dtype=torch.int32, device="cuda")
s = txt_qkv_len + img_q.shape[0] # 计算文本和图像的总长度 s = txt_qkv_len + img_q.shape[0] # 计算文本和图像的总长度
s1 = s # 当前样本的结束位置 s1 = s # 当前样本的结束位置
cu_seqlens_qkv[1] = s1 # 设置累积序列长度 cu_seqlens_qkv[1] = s1 # 设置累积序列长度
if txt_mask_len: if txt_mask_len:
s2 = txt_mask_len + img_q.shape[0] # 文本掩码的结束位置 s2 = txt_mask_len + img_q.shape[0] # 文本掩码的结束位置
cu_seqlens_qkv[2] = s2 # 设置累积序列长度 cu_seqlens_qkv = torch.cat(cu_seqlens_qkv, s2)
max_seqlen_qkv = img_q.shape[0] + txt_q.shape[0] # 最大序列长度 max_seqlen_qkv = img_q.shape[0] + txt_q.shape[0] # 最大序列长度
# 调用注意力函数计算注意力结果 # 调用注意力函数计算注意力结果
......
from re import split from re import split
import torch import torch
import torch.distributed as dist import torch.distributed as dist
import torch.nn.functional as F
PADDING_SIZE = None
def pre_process(x):
    """Return the current rank's slice of ``x`` along dimension 0.

    ``x`` is first zero-padded at the end of dimension 0 so that its length
    is a multiple of the world size; it is then split into ``world_size``
    chunks along dimension 0 and the chunk at index ``dist.get_rank()`` is
    returned.

    The amount of padding is not recorded by this function; a caller that
    needs to strip the padding later has to recompute it.

    Args:
        x: Tensor to shard along its first dimension. Any rank >= 1 works.

    Returns:
        The chunk of the (possibly padded) tensor belonging to this rank.
    """
    world_size = dist.get_world_size()
    cur_rank = dist.get_rank()

    # Number of extra rows needed to reach the next multiple of world_size
    # (0 when the length is already a multiple).
    padding_size = (world_size - (x.shape[0] % world_size)) % world_size
    if padding_size > 0:
        # F.pad consumes (before, after) pairs starting from the LAST
        # dimension, so every trailing dimension needs an explicit (0, 0)
        # pair before the pair that addresses dimension 0. A fixed
        # (0, 0, 0, padding_size) only targets dimension 0 for 2-D input.
        pad = (0, 0) * (x.dim() - 1) + (0, padding_size)
        x = F.pad(x, pad)

    x = torch.chunk(x, world_size, dim=0)[cur_rank]
    return x
......
#!/bin/bash
# Launches lightx2v.infer through torchrun with 4 processes for the
# wan2.1 model, task i2v, using configs/wan_i2v_dist.json.

# Set these two paths first; the script aborts below if either is empty.
lightx2v_path=
model_path=

# check section
if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
    # NOTE(review): this default selects device indices 1-4, which presumably
    # requires at least five GPUs on the host; confirm this is intended.
    cuda_devices=1,2,3,4
    echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
    export CUDA_VISIBLE_DEVICES=${cuda_devices}
fi

if [ -z "${lightx2v_path}" ]; then
    echo "Error: lightx2v_path is not set. Please set this variable first."
    exit 1
fi

if [ -z "${model_path}" ]; then
    echo "Error: model_path is not set. Please set this variable first."
    exit 1
fi

export TOKENIZERS_PARALLELISM=false

# Only append the previous PYTHONPATH when it is non-empty, so no trailing
# ':' (an empty search-path entry) is left behind.
export PYTHONPATH="${lightx2v_path}${PYTHONPATH:+:${PYTHONPATH}}"

export ENABLE_PROFILING_DEBUG=true
export ENABLE_GRAPH_MODE=false

# All path expansions are quoted so directories containing spaces are passed
# to torchrun as single arguments.
torchrun --nproc_per_node=4 -m lightx2v.infer \
    --model_cls wan2.1 \
    --task i2v \
    --model_path "${model_path}" \
    --config_json "${lightx2v_path}/configs/wan_i2v_dist.json" \
    --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
    --negative_prompt "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" \
    --image_path "${lightx2v_path}/assets/inputs/imgs/img_0.jpg" \
    --save_video_path "${lightx2v_path}/save_results/output_lightx2v_wan_i2v.mp4"
...@@ -6,7 +6,7 @@ model_path= ...@@ -6,7 +6,7 @@ model_path=
# check section # check section
if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
cuda_devices=0,1,2,3 cuda_devices=1,2,3,4
echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable." echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
export CUDA_VISIBLE_DEVICES=${cuda_devices} export CUDA_VISIBLE_DEVICES=${cuda_devices}
fi fi
...@@ -26,39 +26,13 @@ export TOKENIZERS_PARALLELISM=false ...@@ -26,39 +26,13 @@ export TOKENIZERS_PARALLELISM=false
export PYTHONPATH=${lightx2v_path}:$PYTHONPATH export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
export ENABLE_PROFILING_DEBUG=true export ENABLE_PROFILING_DEBUG=true
export ENABLE_GRAPH_MODE=false
torchrun --nproc_per_node=4 ${lightx2v_path}/lightx2v/infer.py \ torchrun --nproc_per_node=4 -m lightx2v.infer \
--model_cls wan2.1 \ --model_cls wan2.1 \
--task t2v \ --task t2v \
--model_path $model_path \ --model_path $model_path \
--config_json ${lightx2v_path}/configs/wan_t2v_dist.json \
--prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \ --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
--infer_steps 50 \
--target_video_length 84 \
--target_width 832 \
--target_height 480 \
--attention_type flash_attn2 \
--seed 42 \
--negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \ --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
--sample_guide_scale 6 \ --save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v.mp4
--sample_shift 8 \
--parallel_attn_type ring \
--parallel_vae \
--save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v_dist_ring.mp4
torchrun --nproc_per_node=4 ${lightx2v_path}/lightx2v/infer.py \
--model_cls wan2.1 \
--task t2v \
--model_path $model_path \
--prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
--infer_steps 50 \
--target_video_length 81 \
--target_width 832 \
--target_height 480 \
--attention_type flash_attn2 \
--seed 42 \
--negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
--sample_guide_scale 6 \
--sample_shift 8 \
--parallel_attn_type ulysses \
--parallel_vae \
--save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v_dist_ulysses.mp4
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment