adding a padding strategy for SP (#37)

adding a padding strategy for SP --------- Co-authored-by: de1star <843414674@qq.com>

adding a padding strategy for SP (#37)
adding a padding strategy for SP --------- Co-authored-by: de1star <843414674@qq.com>
bd1e469c · Xinchi Huang · GitHub · 7c7c8266 · bd1e469c · bd1e469c
Commit bd1e469c, authored May 12, 2025 by Xinchi Huang; committed by GitHub on May 12, 2025.
6 changed files
--- a/configs/wan_i2v_dist.json
+++ b/configs/wan_i2v_dist.json
+{
+    "infer_steps": 40,
+    "target_video_length": 81,
+    "target_height": 480,
+    "target_width": 832,
+    "attention_type": "flash_attn3",
+    "seed": 42,
+    "sample_guide_scale": 5,
+    "sample_shift": 5,
+    "enable_cfg": true,
+    "cpu_offload": false,
+    "parallel_attn_type": "ulysses",
+    "parallel_vae": true
+}
--- a/configs/wan_t2v_dist.json
+++ b/configs/wan_t2v_dist.json
+{
+    "infer_steps": 50,
+    "target_video_length": 81,
+    "text_len": 512,
+    "target_height": 480,
+    "target_width": 832,
+    "attention_type": "flash_attn3",
+    "seed": 42,
+    "sample_guide_scale": 6,
+    "sample_shift": 8,
+    "enable_cfg": true,
+    "cpu_offload": false,
+    "parallel_attn_type": "ulysses",
+    "parallel_vae": true
+}
--- a/lightx2v/attentions/distributed/ulysses/attn.py
+++ b/lightx2v/attentions/distributed/ulysses/attn.py
@@ -58,13 +58,13 @@ def ulysses_attn(q, k, v, img_qkv_len, cu_seqlens_qkv, attention_type="flash_att
    v = torch.cat((img_v, txt_v), dim=0)
    # 初始化累积序列长度张量
-    cu_seqlens_qkv = torch.zeros([3], dtype=torch.int32, device="cuda")
+    cu_seqlens_qkv = torch.zeros([2], dtype=torch.int32, device="cuda")
    s = txt_qkv_len + img_q.shape[0]  # 计算文本和图像的总长度
    s1 = s  # 当前样本的结束位置
    cu_seqlens_qkv[1] = s1  # 设置累积序列长度
    if txt_mask_len:
        s2 = txt_mask_len + img_q.shape[0]  # 文本掩码的结束位置
-        cu_seqlens_qkv[2] = s2  # 设置累积序列长度
+        cu_seqlens_qkv = torch.cat(cu_seqlens_qkv, s2)
    max_seqlen_qkv = img_q.shape[0] + txt_q.shape[0]  # 最大序列长度
    # 调用注意力函数计算注意力结果

--- a/lightx2v/attentions/distributed/utils/wan/processor.py
+++ b/lightx2v/attentions/distributed/utils/wan/processor.py
 from re import split
 import torch
 import torch.distributed as dist
+import torch.nn.functional as F
+PADDING_SIZE = None
 def pre_process(x):
    world_size = dist.get_world_size()
    cur_rank = dist.get_rank()
+    padding_size = (world_size - (x.shape[0] % world_size)) % world_size
+    if padding_size > 0:
+        # 使用 F.pad 填充第一维
+        x = F.pad(x, (0, 0, 0, padding_size))  # (后维度填充, 前维度填充)
    x = torch.chunk(x, world_size, dim=0)[cur_rank]
    return x

--- a/scripts/run_wan_i2v_dist.sh
+++ b/scripts/run_wan_i2v_dist.sh
+#!/bin/bash
+# set path and first
+lightx2v_path=
+model_path=
+# check section
+if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
+    cuda_devices=1,2,3,4
+    echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
+    export CUDA_VISIBLE_DEVICES=${cuda_devices}
+fi
+if [ -z "${lightx2v_path}" ]; then
+    echo "Error: lightx2v_path is not set. Please set this variable first."
+    exit 1
+fi
+if [ -z "${model_path}" ]; then
+    echo "Error: model_path is not set. Please set this variable first."
+    exit 1
+fi
+export TOKENIZERS_PARALLELISM=false
+export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
+export ENABLE_PROFILING_DEBUG=true
+export ENABLE_GRAPH_MODE=false
+torchrun --nproc_per_node=4 -m lightx2v.infer \
+--model_cls wan2.1 \
+--task i2v \
+--model_path $model_path \
+--config_json ${lightx2v_path}/configs/wan_i2v_dist.json \
+--prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." \
+--negative_prompt 色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走 \
+--image_path ${lightx2v_path}/assets/inputs/imgs/img_0.jpg \
+--save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_i2v.mp4
--- a/scripts/run_wan_t2v_dist.sh
+++ b/scripts/run_wan_t2v_dist.sh
@@ -6,7 +6,7 @@ model_path=
 # check section
 if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
-    cuda_devices=0,1,2,3
+    cuda_devices=1,2,3,4
    echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
    export CUDA_VISIBLE_DEVICES=${cuda_devices}
 fi
@@ -26,39 +26,13 @@ export TOKENIZERS_PARALLELISM=false
 export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
 export ENABLE_PROFILING_DEBUG=true
+export ENABLE_GRAPH_MODE=false
-torchrun --nproc_per_node=4 ${lightx2v_path}/lightx2v/infer.py \
+torchrun --nproc_per_node=4 -m lightx2v.infer \
 --model_cls wan2.1 \
 --task t2v \
 --model_path $model_path \
+--config_json ${lightx2v_path}/configs/wan_t2v_dist.json \
 --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
--infer_steps 50 \
--target_video_length 84 \
--target_width  832 \
--target_height 480 \
--attention_type flash_attn2 \
--seed 42 \
 --negative_prompt 色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走 \
--sample_guide_scale 6 \
+--save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v.mp4
--sample_shift 8 \
--parallel_attn_type ring \
--parallel_vae \
--save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v_dist_ring.mp4
-torchrun --nproc_per_node=4 ${lightx2v_path}/lightx2v/infer.py \
--model_cls wan2.1 \
--task t2v \
--model_path $model_path \
--prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage." \
--infer_steps 50 \
--target_video_length 81 \
--target_width  832 \
--target_height 480 \
--attention_type flash_attn2 \
--seed 42 \
--negative_prompt 色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走 \
--sample_guide_scale 6 \
--sample_shift 8 \
--parallel_attn_type ulysses \
--parallel_vae \
--save_video_path ${lightx2v_path}/save_results/output_lightx2v_wan_t2v_dist_ulysses.mp4