Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
xuwx1
LightX2V
Commits
826e9b03
Commit
826e9b03
authored
Aug 08, 2025
by
wangshankun
Browse files
Merge branch 'main' of
https://github.com/ModelTC/LightX2V
into main
parents
6de0996c
1ff745e4
Changes
15
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
265 additions
and
3 deletions
+265
-3
configs/dist_infer/wan22_ti2v_i2v_cfg.json
configs/dist_infer/wan22_ti2v_i2v_cfg.json
+23
-0
configs/dist_infer/wan22_ti2v_i2v_cfg_ulysses.json
configs/dist_infer/wan22_ti2v_i2v_cfg_ulysses.json
+25
-0
configs/dist_infer/wan22_ti2v_i2v_ulysses.json
configs/dist_infer/wan22_ti2v_i2v_ulysses.json
+24
-0
configs/dist_infer/wan22_ti2v_t2v_cfg.json
configs/dist_infer/wan22_ti2v_t2v_cfg.json
+22
-0
configs/dist_infer/wan22_ti2v_t2v_cfg_ulysses.json
configs/dist_infer/wan22_ti2v_t2v_cfg_ulysses.json
+24
-0
configs/dist_infer/wan22_ti2v_t2v_ulysses.json
configs/dist_infer/wan22_ti2v_t2v_ulysses.json
+23
-0
lightx2v/models/networks/wan/infer/dist_infer/transformer_infer.py
...models/networks/wan/infer/dist_infer/transformer_infer.py
+5
-3
lightx2v/models/networks/wan/infer/pre_infer.py
lightx2v/models/networks/wan/infer/pre_infer.py
+4
-0
lightx2v/utils/set_config.py
lightx2v/utils/set_config.py
+1
-0
scripts/dist_infer/run_wan22_ti2v_i2v_cfg.sh
scripts/dist_infer/run_wan22_ti2v_i2v_cfg.sh
+19
-0
scripts/dist_infer/run_wan22_ti2v_i2v_cfg_ulysses.sh
scripts/dist_infer/run_wan22_ti2v_i2v_cfg_ulysses.sh
+19
-0
scripts/dist_infer/run_wan22_ti2v_i2v_ulysses.sh
scripts/dist_infer/run_wan22_ti2v_i2v_ulysses.sh
+19
-0
scripts/dist_infer/run_wan22_ti2v_t2v_cfg.sh
scripts/dist_infer/run_wan22_ti2v_t2v_cfg.sh
+19
-0
scripts/dist_infer/run_wan22_ti2v_t2v_cfg_ulysses.sh
scripts/dist_infer/run_wan22_ti2v_t2v_cfg_ulysses.sh
+19
-0
scripts/dist_infer/run_wan22_ti2v_t2v_ulysses.sh
scripts/dist_infer/run_wan22_ti2v_t2v_ulysses.sh
+19
-0
No files found.
configs/dist_infer/wan22_ti2v_i2v_cfg.json
0 → 100755
View file @
826e9b03
{
"infer_steps"
:
50
,
"target_video_length"
:
121
,
"text_len"
:
512
,
"target_height"
:
704
,
"target_width"
:
1280
,
"num_channels_latents"
:
48
,
"vae_stride"
:
[
4
,
16
,
16
],
"self_attn_1_type"
:
"flash_attn3"
,
"cross_attn_1_type"
:
"flash_attn3"
,
"cross_attn_2_type"
:
"flash_attn3"
,
"seed"
:
42
,
"sample_guide_scale"
:
5.0
,
"sample_shift"
:
5.0
,
"enable_cfg"
:
true
,
"cpu_offload"
:
false
,
"offload_granularity"
:
"model"
,
"fps"
:
24
,
"use_image_encoder"
:
false
,
"parallel"
:
{
"cfg_p_size"
:
2
}
}
configs/dist_infer/wan22_ti2v_i2v_cfg_ulysses.json
0 → 100755
View file @
826e9b03
{
"infer_steps"
:
50
,
"target_video_length"
:
121
,
"text_len"
:
512
,
"target_height"
:
704
,
"target_width"
:
1280
,
"num_channels_latents"
:
48
,
"vae_stride"
:
[
4
,
16
,
16
],
"self_attn_1_type"
:
"flash_attn3"
,
"cross_attn_1_type"
:
"flash_attn3"
,
"cross_attn_2_type"
:
"flash_attn3"
,
"seed"
:
42
,
"sample_guide_scale"
:
5.0
,
"sample_shift"
:
5.0
,
"enable_cfg"
:
true
,
"cpu_offload"
:
false
,
"offload_granularity"
:
"model"
,
"fps"
:
24
,
"use_image_encoder"
:
false
,
"parallel"
:
{
"seq_p_size"
:
4
,
"seq_p_attn_type"
:
"ulysses"
,
"cfg_p_size"
:
2
}
}
configs/dist_infer/wan22_ti2v_i2v_ulysses.json
0 → 100755
View file @
826e9b03
{
"infer_steps"
:
50
,
"target_video_length"
:
121
,
"text_len"
:
512
,
"target_height"
:
704
,
"target_width"
:
1280
,
"num_channels_latents"
:
48
,
"vae_stride"
:
[
4
,
16
,
16
],
"self_attn_1_type"
:
"flash_attn3"
,
"cross_attn_1_type"
:
"flash_attn3"
,
"cross_attn_2_type"
:
"flash_attn3"
,
"seed"
:
42
,
"sample_guide_scale"
:
5.0
,
"sample_shift"
:
5.0
,
"enable_cfg"
:
true
,
"cpu_offload"
:
false
,
"offload_granularity"
:
"model"
,
"fps"
:
24
,
"use_image_encoder"
:
false
,
"parallel"
:
{
"seq_p_size"
:
4
,
"seq_p_attn_type"
:
"ulysses"
}
}
configs/dist_infer/wan22_ti2v_t2v_cfg.json
0 → 100755
View file @
826e9b03
{
"infer_steps"
:
50
,
"target_video_length"
:
121
,
"text_len"
:
512
,
"target_height"
:
704
,
"target_width"
:
1280
,
"num_channels_latents"
:
48
,
"vae_stride"
:
[
4
,
16
,
16
],
"self_attn_1_type"
:
"flash_attn3"
,
"cross_attn_1_type"
:
"flash_attn3"
,
"cross_attn_2_type"
:
"flash_attn3"
,
"seed"
:
42
,
"sample_guide_scale"
:
5.0
,
"sample_shift"
:
5.0
,
"enable_cfg"
:
true
,
"cpu_offload"
:
false
,
"offload_granularity"
:
"model"
,
"fps"
:
24
,
"parallel"
:
{
"cfg_p_size"
:
2
}
}
configs/dist_infer/wan22_ti2v_t2v_cfg_ulysses.json
0 → 100755
View file @
826e9b03
{
"infer_steps"
:
50
,
"target_video_length"
:
121
,
"text_len"
:
512
,
"target_height"
:
704
,
"target_width"
:
1280
,
"num_channels_latents"
:
48
,
"vae_stride"
:
[
4
,
16
,
16
],
"self_attn_1_type"
:
"flash_attn3"
,
"cross_attn_1_type"
:
"flash_attn3"
,
"cross_attn_2_type"
:
"flash_attn3"
,
"seed"
:
42
,
"sample_guide_scale"
:
5.0
,
"sample_shift"
:
5.0
,
"enable_cfg"
:
true
,
"cpu_offload"
:
false
,
"offload_granularity"
:
"model"
,
"fps"
:
24
,
"parallel"
:
{
"seq_p_size"
:
4
,
"seq_p_attn_type"
:
"ulysses"
,
"cfg_p_size"
:
2
}
}
configs/dist_infer/wan22_ti2v_t2v_ulysses.json
0 → 100755
View file @
826e9b03
{
"infer_steps"
:
50
,
"target_video_length"
:
121
,
"text_len"
:
512
,
"target_height"
:
704
,
"target_width"
:
1280
,
"num_channels_latents"
:
48
,
"vae_stride"
:
[
4
,
16
,
16
],
"self_attn_1_type"
:
"flash_attn3"
,
"cross_attn_1_type"
:
"flash_attn3"
,
"cross_attn_2_type"
:
"flash_attn3"
,
"seed"
:
42
,
"sample_guide_scale"
:
5.0
,
"sample_shift"
:
5.0
,
"enable_cfg"
:
true
,
"cpu_offload"
:
false
,
"offload_granularity"
:
"model"
,
"fps"
:
24
,
"parallel"
:
{
"seq_p_size"
:
4
,
"seq_p_attn_type"
:
"ulysses"
}
}
lightx2v/models/networks/wan/infer/dist_infer/transformer_infer.py
View file @
826e9b03
...
@@ -12,7 +12,7 @@ class WanTransformerDistInfer(WanTransformerInfer):
...
@@ -12,7 +12,7 @@ class WanTransformerDistInfer(WanTransformerInfer):
self
.
seq_p_group
=
self
.
config
[
"device_mesh"
].
get_group
(
mesh_dim
=
"seq_p"
)
self
.
seq_p_group
=
self
.
config
[
"device_mesh"
].
get_group
(
mesh_dim
=
"seq_p"
)
def
infer
(
self
,
weights
,
grid_sizes
,
embed
,
x
,
embed0
,
seq_lens
,
freqs
,
context
,
audio_dit_blocks
=
None
):
def
infer
(
self
,
weights
,
grid_sizes
,
embed
,
x
,
embed0
,
seq_lens
,
freqs
,
context
,
audio_dit_blocks
=
None
):
x
=
self
.
dist_pre_process
(
x
)
x
,
embed0
=
self
.
dist_pre_process
(
x
,
embed0
)
x
=
super
().
infer
(
weights
,
grid_sizes
,
embed
,
x
,
embed0
,
seq_lens
,
freqs
,
context
,
audio_dit_blocks
)
x
=
super
().
infer
(
weights
,
grid_sizes
,
embed
,
x
,
embed0
,
seq_lens
,
freqs
,
context
,
audio_dit_blocks
)
x
=
self
.
dist_post_process
(
x
)
x
=
self
.
dist_post_process
(
x
)
return
x
return
x
...
@@ -24,7 +24,7 @@ class WanTransformerDistInfer(WanTransformerInfer):
...
@@ -24,7 +24,7 @@ class WanTransformerDistInfer(WanTransformerInfer):
freqs_i
=
self
.
compute_freqs_dist
(
q
.
size
(
0
),
q
.
size
(
2
)
//
2
,
grid_sizes
,
freqs
)
freqs_i
=
self
.
compute_freqs_dist
(
q
.
size
(
0
),
q
.
size
(
2
)
//
2
,
grid_sizes
,
freqs
)
return
freqs_i
return
freqs_i
def
dist_pre_process
(
self
,
x
):
def
dist_pre_process
(
self
,
x
,
embed0
):
world_size
=
dist
.
get_world_size
(
self
.
seq_p_group
)
world_size
=
dist
.
get_world_size
(
self
.
seq_p_group
)
cur_rank
=
dist
.
get_rank
(
self
.
seq_p_group
)
cur_rank
=
dist
.
get_rank
(
self
.
seq_p_group
)
...
@@ -35,7 +35,9 @@ class WanTransformerDistInfer(WanTransformerInfer):
...
@@ -35,7 +35,9 @@ class WanTransformerDistInfer(WanTransformerInfer):
x
=
F
.
pad
(
x
,
(
0
,
0
,
0
,
padding_size
))
# (后维度填充, 前维度填充)
x
=
F
.
pad
(
x
,
(
0
,
0
,
0
,
padding_size
))
# (后维度填充, 前维度填充)
x
=
torch
.
chunk
(
x
,
world_size
,
dim
=
0
)[
cur_rank
]
x
=
torch
.
chunk
(
x
,
world_size
,
dim
=
0
)[
cur_rank
]
return
x
if
self
.
config
[
"model_cls"
].
startswith
(
"wan2.2"
):
embed0
=
torch
.
chunk
(
embed0
,
world_size
,
dim
=
0
)[
cur_rank
]
return
x
,
embed0
def
dist_post_process
(
self
,
x
):
def
dist_post_process
(
self
,
x
):
world_size
=
dist
.
get_world_size
(
self
.
seq_p_group
)
world_size
=
dist
.
get_world_size
(
self
.
seq_p_group
)
...
...
lightx2v/models/networks/wan/infer/pre_infer.py
View file @
826e9b03
...
@@ -73,6 +73,10 @@ class WanPreInfer:
...
@@ -73,6 +73,10 @@ class WanPreInfer:
x
=
x
.
flatten
(
2
).
transpose
(
1
,
2
).
contiguous
()
x
=
x
.
flatten
(
2
).
transpose
(
1
,
2
).
contiguous
()
seq_lens
=
torch
.
tensor
(
x
.
size
(
1
),
dtype
=
torch
.
long
).
cuda
().
unsqueeze
(
0
)
seq_lens
=
torch
.
tensor
(
x
.
size
(
1
),
dtype
=
torch
.
long
).
cuda
().
unsqueeze
(
0
)
# wan2.2_moe会对t做扩展,我们发现这里做不做影响不大,而且做了拓展会增加耗时,目前忠实原作代码,后续可以考虑去掉
if
self
.
config
[
"model_cls"
]
==
"wan2.2_moe"
:
t
=
t
.
expand
(
seq_lens
[
0
])
embed
=
sinusoidal_embedding_1d
(
self
.
freq_dim
,
t
.
flatten
())
embed
=
sinusoidal_embedding_1d
(
self
.
freq_dim
,
t
.
flatten
())
if
self
.
enable_dynamic_cfg
:
if
self
.
enable_dynamic_cfg
:
s
=
torch
.
tensor
([
self
.
cfg_scale
],
dtype
=
torch
.
float32
).
to
(
x
.
device
)
s
=
torch
.
tensor
([
self
.
cfg_scale
],
dtype
=
torch
.
float32
).
to
(
x
.
device
)
...
...
lightx2v/utils/set_config.py
View file @
826e9b03
...
@@ -25,6 +25,7 @@ def get_default_config():
...
@@ -25,6 +25,7 @@ def get_default_config():
"seq_parallel"
:
False
,
"seq_parallel"
:
False
,
"cfg_parallel"
:
False
,
"cfg_parallel"
:
False
,
"enable_cfg"
:
False
,
"enable_cfg"
:
False
,
"use_image_encoder"
:
True
,
}
}
return
default_config
return
default_config
...
...
scripts/dist_infer/run_wan22_ti2v_i2v_cfg.sh
0 → 100755
View file @
826e9b03
#!/bin/bash
# set path and first
lightx2v_path
=
model_path
=
export
CUDA_VISIBLE_DEVICES
=
0,1
# set environment variables
source
${
lightx2v_path
}
/scripts/base/base.sh
torchrun
--nproc_per_node
=
2
-m
lightx2v.infer
\
--model_cls
wan2.2
\
--task
t2v
\
--model_path
$model_path
\
--config_json
${
lightx2v_path
}
/configs/dist_infer/wan22_ti2v_i2v_cfg.json
\
--prompt
"Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage"
\
--negative_prompt
"色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
\
--save_video_path
${
lightx2v_path
}
/save_results/output_lightx2v_wan22_ti2v_i2v_parallel_cfg.mp4
scripts/dist_infer/run_wan22_ti2v_i2v_cfg_ulysses.sh
0 → 100755
View file @
826e9b03
#!/bin/bash
# set path and first
lightx2v_path
=
model_path
=
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
# set environment variables
source
${
lightx2v_path
}
/scripts/base/base.sh
torchrun
--nproc_per_node
=
8
-m
lightx2v.infer
\
--model_cls
wan2.2
\
--task
t2v
\
--model_path
$model_path
\
--config_json
${
lightx2v_path
}
/configs/dist_infer/wan22_ti2v_i2v_cfg_ulysses.json
\
--prompt
"Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage"
\
--negative_prompt
"色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
\
--save_video_path
${
lightx2v_path
}
/save_results/output_lightx2v_wan22_ti2v_i2v_parallel_cfg_ulysses.mp4
scripts/dist_infer/run_wan22_ti2v_i2v_ulysses.sh
0 → 100755
View file @
826e9b03
#!/bin/bash
# set path and first
lightx2v_path
=
model_path
=
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3
# set environment variables
source
${
lightx2v_path
}
/scripts/base/base.sh
torchrun
--nproc_per_node
=
4
-m
lightx2v.infer
\
--model_cls
wan2.2
\
--task
t2v
\
--model_path
$model_path
\
--config_json
${
lightx2v_path
}
/configs/dist_infer/wan22_ti2v_i2v_ulysses.json
\
--prompt
"Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage"
\
--negative_prompt
"色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
\
--save_video_path
${
lightx2v_path
}
/save_results/output_lightx2v_wan22_ti2v_i2v_ulysses.mp4
scripts/dist_infer/run_wan22_ti2v_t2v_cfg.sh
0 → 100755
View file @
826e9b03
#!/bin/bash
# set path and first
lightx2v_path
=
model_path
=
export
CUDA_VISIBLE_DEVICES
=
0,1
# set environment variables
source
${
lightx2v_path
}
/scripts/base/base.sh
torchrun
--nproc_per_node
=
2
-m
lightx2v.infer
\
--model_cls
wan2.2
\
--task
t2v
\
--model_path
$model_path
\
--config_json
${
lightx2v_path
}
/configs/dist_infer/wan22_ti2v_t2v_cfg.json
\
--prompt
"Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage"
\
--negative_prompt
"色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
\
--save_video_path
${
lightx2v_path
}
/save_results/output_lightx2v_wan22_ti2v_t2v_parallel_cfg.mp4
scripts/dist_infer/run_wan22_ti2v_t2v_cfg_ulysses.sh
0 → 100755
View file @
826e9b03
#!/bin/bash
# set path and first
lightx2v_path
=
model_path
=
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
# set environment variables
source
${
lightx2v_path
}
/scripts/base/base.sh
torchrun
--nproc_per_node
=
8
-m
lightx2v.infer
\
--model_cls
wan2.2
\
--task
t2v
\
--model_path
$model_path
\
--config_json
${
lightx2v_path
}
/configs/dist_infer/wan22_ti2v_t2v_cfg_ulysses.json
\
--prompt
"Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage"
\
--negative_prompt
"色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
\
--save_video_path
${
lightx2v_path
}
/save_results/output_lightx2v_wan22_ti2v_t2v_parallel_cfg_ulysses.mp4
scripts/dist_infer/run_wan22_ti2v_t2v_ulysses.sh
0 → 100755
View file @
826e9b03
#!/bin/bash
# set path and first
lightx2v_path
=
model_path
=
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3
# set environment variables
source
${
lightx2v_path
}
/scripts/base/base.sh
torchrun
--nproc_per_node
=
4
-m
lightx2v.infer
\
--model_cls
wan2.2
\
--task
t2v
\
--model_path
$model_path
\
--config_json
${
lightx2v_path
}
/configs/dist_infer/wan22_ti2v_t2v_ulysses.json
\
--prompt
"Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage"
\
--negative_prompt
"色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
\
--save_video_path
${
lightx2v_path
}
/save_results/output_lightx2v_wan22_ti2v_t2v_parallel_ulysses.mp4
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment