xuwx1 / LightX2V · Commits

Commit bba65ffd
authored Aug 27, 2025 by gushiqiao, committed via GitHub on Aug 27, 2025

[Fix] fix encoder bugs. (#254)

parent 09d769cc
Showing 4 changed files with 14 additions and 17 deletions (+14 -17):
  lightx2v/models/runners/default_runner.py                  +4 -4
  lightx2v/models/runners/qwen_image/qwen_image_runner.py    +2 -4
  lightx2v/models/runners/wan/wan_vace_runner.py             +5 -5
  scripts/wan/run_wan_i2v_audio.sh                           +3 -4
lightx2v/models/runners/default_runner.py

@@ -174,8 +174,8 @@ class DefaultRunner(BaseRunner):
     @ProfilingContext("Run Encoders")
     def _run_input_encoder_local_flf2v(self):
         prompt = self.config["prompt_enhanced"] if self.config["use_prompt_enhancer"] else self.config["prompt"]
-        first_frame = self.read_image_input(self.config["image_path"])
-        last_frame = self.read_image_input(self.config["last_frame_path"])
+        first_frame, _ = self.read_image_input(self.config["image_path"])
+        last_frame, _ = self.read_image_input(self.config["last_frame_path"])
         clip_encoder_out = self.run_image_encoder(first_frame, last_frame) if self.config.get("use_image_encoder", True) else None
         vae_encode_out = self.run_vae_encoder(first_frame, last_frame)
         text_encoder_output = self.run_text_encoder(prompt, first_frame)
@@ -222,13 +222,13 @@ class DefaultRunner(BaseRunner):
         # 2. main inference loop
         latents, generator = self.run_segment(total_steps=total_steps)
         # 3. vae decoder
-        self.gen_video = self.run_vae_decoder(latents, generator)
+        self.gen_video = self.run_vae_decoder(latents)
         # 4. default do nothing
         self.end_run_segment()
         self.end_run()

     @ProfilingContext("Run VAE Decoder")
-    def run_vae_decoder(self, latents, generator):
+    def run_vae_decoder(self, latents):
         if self.config.get("lazy_load", False) or self.config.get("unload_modules", False):
             self.vae_decoder = self.load_vae_decoder()
         images = self.vae_decoder.decode(latents)
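Two things change in this file: the first-/last-frame call sites now unpack the result of read_image_input (the diff implies it returns an image plus a second value), and run_vae_decoder drops the unused generator parameter at both the definition and the call site. A minimal, self-contained sketch of the unpacking bug, with a hypothetical helper standing in for the real loader:

    # Hypothetical stand-in for DefaultRunner.read_image_input; the diff implies
    # the real helper returns (image, metadata), so binding the whole result to
    # one name silently passed a tuple where an image was expected downstream.
    def read_image_input(path):
        image = f"<image decoded from {path}>"  # stand-in for the decoded image
        return image, {"path": path}            # image plus side-channel metadata

    first_frame = read_image_input("frame.png")     # old code: first_frame is a 2-tuple
    assert isinstance(first_frame, tuple)
    first_frame, _ = read_image_input("frame.png")  # fixed: unpack, discard metadata
    assert isinstance(first_frame, str)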
lightx2v/models/runners/qwen_image/qwen_image_runner.py (mode 100644 → 100755)
@@ -60,8 +60,6 @@ class QwenImageRunner(DefaultRunner):
             self.load_model()
         elif self.config.get("lazy_load", False):
             assert self.config.get("cpu_offload", False)
-        self.run_dit = self._run_dit_local
-        self.run_vae_decoder = self._run_vae_decoder_local
         if self.config["task"] == "t2i":
             self.run_input_encoder = self._run_input_encoder_local_t2i
         elif self.config["task"] == "i2i":
@@ -156,7 +154,7 @@ class QwenImageRunner(DefaultRunner):
         self.vfi_model = self.load_vfi_model() if "video_frame_interpolation" in self.config else None

     @ProfilingContext("Run VAE Decoder")
-    def _run_vae_decoder_local(self, latents, generator):
+    def run_vae_decoder(self, latents):
         if self.config.get("lazy_load", False) or self.config.get("unload_modules", False):
             self.vae_decoder = self.load_vae()
         images = self.vae.decode(latents)
@@ -174,7 +172,7 @@ class QwenImageRunner(DefaultRunner):
         self.set_target_shape()
         latents, generator = self.run_dit()
-        images = self.run_vae_decoder(latents, generator)
+        images = self.run_vae_decoder(latents)
         image = images[0]
         image.save(f"{self.config.save_video_path}")
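The rename from _run_vae_decoder_local to run_vae_decoder turns the dynamic attribute rebinding in the setup path into a plain method override of the base class, which also retires the stale (latents, generator) signature. A simplified sketch of the pattern, with illustrative class names (the real ones are DefaultRunner and QwenImageRunner):

    # Illustrative classes only, sketching the override-instead-of-rebind design.
    class BaseRunnerSketch:
        def run_vae_decoder(self, latents):
            return f"base decode of {latents}"

    class QwenRunnerSketch(BaseRunnerSketch):
        # A plain override replaces `self.run_vae_decoder = self._run_vae_decoder_local`,
        # so every caller of run_vae_decoder(latents) reaches the subclass version.
        def run_vae_decoder(self, latents):
            return f"qwen decode of {latents}"

    runner = QwenRunnerSketch()
    assert runner.run_vae_decoder("z") == "qwen decode of z"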
lightx2v/models/runners/wan/wan_vace_runner.py
@@ -96,22 +96,22 @@ class WanVaceRunner(WanRunner):
         assert len(frames) == len(ref_images)

         if masks is None:
-            latents = self.vae_encoder.encode(frames)
+            latents = [self.vae_encoder.encode(frame.unsqueeze(0)) for frame in frames]
         else:
             masks = [torch.where(m > 0.5, 1.0, 0.0) for m in masks]
             inactive = [i * (1 - m) + 0 * m for i, m in zip(frames, masks)]
             reactive = [i * m + 0 * (1 - m) for i, m in zip(frames, masks)]
-            inactive = self.vae_encoder.encode(inactive)
-            reactive = self.vae_encoder.encode(reactive)
+            inactive = [self.vae_encoder.encode(inact.unsqueeze(0)) for inact in inactive]
+            reactive = [self.vae_encoder.encode(react.unsqueeze(0)) for react in reactive]
             latents = [torch.cat((u, c), dim=0) for u, c in zip(inactive, reactive)]

         cat_latents = []
         for latent, refs in zip(latents, ref_images):
             if refs is not None:
                 if masks is None:
-                    ref_latent = self.vae_encoder.encode(refs)
+                    ref_latent = [self.vae_encoder.encode(ref.unsqueeze(0)) for ref in refs]
                 else:
-                    ref_latent = self.vae_encoder.encode(refs)
+                    ref_latent = [self.vae_encoder.encode(ref.unsqueeze(0)) for ref in refs]
                 ref_latent = [torch.cat((u, torch.zeros_like(u)), dim=0) for u in ref_latent]
             assert all([x.shape[1] == 1 for x in ref_latent])
             latent = torch.cat([*ref_latent, latent], dim=1)
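Every encode call now receives a single frame with a leading batch axis (frame.unsqueeze(0)) instead of a bare Python list, and the results stay in per-item lists for the later torch.cat calls. A small sketch of that batching pattern, with a dummy encoder standing in for self.vae_encoder.encode:

    import torch

    def encode(batch):                 # hypothetical stand-in for vae_encoder.encode
        assert batch.dim() == 4        # expects a batched (N, C, H, W) tensor
        return batch.mean(dim=(2, 3))  # dummy "latent" for illustration

    frames = [torch.rand(3, 8, 8) for _ in range(2)]            # unbatched frames
    latents = [encode(frame.unsqueeze(0)) for frame in frames]  # the fixed pattern
    assert all(lat.shape == (1, 3) for lat in latents)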
scripts/wan/run_wan_i2v_audio.sh
 #!/bin/bash

 # set path and first
-lightx2v_path=/mtc/gushiqiao/llmc_workspace/LightX2V
+lightx2v_path=
-model_path=/data/nvme0/gushiqiao/models/Lightx2v_models/Wan2.1-R2V721-Audio-14B-720P
+model_path=
-export CUDA_VISIBLE_DEVICES=0
+export CUDA_VISIBLE_DEVICES=2

 # set environment variables
 source ${lightx2v_path}/scripts/base/base.sh
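Note on the script change: lightx2v_path and model_path are now left blank rather than pointing at machine-specific directories, so users must set them to their own checkout and model locations before the script can source base.sh; the example GPU also moves from CUDA_VISIBLE_DEVICES=0 to 2.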