xuwx1 / LightX2V · Commits

Commit a40ffb3f — authored Sep 15, 2025 by Watebear, committed via GitHub on Sep 15, 2025

    refactor qwen-image (#297)

Parent: 701075f4
Changes: 26 · Showing 6 changed files with 421 additions and 15 deletions (+421 −15) — page 1 of 2 in the original paginated view.
  lightx2v/models/networks/qwen_image/weights/pre_weights.py           +30  −0   (new)
  lightx2v/models/networks/qwen_image/weights/transformer_weights.py   +318 −0   (new)
  lightx2v/models/runners/qwen_image/qwen_image_runner.py              +3   −7
  lightx2v/models/video_encoders/hf/qwen_image/vae.py                  +30  −8
  scripts/qwen_image/qwen_image_i2i_block.sh                           +40  −0   (new)
  scripts/qwen_image/qwen_image_t2i_block.sh                           +0   −0   (renamed)
lightx2v/models/networks/qwen_image/weights/pre_weights.py (new file, mode 100644)

from lightx2v.common.modules.weight_module import WeightModule
from lightx2v.utils.registry_factory import (
    MM_WEIGHT_REGISTER,
    RMS_WEIGHT_REGISTER,
)


class QwenImagePreWeights(WeightModule):
    def __init__(self, config):
        super().__init__()
        self.config = config
        # img_in
        self.add_module(
            "img_in",
            MM_WEIGHT_REGISTER["Default"]("img_in.weight", "img_in.bias"),
        )
        # txt_in
        self.add_module(
            "txt_in",
            MM_WEIGHT_REGISTER["Default"]("txt_in.weight", "txt_in.bias"),
        )
        # txt_norm
        self.add_module("txt_norm", RMS_WEIGHT_REGISTER["fp32_variance"]("txt_norm.weight"))
        # time_text_embed
        self.add_module(
            "time_text_embed_timestep_embedder_linear_1",
            MM_WEIGHT_REGISTER["Default"](
                "time_text_embed.timestep_embedder.linear_1.weight",
                "time_text_embed.timestep_embedder.linear_1.bias",
            ),
        )
        self.add_module(
            "time_text_embed_timestep_embedder_linear_2",
            MM_WEIGHT_REGISTER["Default"](
                "time_text_embed.timestep_embedder.linear_2.weight",
                "time_text_embed.timestep_embedder.linear_2.bias",
            ),
        )
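
Both registries act as string-keyed factories: indexing with a key such as "Default" or "fp32_variance" returns a wrapper class, which is then constructed with the checkpoint key(s) of one layer. A minimal sketch of how such a registry can work follows; WeightRegister, MMWeight, and the load() method are illustrative assumptions, not the actual lightx2v.utils.registry_factory API.

# Hypothetical sketch of a string-keyed weight registry, assuming an interface
# similar to what MM_WEIGHT_REGISTER["Default"](...) suggests.
class WeightRegister:
    def __init__(self):
        self._classes = {}

    def register(self, key):
        def decorator(cls):
            self._classes[key] = cls
            return cls
        return decorator

    def __getitem__(self, key):
        return self._classes[key]


MM_WEIGHT_REGISTER = WeightRegister()


@MM_WEIGHT_REGISTER.register("Default")
class MMWeight:
    # Binds the checkpoint keys of one linear layer (weight plus optional bias).
    def __init__(self, weight_name, bias_name=None):
        self.weight_name = weight_name
        self.bias_name = bias_name

    def load(self, state_dict):
        # Pull the named tensors out of a loaded checkpoint dict.
        self.weight = state_dict[self.weight_name]
        self.bias = state_dict[self.bias_name] if self.bias_name else None


# Usage mirrors pre_weights.py: look up a factory by key, bind checkpoint keys.
img_in = MM_WEIGHT_REGISTER["Default"]("img_in.weight", "img_in.bias")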
lightx2v/models/networks/qwen_image/weights/transformer_weights.py (new file, mode 100644)

[Diff collapsed in the original view; the new file adds 318 lines and is not reproduced here.]
lightx2v/models/runners/qwen_image/qwen_image_runner.py

@@ -69,14 +69,14 @@ class QwenImageRunner(DefaultRunner):
         else:
             assert NotImplementedError
+        self.model.set_scheduler(self.scheduler)
+
     @ProfilingContext4DebugL2("Run DiT")
     def _run_dit_local(self, total_steps=None):
         if self.config.get("lazy_load", False) or self.config.get("unload_modules", False):
             self.model = self.load_transformer()
         self.init_scheduler()
         self.model.scheduler.prepare(self.inputs["image_encoder_output"])
-        if self.config.get("model_cls") == "wan2.2" and self.config["task"] == "i2v":
-            self.inputs["image_encoder_output"]["vae_encoder_out"] = None
         latents, generator = self.run(total_steps)
         self.end_run()
         return latents, generator

@@ -167,11 +167,7 @@ class QwenImageRunner(DefaultRunner):
         self.config.target_shape = (self.config.batchsize, 1, num_channels_latents, height, width)

     def init_scheduler(self):
-        scheduler = QwenImageScheduler(self.config)
-        self.model.set_scheduler(scheduler)
-        self.model.pre_infer.set_scheduler(scheduler)
-        self.model.transformer_infer.set_scheduler(scheduler)
-        self.model.post_infer.set_scheduler(scheduler)
+        self.scheduler = QwenImageScheduler(self.config)

     def get_encoder_output_i2v(self):
         pass
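
The net effect of these two hunks: init_scheduler() now only constructs and stores the scheduler, while a single self.model.set_scheduler(self.scheduler) call replaces the four per-stage calls the runner used to make. A minimal sketch of the wiring this implies follows; the Stage and Model classes are placeholders, and the assumption that the model's set_scheduler fans out to pre_infer, transformer_infer, and post_infer internally is inferred from the diff rather than shown in it.

# Hypothetical sketch of the post-refactor scheduler wiring; only the call
# pattern comes from the diff, the class internals are assumptions.
class Stage:
    def set_scheduler(self, scheduler):
        self.scheduler = scheduler


class Model:
    def __init__(self):
        self.pre_infer = Stage()
        self.transformer_infer = Stage()
        self.post_infer = Stage()

    def set_scheduler(self, scheduler):
        # One entry point presumably replaces the per-stage calls removed above.
        self.scheduler = scheduler
        for stage in (self.pre_infer, self.transformer_infer, self.post_infer):
            stage.set_scheduler(scheduler)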
lightx2v/models/video_encoders/hf/qwen_image/vae.py

@@ -1,4 +1,5 @@
+import gc
 import json
 import os
 from typing import Optional

@@ -27,15 +28,23 @@ def retrieve_latents(encoder_output: torch.Tensor, generator: Optional[torch.Gen
 class AutoencoderKLQwenImageVAE:
     def __init__(self, config):
         self.config = config
-        self.model = AutoencoderKLQwenImage.from_pretrained(os.path.join(config.model_path, "vae")).to(torch.device("cuda")).to(torch.bfloat16)
-        self.image_processor = VaeImageProcessor(vae_scale_factor=config.vae_scale_factor * 2)
-        with open(os.path.join(config.model_path, "vae", "config.json"), "r") as f:
-            vae_config = json.load(f)
-        self.vae_scale_factor = 2 ** len(vae_config["temperal_downsample"]) if "temperal_downsample" in vae_config else 8
-        self.generator = torch.Generator(device="cuda").manual_seed(config.seed)
-        self.device = torch.device("cuda")
+        self.cpu_offload = config.get("cpu_offload", False)
+        if self.cpu_offload:
+            self.device = torch.device("cpu")
+        else:
+            self.device = torch.device("cuda")
         self.dtype = torch.bfloat16
         self.latent_channels = config.vae_z_dim
+        self.load()
+
+    def load(self):
+        self.model = AutoencoderKLQwenImage.from_pretrained(os.path.join(self.config.model_path, "vae")).to(self.device).to(self.dtype)
+        self.image_processor = VaeImageProcessor(vae_scale_factor=self.config.vae_scale_factor * 2)
+        with open(os.path.join(self.config.model_path, "vae", "config.json"), "r") as f:
+            vae_config = json.load(f)
+        self.vae_scale_factor = 2 ** len(vae_config["temperal_downsample"]) if "temperal_downsample" in vae_config else 8
+        self.generator = torch.Generator(device="cuda").manual_seed(self.config.seed)

     @staticmethod
     def _unpack_latents(latents, height, width, vae_scale_factor):

@@ -55,6 +64,8 @@ class AutoencoderKLQwenImageVAE:
     @torch.no_grad()
     def decode(self, latents):
+        if self.cpu_offload:
+            self.model.to(torch.device("cuda"))
         if self.config.task == "t2i":
             width, height = self.config.aspect_ratios[self.config.aspect_ratio]
         elif self.config.task == "i2i":

@@ -66,6 +77,10 @@ class AutoencoderKLQwenImageVAE:
         latents = latents / latents_std + latents_mean
         images = self.model.decode(latents, return_dict=False)[0][:, :, 0]
         images = self.image_processor.postprocess(images, output_type="pil")
+        if self.cpu_offload:
+            self.model.to(torch.device("cpu"))
+            torch.cuda.empty_cache()
+            gc.collect()
         return images

     @staticmethod

@@ -88,9 +103,12 @@ class AutoencoderKLQwenImageVAE:
         return image_latents

+    @torch.no_grad()
     def encode_vae_image(self, image):
+        if self.cpu_offload:
+            self.model.to(torch.device("cuda"))
         num_channels_latents = self.config.transformer_in_channels // 4
-        image = image.to(self.device).to(self.dtype)
+        image = image.to(self.model.device).to(self.dtype)
         if image.shape[1] != self.latent_channels:
             image_latents = self._encode_vae_image(image=image, generator=self.generator)
         else:

@@ -106,4 +124,8 @@ class AutoencoderKLQwenImageVAE:
         image_latent_height, image_latent_width = image_latents.shape[3:]
         image_latents = self._pack_latents(image_latents, self.config.batchsize, num_channels_latents, image_latent_height, image_latent_width)
+        if self.cpu_offload:
+            self.model.to(torch.device("cpu"))
+            torch.cuda.empty_cache()
+            gc.collect()
         return image_latents
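
Each cpu_offload site in this file follows the same pattern: move the model to the GPU just before the heavy call, move it back to the CPU afterwards, then release cached GPU memory. A standalone sketch of the pattern, using a generic torch module rather than the actual AutoencoderKLQwenImage:

# Generic illustration of the offload pattern above; not lightx2v code.
import gc

import torch
import torch.nn as nn


def run_offloaded(model: nn.Module, x: torch.Tensor) -> torch.Tensor:
    model.to(torch.device("cuda"))  # bring weights onto the GPU only while needed
    try:
        with torch.no_grad():
            out = model(x.to("cuda"))
    finally:
        model.to(torch.device("cpu"))  # evict the weights again
        torch.cuda.empty_cache()  # return cached blocks to the allocator
        gc.collect()
    return out.cpu()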
scripts/qwen_image/qwen_image_i2i_block.sh (new file, mode 100644)

#!/bin/bash

export CUDA_VISIBLE_DEVICES=

# set path and first
export lightx2v_path=
export model_path=

# check section
if [ -z "${CUDA_VISIBLE_DEVICES}" ]; then
    cuda_devices=0
    echo "Warn: CUDA_VISIBLE_DEVICES is not set, using default value: ${cuda_devices}, change at shell script or set env variable."
    export CUDA_VISIBLE_DEVICES=${cuda_devices}
fi

if [ -z "${lightx2v_path}" ]; then
    echo "Error: lightx2v_path is not set. Please set this variable first."
    exit 1
fi

if [ -z "${model_path}" ]; then
    echo "Error: model_path is not set. Please set this variable first."
    exit 1
fi

export TOKENIZERS_PARALLELISM=false
export PYTHONPATH=${lightx2v_path}:$PYTHONPATH

export DTYPE=BF16
export PROFILING_DEBUG_LEVEL=2
export ENABLE_GRAPH_MODE=false

python -m lightx2v.infer \
    --model_cls qwen_image \
    --task i2i \
    --model_path $model_path \
    --config_json ${lightx2v_path}/configs/offload/block/qwen_image_i2i_block.json \
    --prompt "Change the rabbit's color to purple, with a flash light background." \
    --image_path input.jpg \
    --save_video_path ${lightx2v_path}/save_results/qwen_image_i2i.png
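
To run it, fill in lightx2v_path and model_path at the top of the script (and optionally CUDA_VISIBLE_DEVICES), point --image_path at the source image to edit, and execute the script with bash; the edited result is written to ${lightx2v_path}/save_results/qwen_image_i2i.png.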
scripts/qwen_image/qwen_image_t2i_offload.sh → scripts/qwen_image/qwen_image_t2i_block.sh (file moved, no content changes)