Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
xuwx1
LightX2V
Commits
e39d8438
Unverified
Commit
e39d8438
authored
Nov 07, 2025
by
gushiqiao
Committed by
GitHub
Nov 07, 2025
Browse files
update 5090 int8 config (#449)
parent
fe9aa39a
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
131 additions
and
9 deletions
+131
-9
configs/seko_talk/5090/seko_talk_5090_bf16.json
configs/seko_talk/5090/seko_talk_5090_bf16.json
+1
-1
configs/seko_talk/5090/seko_talk_5090_int8.json
configs/seko_talk/5090/seko_talk_5090_int8.json
+29
-0
configs/seko_talk/5090/seko_talk_5090_int8_8gpu.json
configs/seko_talk/5090/seko_talk_5090_int8_8gpu.json
+34
-0
lightx2v/common/ops/mm/mm_weight.py
lightx2v/common/ops/mm/mm_weight.py
+9
-4
lightx2v/models/input_encoders/hf/q_linear.py
lightx2v/models/input_encoders/hf/q_linear.py
+9
-4
scripts/seko_talk/run_seko_talk_21_5090_int8.sh
scripts/seko_talk/run_seko_talk_21_5090_int8.sh
+25
-0
scripts/seko_talk/run_seko_talk_21_5090_int8_dist.sh
scripts/seko_talk/run_seko_talk_21_5090_int8_dist.sh
+24
-0
No files found.
configs/seko_talk/5090/seko_talk_5090_bf16.json
View file @
e39d8438
{
"infer_steps"
:
4
,
"target_fps"
:
16
,
"video_duration"
:
5
,
"video_duration"
:
360
,
"audio_sr"
:
16000
,
"target_video_length"
:
81
,
"resize_mode"
:
"adaptive"
,
...
...
configs/seko_talk/5090/seko_talk_5090_int8.json
0 → 100755
View file @
e39d8438
{
"infer_steps"
:
4
,
"target_fps"
:
16
,
"video_duration"
:
360
,
"audio_sr"
:
16000
,
"target_video_length"
:
81
,
"resize_mode"
:
"adaptive"
,
"self_attn_1_type"
:
"sage_attn3"
,
"cross_attn_1_type"
:
"sage_attn3"
,
"cross_attn_2_type"
:
"sage_attn3"
,
"sample_guide_scale"
:
1
,
"sample_shift"
:
5
,
"enable_cfg"
:
false
,
"use_31_block"
:
false
,
"cpu_offload"
:
true
,
"offload_granularity"
:
"block"
,
"offload_ratio"
:
1
,
"t5_cpu_offload"
:
false
,
"clip_cpu_offload"
:
false
,
"audio_encoder_cpu_offload"
:
false
,
"audio_adapter_cpu_offload"
:
false
,
"vae_cpu_offload"
:
false
,
"dit_quantized"
:
true
,
"dit_quant_scheme"
:
"int8-q8f"
,
"adapter_quantized"
:
true
,
"adapter_quant_scheme"
:
"int8-q8f"
,
"t5_quantized"
:
true
,
"t5_quant_scheme"
:
"int8-q8f"
}
configs/seko_talk/5090/seko_talk_5090_int8_8gpu.json
0 → 100755
View file @
e39d8438
{
"infer_steps"
:
4
,
"target_fps"
:
16
,
"video_duration"
:
360
,
"audio_sr"
:
16000
,
"target_video_length"
:
81
,
"resize_mode"
:
"adaptive"
,
"self_attn_1_type"
:
"sage_attn3"
,
"cross_attn_1_type"
:
"sage_attn3"
,
"cross_attn_2_type"
:
"sage_attn3"
,
"sample_guide_scale"
:
1
,
"sample_shift"
:
5
,
"enable_cfg"
:
false
,
"use_31_block"
:
false
,
"cpu_offload"
:
true
,
"offload_granularity"
:
"block"
,
"offload_ratio"
:
1
,
"t5_cpu_offload"
:
false
,
"clip_cpu_offload"
:
false
,
"audio_encoder_cpu_offload"
:
false
,
"audio_adapter_cpu_offload"
:
false
,
"vae_cpu_offload"
:
false
,
"dit_quantized"
:
true
,
"dit_quant_scheme"
:
"int8-q8f"
,
"adapter_quantized"
:
true
,
"adapter_quant_scheme"
:
"int8-q8f"
,
"t5_quantized"
:
true
,
"t5_quant_scheme"
:
"int8-q8f"
,
"parallel"
:
{
"seq_p_size"
:
8
,
"seq_p_attn_type"
:
"ulysses"
}
}
lightx2v/common/ops/mm/mm_weight.py
View file @
e39d8438
...
...
@@ -35,9 +35,14 @@ except ImportError:
sgl_kernel
=
None
try
:
import
q8_kernels.functional
as
Q8F
from
q8_kernels.functional
.linear
import
q8_linear
except
ImportError
:
Q8F
=
None
q8_linear
=
None
try
:
from
q8_kernels.functional.linear
import
fp8_linear
except
ImportError
:
fp8_linear
=
None
try
:
import
deep_gemm
...
...
@@ -820,7 +825,7 @@ class MMWeightWfp8channelAfp8channeldynamicQ8F(MMWeightQuantTemplate):
def
apply
(
self
,
input_tensor
):
input_tensor_quant
,
input_tensor_scale
=
self
.
act_quant_func
(
input_tensor
)
output_tensor
=
Q8F
.
linear
.
fp8_linear
(
output_tensor
=
fp8_linear
(
input_tensor_quant
,
self
.
weight
,
self
.
bias
.
float
()
if
self
.
bias
is
not
None
else
None
,
...
...
@@ -850,7 +855,7 @@ class MMWeightWint8channelAint8channeldynamicQ8F(MMWeightQuantTemplate):
def
apply
(
self
,
input_tensor
):
input_tensor_quant
,
input_tensor_scale
=
self
.
act_quant_func
(
input_tensor
)
output_tensor
=
Q8F
.
linear
.
q8_linear
(
output_tensor
=
q8_linear
(
input_tensor_quant
,
self
.
weight
,
self
.
bias
.
float
()
if
self
.
bias
is
not
None
else
None
,
...
...
lightx2v/models/input_encoders/hf/q_linear.py
View file @
e39d8438
...
...
@@ -17,9 +17,14 @@ except ModuleNotFoundError:
quant_int8_per_token_matmul
,
quantize_activation_per_token_absmax
=
None
,
None
try
:
import
q8_kernels.functional
as
Q8F
from
q8_kernels.functional
.linear
import
q8_linear
except
ImportError
:
Q8F
=
None
q8_linear
=
None
try
:
from
q8_kernels.functional.linear
import
fp8_linear
except
ImportError
:
fp8_linear
=
None
class
VllmQuantLinearInt8
(
nn
.
Module
):
...
...
@@ -236,7 +241,7 @@ class Q8FQuantLinearInt8(nn.Module):
def
forward
(
self
,
x
):
input_tensor_quant
,
input_tensor_scale
=
self
.
act_quant_func
(
x
)
output_tensor
=
Q8F
.
linear
.
q8_linear
(
output_tensor
=
q8_linear
(
input_tensor_quant
,
self
.
weight
,
self
.
bias
if
self
.
bias
is
not
None
else
None
,
...
...
@@ -282,7 +287,7 @@ class Q8FQuantLinearFp8(nn.Module):
def
forward
(
self
,
x
):
input_tensor_quant
,
input_tensor_scale
=
self
.
act_quant_func
(
x
)
output_tensor
=
Q8F
.
linear
.
fp8_linear
(
output_tensor
=
fp8_linear
(
input_tensor_quant
,
self
.
weight
,
self
.
bias
if
self
.
bias
is
not
None
else
None
,
...
...
scripts/seko_talk/run_seko_talk_21_5090_int8.sh
0 → 100755
View file @
e39d8438
#!/bin/bash
# Run SekoTalk (task s2v) inference on a single GPU with the 5090 int8 config.
# Replace the two placeholder paths below before running.

lightx2v_path=/path/to/Lightx2v
model_path=/path/to/SekoTalk-Distill-int8

# Expose only GPU 0 to the process.
export CUDA_VISIBLE_DEVICES=0

# set environment variables
# Quoted so that a checkout path containing spaces is not word-split.
source "${lightx2v_path}/scripts/base/base.sh"

# PyTorch CUDA caching-allocator option.
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
# NOTE(review): read by lightx2v; exact effect of "None" is not visible here — confirm.
export SENSITIVE_LAYER_DTYPE=None

# Every path expansion is double-quoted to prevent word splitting and globbing.
python -m lightx2v.infer \
    --model_cls seko_talk \
    --task s2v \
    --model_path "${model_path}" \
    --config_json "${lightx2v_path}/configs/seko_talk/5090/seko_talk_5090_int8.json" \
    --prompt "The video features a male speaking to the camera with arms spread out, a slightly furrowed brow, and a focused gaze." \
    --negative_prompt "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" \
    --image_path "${lightx2v_path}/assets/inputs/audio/seko_input.png" \
    --audio_path "${lightx2v_path}/assets/inputs/audio/seko_input.mp3" \
    --save_result_path "${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4"
scripts/seko_talk/run_seko_talk_21_5090_int8_dist.sh
0 → 100755
View file @
e39d8438
#!/bin/bash
# Run SekoTalk (task s2v) inference across 8 GPUs via torchrun with the
# 5090 int8 8-GPU config. Replace the two placeholder paths below before running.

lightx2v_path=/path/to/Lightx2v
model_path=/path/to/SekoTalk-Distill-int8

# Expose GPUs 0-7; the count matches --nproc-per-node below.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# set environment variables
# Quoted so that a checkout path containing spaces is not word-split.
source "${lightx2v_path}/scripts/base/base.sh"

# PyTorch CUDA caching-allocator option.
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
# NOTE(review): read by lightx2v; exact effect of "None" is not visible here — confirm.
export SENSITIVE_LAYER_DTYPE=None

# Every path expansion is double-quoted to prevent word splitting and globbing.
torchrun --nproc-per-node 8 -m lightx2v.infer \
    --model_cls seko_talk \
    --task s2v \
    --model_path "${model_path}" \
    --config_json "${lightx2v_path}/configs/seko_talk/5090/seko_talk_5090_int8_8gpu.json" \
    --prompt "The video features a male speaking to the camera with arms spread out, a slightly furrowed brow, and a focused gaze." \
    --negative_prompt "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" \
    --image_path "${lightx2v_path}/assets/inputs/audio/seko_input.png" \
    --audio_path "${lightx2v_path}/assets/inputs/audio/seko_input.mp3" \
    --save_result_path "${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment