Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
xuwx1
LightX2V
Commits
e39d8438
"vscode:/vscode.git/clone" did not exist on "6db8aad4ad78abab5d83099b5aefcd1f56631f77"
Unverified
Commit
e39d8438
authored
Nov 07, 2025
by
gushiqiao
Committed by
GitHub
Nov 07, 2025
Browse files
update 5090 int8 config (#449)
parent
fe9aa39a
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
131 additions
and
9 deletions
+131
-9
configs/seko_talk/5090/seko_talk_5090_bf16.json
configs/seko_talk/5090/seko_talk_5090_bf16.json
+1
-1
configs/seko_talk/5090/seko_talk_5090_int8.json
configs/seko_talk/5090/seko_talk_5090_int8.json
+29
-0
configs/seko_talk/5090/seko_talk_5090_int8_8gpu.json
configs/seko_talk/5090/seko_talk_5090_int8_8gpu.json
+34
-0
lightx2v/common/ops/mm/mm_weight.py
lightx2v/common/ops/mm/mm_weight.py
+9
-4
lightx2v/models/input_encoders/hf/q_linear.py
lightx2v/models/input_encoders/hf/q_linear.py
+9
-4
scripts/seko_talk/run_seko_talk_21_5090_int8.sh
scripts/seko_talk/run_seko_talk_21_5090_int8.sh
+25
-0
scripts/seko_talk/run_seko_talk_21_5090_int8_dist.sh
scripts/seko_talk/run_seko_talk_21_5090_int8_dist.sh
+24
-0
No files found.
configs/seko_talk/5090/seko_talk_5090_bf16.json
View file @
e39d8438
{
{
"infer_steps"
:
4
,
"infer_steps"
:
4
,
"target_fps"
:
16
,
"target_fps"
:
16
,
"video_duration"
:
5
,
"video_duration"
:
360
,
"audio_sr"
:
16000
,
"audio_sr"
:
16000
,
"target_video_length"
:
81
,
"target_video_length"
:
81
,
"resize_mode"
:
"adaptive"
,
"resize_mode"
:
"adaptive"
,
...
...
configs/seko_talk/5090/seko_talk_5090_int8.json
0 → 100755
View file @
e39d8438
{
"infer_steps"
:
4
,
"target_fps"
:
16
,
"video_duration"
:
360
,
"audio_sr"
:
16000
,
"target_video_length"
:
81
,
"resize_mode"
:
"adaptive"
,
"self_attn_1_type"
:
"sage_attn3"
,
"cross_attn_1_type"
:
"sage_attn3"
,
"cross_attn_2_type"
:
"sage_attn3"
,
"sample_guide_scale"
:
1
,
"sample_shift"
:
5
,
"enable_cfg"
:
false
,
"use_31_block"
:
false
,
"cpu_offload"
:
true
,
"offload_granularity"
:
"block"
,
"offload_ratio"
:
1
,
"t5_cpu_offload"
:
false
,
"clip_cpu_offload"
:
false
,
"audio_encoder_cpu_offload"
:
false
,
"audio_adapter_cpu_offload"
:
false
,
"vae_cpu_offload"
:
false
,
"dit_quantized"
:
true
,
"dit_quant_scheme"
:
"int8-q8f"
,
"adapter_quantized"
:
true
,
"adapter_quant_scheme"
:
"int8-q8f"
,
"t5_quantized"
:
true
,
"t5_quant_scheme"
:
"int8-q8f"
}
configs/seko_talk/5090/seko_talk_5090_int8_8gpu.json
0 → 100755
View file @
e39d8438
{
"infer_steps"
:
4
,
"target_fps"
:
16
,
"video_duration"
:
360
,
"audio_sr"
:
16000
,
"target_video_length"
:
81
,
"resize_mode"
:
"adaptive"
,
"self_attn_1_type"
:
"sage_attn3"
,
"cross_attn_1_type"
:
"sage_attn3"
,
"cross_attn_2_type"
:
"sage_attn3"
,
"sample_guide_scale"
:
1
,
"sample_shift"
:
5
,
"enable_cfg"
:
false
,
"use_31_block"
:
false
,
"cpu_offload"
:
true
,
"offload_granularity"
:
"block"
,
"offload_ratio"
:
1
,
"t5_cpu_offload"
:
false
,
"clip_cpu_offload"
:
false
,
"audio_encoder_cpu_offload"
:
false
,
"audio_adapter_cpu_offload"
:
false
,
"vae_cpu_offload"
:
false
,
"dit_quantized"
:
true
,
"dit_quant_scheme"
:
"int8-q8f"
,
"adapter_quantized"
:
true
,
"adapter_quant_scheme"
:
"int8-q8f"
,
"t5_quantized"
:
true
,
"t5_quant_scheme"
:
"int8-q8f"
,
"parallel"
:
{
"seq_p_size"
:
8
,
"seq_p_attn_type"
:
"ulysses"
}
}
lightx2v/common/ops/mm/mm_weight.py
View file @
e39d8438
...
@@ -35,9 +35,14 @@ except ImportError:
...
@@ -35,9 +35,14 @@ except ImportError:
sgl_kernel
=
None
sgl_kernel
=
None
try
:
try
:
import
q8_kernels.functional
as
Q8F
from
q8_kernels.functional
.linear
import
q8_linear
except
ImportError
:
except
ImportError
:
Q8F
=
None
q8_linear
=
None
try
:
from
q8_kernels.functional.linear
import
fp8_linear
except
ImportError
:
fp8_linear
=
None
try
:
try
:
import
deep_gemm
import
deep_gemm
...
@@ -820,7 +825,7 @@ class MMWeightWfp8channelAfp8channeldynamicQ8F(MMWeightQuantTemplate):
...
@@ -820,7 +825,7 @@ class MMWeightWfp8channelAfp8channeldynamicQ8F(MMWeightQuantTemplate):
def
apply
(
self
,
input_tensor
):
def
apply
(
self
,
input_tensor
):
input_tensor_quant
,
input_tensor_scale
=
self
.
act_quant_func
(
input_tensor
)
input_tensor_quant
,
input_tensor_scale
=
self
.
act_quant_func
(
input_tensor
)
output_tensor
=
Q8F
.
linear
.
fp8_linear
(
output_tensor
=
fp8_linear
(
input_tensor_quant
,
input_tensor_quant
,
self
.
weight
,
self
.
weight
,
self
.
bias
.
float
()
if
self
.
bias
is
not
None
else
None
,
self
.
bias
.
float
()
if
self
.
bias
is
not
None
else
None
,
...
@@ -850,7 +855,7 @@ class MMWeightWint8channelAint8channeldynamicQ8F(MMWeightQuantTemplate):
...
@@ -850,7 +855,7 @@ class MMWeightWint8channelAint8channeldynamicQ8F(MMWeightQuantTemplate):
def
apply
(
self
,
input_tensor
):
def
apply
(
self
,
input_tensor
):
input_tensor_quant
,
input_tensor_scale
=
self
.
act_quant_func
(
input_tensor
)
input_tensor_quant
,
input_tensor_scale
=
self
.
act_quant_func
(
input_tensor
)
output_tensor
=
Q8F
.
linear
.
q8_linear
(
output_tensor
=
q8_linear
(
input_tensor_quant
,
input_tensor_quant
,
self
.
weight
,
self
.
weight
,
self
.
bias
.
float
()
if
self
.
bias
is
not
None
else
None
,
self
.
bias
.
float
()
if
self
.
bias
is
not
None
else
None
,
...
...
lightx2v/models/input_encoders/hf/q_linear.py
View file @
e39d8438
...
@@ -17,9 +17,14 @@ except ModuleNotFoundError:
...
@@ -17,9 +17,14 @@ except ModuleNotFoundError:
quant_int8_per_token_matmul
,
quantize_activation_per_token_absmax
=
None
,
None
quant_int8_per_token_matmul
,
quantize_activation_per_token_absmax
=
None
,
None
try
:
try
:
import
q8_kernels.functional
as
Q8F
from
q8_kernels.functional
.linear
import
q8_linear
except
ImportError
:
except
ImportError
:
Q8F
=
None
q8_linear
=
None
try
:
from
q8_kernels.functional.linear
import
fp8_linear
except
ImportError
:
fp8_linear
=
None
class
VllmQuantLinearInt8
(
nn
.
Module
):
class
VllmQuantLinearInt8
(
nn
.
Module
):
...
@@ -236,7 +241,7 @@ class Q8FQuantLinearInt8(nn.Module):
...
@@ -236,7 +241,7 @@ class Q8FQuantLinearInt8(nn.Module):
def
forward
(
self
,
x
):
def
forward
(
self
,
x
):
input_tensor_quant
,
input_tensor_scale
=
self
.
act_quant_func
(
x
)
input_tensor_quant
,
input_tensor_scale
=
self
.
act_quant_func
(
x
)
output_tensor
=
Q8F
.
linear
.
q8_linear
(
output_tensor
=
q8_linear
(
input_tensor_quant
,
input_tensor_quant
,
self
.
weight
,
self
.
weight
,
self
.
bias
if
self
.
bias
is
not
None
else
None
,
self
.
bias
if
self
.
bias
is
not
None
else
None
,
...
@@ -282,7 +287,7 @@ class Q8FQuantLinearFp8(nn.Module):
...
@@ -282,7 +287,7 @@ class Q8FQuantLinearFp8(nn.Module):
def
forward
(
self
,
x
):
def
forward
(
self
,
x
):
input_tensor_quant
,
input_tensor_scale
=
self
.
act_quant_func
(
x
)
input_tensor_quant
,
input_tensor_scale
=
self
.
act_quant_func
(
x
)
output_tensor
=
Q8F
.
linear
.
fp8_linear
(
output_tensor
=
fp8_linear
(
input_tensor_quant
,
input_tensor_quant
,
self
.
weight
,
self
.
weight
,
self
.
bias
if
self
.
bias
is
not
None
else
None
,
self
.
bias
if
self
.
bias
is
not
None
else
None
,
...
...
scripts/seko_talk/run_seko_talk_21_5090_int8.sh
0 → 100755
View file @
e39d8438
#!/bin/bash
lightx2v_path
=
/path/to/Lightx2v
model_path
=
/path/to/SekoTalk-Distill-int8
export
CUDA_VISIBLE_DEVICES
=
0
# set environment variables
source
${
lightx2v_path
}
/scripts/base/base.sh
export
PYTORCH_CUDA_ALLOC_CONF
=
expandable_segments:True
export
SENSITIVE_LAYER_DTYPE
=
None
python
-m
lightx2v.infer
\
--model_cls
seko_talk
\
--task
s2v
\
--model_path
$model_path
\
--config_json
${
lightx2v_path
}
/configs/seko_talk/5090/seko_talk_5090_int8.json
\
--prompt
"The video features a male speaking to the camera with arms spread out, a slightly furrowed brow, and a focused gaze."
\
--negative_prompt
色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走
\
--image_path
${
lightx2v_path
}
/assets/inputs/audio/seko_input.png
\
--audio_path
${
lightx2v_path
}
/assets/inputs/audio/seko_input.mp3
\
--save_result_path
${
lightx2v_path
}
/save_results/output_lightx2v_seko_talk.mp4
scripts/seko_talk/run_seko_talk_21_5090_int8_dist.sh
0 → 100755
View file @
e39d8438
#!/bin/bash
lightx2v_path
=
/path/to/Lightx2v
model_path
=
/path/to/SekoTalk-Distill-int8
export
CUDA_VISIBLE_DEVICES
=
0,1,2,3,4,5,6,7
# set environment variables
source
${
lightx2v_path
}
/scripts/base/base.sh
export
PYTORCH_CUDA_ALLOC_CONF
=
expandable_segments:True
export
SENSITIVE_LAYER_DTYPE
=
None
torchrun
--nproc-per-node
8
-m
lightx2v.infer
\
--model_cls
seko_talk
\
--task
s2v
\
--model_path
$model_path
\
--config_json
${
lightx2v_path
}
/configs/seko_talk/5090/seko_talk_5090_int8_8gpu.json
\
--prompt
"The video features a male speaking to the camera with arms spread out, a slightly furrowed brow, and a focused gaze."
\
--negative_prompt
色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走
\
--image_path
${
lightx2v_path
}
/assets/inputs/audio/seko_input.png
\
--audio_path
${
lightx2v_path
}
/assets/inputs/audio/seko_input.mp3
\
--save_result_path
${
lightx2v_path
}
/save_results/output_lightx2v_seko_talk.mp4
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment