Unverified Commit e39d8438 authored by gushiqiao's avatar gushiqiao Committed by GitHub
Browse files

update 5090 int8 config (#449)

parent fe9aa39a
{ {
"infer_steps": 4, "infer_steps": 4,
"target_fps": 16, "target_fps": 16,
"video_duration": 5, "video_duration": 360,
"audio_sr": 16000, "audio_sr": 16000,
"target_video_length": 81, "target_video_length": 81,
"resize_mode": "adaptive", "resize_mode": "adaptive",
......
{
"infer_steps": 4,
"target_fps": 16,
"video_duration": 360,
"audio_sr": 16000,
"target_video_length": 81,
"resize_mode": "adaptive",
"self_attn_1_type": "sage_attn3",
"cross_attn_1_type": "sage_attn3",
"cross_attn_2_type": "sage_attn3",
"sample_guide_scale": 1,
"sample_shift": 5,
"enable_cfg": false,
"use_31_block": false,
"cpu_offload": true,
"offload_granularity": "block",
"offload_ratio": 1,
"t5_cpu_offload": false,
"clip_cpu_offload": false,
"audio_encoder_cpu_offload": false,
"audio_adapter_cpu_offload": false,
"vae_cpu_offload": false,
"dit_quantized": true,
"dit_quant_scheme": "int8-q8f",
"adapter_quantized": true,
"adapter_quant_scheme": "int8-q8f",
"t5_quantized": true,
"t5_quant_scheme": "int8-q8f"
}
{
"infer_steps": 4,
"target_fps": 16,
"video_duration": 360,
"audio_sr": 16000,
"target_video_length": 81,
"resize_mode": "adaptive",
"self_attn_1_type": "sage_attn3",
"cross_attn_1_type": "sage_attn3",
"cross_attn_2_type": "sage_attn3",
"sample_guide_scale": 1,
"sample_shift": 5,
"enable_cfg": false,
"use_31_block": false,
"cpu_offload": true,
"offload_granularity": "block",
"offload_ratio": 1,
"t5_cpu_offload": false,
"clip_cpu_offload": false,
"audio_encoder_cpu_offload": false,
"audio_adapter_cpu_offload": false,
"vae_cpu_offload": false,
"dit_quantized": true,
"dit_quant_scheme": "int8-q8f",
"adapter_quantized": true,
"adapter_quant_scheme": "int8-q8f",
"t5_quantized": true,
"t5_quant_scheme": "int8-q8f",
"parallel": {
"seq_p_size": 8,
"seq_p_attn_type": "ulysses"
}
}
...@@ -35,9 +35,14 @@ except ImportError: ...@@ -35,9 +35,14 @@ except ImportError:
sgl_kernel = None sgl_kernel = None
try: try:
import q8_kernels.functional as Q8F from q8_kernels.functional.linear import q8_linear
except ImportError: except ImportError:
Q8F = None q8_linear = None
try:
from q8_kernels.functional.linear import fp8_linear
except ImportError:
fp8_linear = None
try: try:
import deep_gemm import deep_gemm
...@@ -820,7 +825,7 @@ class MMWeightWfp8channelAfp8channeldynamicQ8F(MMWeightQuantTemplate): ...@@ -820,7 +825,7 @@ class MMWeightWfp8channelAfp8channeldynamicQ8F(MMWeightQuantTemplate):
def apply(self, input_tensor): def apply(self, input_tensor):
input_tensor_quant, input_tensor_scale = self.act_quant_func(input_tensor) input_tensor_quant, input_tensor_scale = self.act_quant_func(input_tensor)
output_tensor = Q8F.linear.fp8_linear( output_tensor = fp8_linear(
input_tensor_quant, input_tensor_quant,
self.weight, self.weight,
self.bias.float() if self.bias is not None else None, self.bias.float() if self.bias is not None else None,
...@@ -850,7 +855,7 @@ class MMWeightWint8channelAint8channeldynamicQ8F(MMWeightQuantTemplate): ...@@ -850,7 +855,7 @@ class MMWeightWint8channelAint8channeldynamicQ8F(MMWeightQuantTemplate):
def apply(self, input_tensor): def apply(self, input_tensor):
input_tensor_quant, input_tensor_scale = self.act_quant_func(input_tensor) input_tensor_quant, input_tensor_scale = self.act_quant_func(input_tensor)
output_tensor = Q8F.linear.q8_linear( output_tensor = q8_linear(
input_tensor_quant, input_tensor_quant,
self.weight, self.weight,
self.bias.float() if self.bias is not None else None, self.bias.float() if self.bias is not None else None,
......
...@@ -17,9 +17,14 @@ except ModuleNotFoundError: ...@@ -17,9 +17,14 @@ except ModuleNotFoundError:
quant_int8_per_token_matmul, quantize_activation_per_token_absmax = None, None quant_int8_per_token_matmul, quantize_activation_per_token_absmax = None, None
try: try:
import q8_kernels.functional as Q8F from q8_kernels.functional.linear import q8_linear
except ImportError: except ImportError:
Q8F = None q8_linear = None
try:
from q8_kernels.functional.linear import fp8_linear
except ImportError:
fp8_linear = None
class VllmQuantLinearInt8(nn.Module): class VllmQuantLinearInt8(nn.Module):
...@@ -236,7 +241,7 @@ class Q8FQuantLinearInt8(nn.Module): ...@@ -236,7 +241,7 @@ class Q8FQuantLinearInt8(nn.Module):
def forward(self, x): def forward(self, x):
input_tensor_quant, input_tensor_scale = self.act_quant_func(x) input_tensor_quant, input_tensor_scale = self.act_quant_func(x)
output_tensor = Q8F.linear.q8_linear( output_tensor = q8_linear(
input_tensor_quant, input_tensor_quant,
self.weight, self.weight,
self.bias if self.bias is not None else None, self.bias if self.bias is not None else None,
...@@ -282,7 +287,7 @@ class Q8FQuantLinearFp8(nn.Module): ...@@ -282,7 +287,7 @@ class Q8FQuantLinearFp8(nn.Module):
def forward(self, x): def forward(self, x):
input_tensor_quant, input_tensor_scale = self.act_quant_func(x) input_tensor_quant, input_tensor_scale = self.act_quant_func(x)
output_tensor = Q8F.linear.fp8_linear( output_tensor = fp8_linear(
input_tensor_quant, input_tensor_quant,
self.weight, self.weight,
self.bias if self.bias is not None else None, self.bias if self.bias is not None else None,
......
#!/bin/bash
# Single-GPU int8 inference for the SekoTalk distilled model (RTX 5090 config).
# Edit the two paths below before running.
lightx2v_path=/path/to/Lightx2v
model_path=/path/to/SekoTalk-Distill-int8

export CUDA_VISIBLE_DEVICES=0

# set environment variables (quote the path: it breaks if the install dir contains spaces)
source "${lightx2v_path}/scripts/base/base.sh"
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
export SENSITIVE_LAYER_DTYPE=None

# All expansions are quoted so paths with whitespace survive word splitting,
# and the negative prompt is passed as a single argument regardless of content.
python -m lightx2v.infer \
    --model_cls seko_talk \
    --task s2v \
    --model_path "${model_path}" \
    --config_json "${lightx2v_path}/configs/seko_talk/5090/seko_talk_5090_int8.json" \
    --prompt "The video features a male speaking to the camera with arms spread out, a slightly furrowed brow, and a focused gaze." \
    --negative_prompt "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" \
    --image_path "${lightx2v_path}/assets/inputs/audio/seko_input.png" \
    --audio_path "${lightx2v_path}/assets/inputs/audio/seko_input.mp3" \
    --save_result_path "${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4"
#!/bin/bash
# 8-GPU int8 inference for the SekoTalk distilled model (RTX 5090 config),
# launched via torchrun with sequence parallelism (see the _8gpu config json).
# Edit the two paths below before running.
lightx2v_path=/path/to/Lightx2v
model_path=/path/to/SekoTalk-Distill-int8

export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# set environment variables (quote the path: it breaks if the install dir contains spaces)
source "${lightx2v_path}/scripts/base/base.sh"
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
export SENSITIVE_LAYER_DTYPE=None

# All expansions are quoted so paths with whitespace survive word splitting,
# and the negative prompt is passed as a single argument regardless of content.
torchrun --nproc-per-node 8 -m lightx2v.infer \
    --model_cls seko_talk \
    --task s2v \
    --model_path "${model_path}" \
    --config_json "${lightx2v_path}/configs/seko_talk/5090/seko_talk_5090_int8_8gpu.json" \
    --prompt "The video features a male speaking to the camera with arms spread out, a slightly furrowed brow, and a focused gaze." \
    --negative_prompt "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" \
    --image_path "${lightx2v_path}/assets/inputs/audio/seko_input.png" \
    --audio_path "${lightx2v_path}/assets/inputs/audio/seko_input.mp3" \
    --save_result_path "${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment