Commit 8a74165f authored by 王敏
Browse files

w4a8 默认使用deepgemm的masked接口

parent d04137d6
...@@ -41,9 +41,9 @@ from vllm.model_executor.layers.activation import SiluAndMul ...@@ -41,9 +41,9 @@ from vllm.model_executor.layers.activation import SiluAndMul
from lightop import fuse_silu_mul_quant_ep, m_grouped_w4a8_gemm_nt_masked from lightop import fuse_silu_mul_quant_ep, m_grouped_w4a8_gemm_nt_masked
from lmslim.layers.gemm.int8_utils import per_token_quant_int8 from lmslim.layers.gemm.int8_utils import per_token_quant_int8
if has_deep_gemm(): if has_deep_gemm():
from deepgemm import m_grouped_w8a8_gemm_nt_masked from deepgemm import m_grouped_w8a8_gemm_nt_masked, m_grouped_w4a8_gemm_nt_masked
else: else:
from lightop import m_grouped_w8a8_gemm_nt_masked from lightop import m_grouped_w8a8_gemm_nt_masked, m_grouped_w4a8_gemm_nt_masked
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment