"vscode:/vscode.git/clone" did not exist on "f0bbfaf917edef99fe7817d607d56803695610ca"
Commit c03a553b authored by 王敏's avatar 王敏
Browse files

[feat]w8a8 高吞吐模式先量化再dispatch

parent 4fadef92
......@@ -520,6 +520,8 @@ class CompressedTensorsW8A8Int8MarlinMoEMethod(CompressedTensorsMarlinMoEMethod)
False)
return TritonOrGroupGemmExperts(
use_int8_w8a8=envs.VLLM_ENABLE_DEEPEP_HT_DEEPGEMM,
#use_int8_w8a8=envs.VLLM_ENABLE_DEEPEP_HT_DEEPGEMM,
use_int8_w8a8=True,
per_act_token_quant=True,
fused_experts=self.w8a8_groupgemm_contiguous_forward if envs.VLLM_ENABLE_DEEPEP_HT_DEEPGEMM else self.fused_moe_forward
)
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment