Commit 0f12f80a authored by zhuwenwen
Browse files

修改增加SlimQuantW4A8Int8MoEMethod 获取intermediate_size_per_partition 支持

parent 54139f16
...@@ -825,7 +825,7 @@ class FusedMoE(torch.nn.Module): ...@@ -825,7 +825,7 @@ class FusedMoE(torch.nn.Module):
if (self.quant_method.__class__.__name__ in ("BlockInt8MoEMethod")): if (self.quant_method.__class__.__name__ in ("BlockInt8MoEMethod")):
moe_quant_params["intermediate_size"] = self.intermediate_size_per_partition moe_quant_params["intermediate_size"] = self.intermediate_size_per_partition
if (self.quant_method.__class__.__name__ in ("W8A8Int8MoEMethod")): if (self.quant_method.__class__.__name__ in ("SlimQuantW4A8Int8MoEMethod")):
moe_quant_params["intermediate_size"] = self.intermediate_size_per_partition moe_quant_params["intermediate_size"] = self.intermediate_size_per_partition
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment