为 SlimQuantW4A8Int8MoEMethod 增加获取 intermediate_size_per_partition 的支持

0f12f80a · zhuwenwen · 54139f16 · 0f12f80a
Commit 0f12f80a authored Aug 09, 2025 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing with 1 addition and 1 deletion

vllm/model_executor/layers/fused_moe/layer.py +1 -1

No files found.
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -825,7 +825,7 @@ class FusedMoE(torch.nn.Module):
        if (self.quant_method.__class__.__name__ in ("BlockInt8MoEMethod")):
            moe_quant_params["intermediate_size"] = self.intermediate_size_per_partition
-        if (self.quant_method.__class__.__name__ in ("W8A8Int8MoEMethod")):
+        if (self.quant_method.__class__.__name__ in ("SlimQuantW4A8Int8MoEMethod")):
            moe_quant_params["intermediate_size"] = self.intermediate_size_per_partition