[Bugfix] fix _get_quant_method of FusedMoE for deepseekV3.2 on non-NV… (#30057)

Signed-off-by: tjp_zju <tanjianpingzju1990@gmail.com>

[Bugfix] fix _get_quant_method of FusedMoE for deepseekV3.2 on non-NV… (#30057)
Signed-off-by: tjp_zju <tanjianpingzju1990@gmail.com>
6ecc1e41 · tjp_zju · GitHub · 0bb0bae4 · 6ecc1e41
Unverified commit 6ecc1e41, authored Dec 14, 2025 by tjp_zju; committed by GitHub on Dec 14, 2025
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 5 additions and 0 deletions

vllm/model_executor/layers/quantization/moe_wna16.py vllm/model_executor/layers/quantization/moe_wna16.py +5 -0

No files found.
--- a/vllm/model_executor/layers/quantization/moe_wna16.py
+++ b/vllm/model_executor/layers/quantization/moe_wna16.py
@@ -17,6 +17,9 @@ from vllm.model_executor.layers.fused_moe.layer import (
    FusedMoEMethodBase,
    FusedMoeWeightScaleSupported,
 )
+from vllm.model_executor.layers.fused_moe.unquantized_fused_moe_method import (
+    UnquantizedFusedMoEMethod,
+)
 from vllm.model_executor.layers.linear import LinearBase, UnquantizedLinearMethod
 from vllm.model_executor.layers.quantization import QuantizationMethods
 from vllm.model_executor.layers.quantization.base_config import (
@@ -162,6 +165,8 @@ class MoeWNA16Config(QuantizationConfig):
        self, layer: torch.nn.Module, prefix: str
    ) -> Optional["QuantizeMethodBase"]:
        if is_layer_skipped_quant(prefix, self.modules_to_not_convert):
+            if isinstance(layer, FusedMoE):
+                return UnquantizedFusedMoEMethod(layer.moe_config)
            return UnquantizedLinearMethod()
        elif isinstance(layer, LinearBase):
            # Avoid circular import