Commit 6ecc1e41 (unverified), authored by tjp_zju, committed by GitHub
Browse files

[Bugfix] fix _get_quant_method of FusedMoE for deepseekV3.2 on non-NV… (#30057)


Signed-off-by: tjp_zju <tanjianpingzju1990@gmail.com>
parent 0bb0bae4
...@@ -17,6 +17,9 @@ from vllm.model_executor.layers.fused_moe.layer import ( ...@@ -17,6 +17,9 @@ from vllm.model_executor.layers.fused_moe.layer import (
FusedMoEMethodBase, FusedMoEMethodBase,
FusedMoeWeightScaleSupported, FusedMoeWeightScaleSupported,
) )
from vllm.model_executor.layers.fused_moe.unquantized_fused_moe_method import (
UnquantizedFusedMoEMethod,
)
from vllm.model_executor.layers.linear import LinearBase, UnquantizedLinearMethod from vllm.model_executor.layers.linear import LinearBase, UnquantizedLinearMethod
from vllm.model_executor.layers.quantization import QuantizationMethods from vllm.model_executor.layers.quantization import QuantizationMethods
from vllm.model_executor.layers.quantization.base_config import ( from vllm.model_executor.layers.quantization.base_config import (
...@@ -162,6 +165,8 @@ class MoeWNA16Config(QuantizationConfig): ...@@ -162,6 +165,8 @@ class MoeWNA16Config(QuantizationConfig):
self, layer: torch.nn.Module, prefix: str self, layer: torch.nn.Module, prefix: str
) -> Optional["QuantizeMethodBase"]: ) -> Optional["QuantizeMethodBase"]:
if is_layer_skipped_quant(prefix, self.modules_to_not_convert): if is_layer_skipped_quant(prefix, self.modules_to_not_convert):
if isinstance(layer, FusedMoE):
return UnquantizedFusedMoEMethod(layer.moe_config)
return UnquantizedLinearMethod() return UnquantizedLinearMethod()
elif isinstance(layer, LinearBase): elif isinstance(layer, LinearBase):
# Avoid circular import # Avoid circular import
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment