Unverified commit 1ece7f30, authored by Jun-Howie and committed by GitHub
Browse files

Fix: AWQ Marlin get_quant_method does not recognize "modules_to_not_convert" (#21888)


Signed-off-by: JunHowie <JunHowie@aliyun.com>
Co-authored-by: JunHowie <JunHowie@aliyun.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
parent bc8372ef
...@@ -10,7 +10,8 @@ import vllm.model_executor.layers.fused_moe # noqa ...@@ -10,7 +10,8 @@ import vllm.model_executor.layers.fused_moe # noqa
from vllm import _custom_ops as ops from vllm import _custom_ops as ops
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.model_executor.layers.fused_moe.layer import ( from vllm.model_executor.layers.fused_moe.layer import (
FusedMoE, FusedMoEMethodBase, FusedMoeWeightScaleSupported) FusedMoE, FusedMoEMethodBase, FusedMoeWeightScaleSupported,
UnquantizedFusedMoEMethod)
from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase, from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase,
UnquantizedLinearMethod, UnquantizedLinearMethod,
set_weight_attrs) set_weight_attrs)
...@@ -141,6 +142,9 @@ class AWQMarlinConfig(QuantizationConfig): ...@@ -141,6 +142,9 @@ class AWQMarlinConfig(QuantizationConfig):
elif isinstance(layer, FusedMoE): elif isinstance(layer, FusedMoE):
from vllm.model_executor.layers.quantization.moe_wna16 import ( from vllm.model_executor.layers.quantization.moe_wna16 import (
MoeWNA16Config) MoeWNA16Config)
if is_layer_skipped_awq(
prefix, getattr(self, "modules_to_not_convert", [])):
return UnquantizedFusedMoEMethod(layer.moe_config)
if not check_moe_marlin_supports_layer(layer, self.group_size): if not check_moe_marlin_supports_layer(layer, self.group_size):
logger.warning_once( logger.warning_once(
f"Layer '{prefix}' is not supported by AWQMoeMarlin. " f"Layer '{prefix}' is not supported by AWQMoeMarlin. "
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment