Unverified commit 81d5bb76, authored by Isotr0py and committed by GitHub
Browse files

[Bugfix] Fix AWQ marlin layer skipping (#27416)


Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
parent 0825197b
......@@ -178,7 +178,10 @@ class AWQMarlinConfig(QuantizationConfig):
isinstance(layer, ParallelLMHead) and self.lm_head_quantized
):
if is_layer_skipped(
prefix, self.modules_to_not_convert, self.packed_modules_mapping
prefix,
self.modules_to_not_convert,
self.packed_modules_mapping,
skip_with_substr=True,
):
return UnquantizedLinearMethod()
# Check if the layer is supported by AWQMarlin.
......@@ -194,7 +197,11 @@ class AWQMarlinConfig(QuantizationConfig):
elif isinstance(layer, FusedMoE):
from vllm.model_executor.layers.quantization.moe_wna16 import MoeWNA16Config
if is_layer_skipped(prefix, getattr(self, "modules_to_not_convert", [])):
if is_layer_skipped(
prefix,
getattr(self, "modules_to_not_convert", []),
skip_with_substr=True,
):
return UnquantizedFusedMoEMethod(layer.moe_config)
if not check_moe_marlin_supports_layer(layer, self.group_size):
logger.warning_once(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment