"docs/vscode:/vscode.git/clone" did not exist on "243137143c81f738db17cfcd93d991f6dd842e27"
Unverified commit ddc90483, authored by David-Wen and committed by GitHub
Browse files

Fix: Correct FusedMoE layer reference in auto_round quantization (#24818)


Signed-off-by: David-Wen <18927700430@163.com>
Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
parent b1a63d1b
@@ -241,7 +241,7 @@ class AutoRoundConfig(QuantizationConfig):
         if isinstance(layer, FusedMoE):
             if use_marlin:
-                return AWQMoEMethod(quant_args_marlin, layer.moe)
+                return AWQMoEMethod(quant_args_marlin, layer.moe_config)
             from vllm.model_executor.layers.quantization.moe_wna16 import (
                 MoeWNA16Config)
@@ -327,7 +327,7 @@ class AutoRoundConfig(QuantizationConfig):
         if isinstance(layer, FusedMoE):
             if use_marlin:
-                return GPTQMarlinMoEMethod(quant_args_marlin, layer.moe)
+                return GPTQMarlinMoEMethod(quant_args_marlin, layer.moe_config)
             else:
                 from vllm.model_executor.layers.quantization.moe_wna16 import (
                     MoeWNA16Config)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment