Unverified commit 21802c4b, authored by Michael Goin, committed by GitHub
Browse files

[ROCm][Bugfix][FP8] Make fp8 quant respect fused modules mapping (#16031)


Signed-off-by: mgoin <michael@neuralmagic.com>
parent 652907b3
......@@ -116,7 +116,9 @@ class Fp8Config(QuantizationConfig):
from vllm.attention.layer import Attention # Avoid circular import
if isinstance(layer, LinearBase):
- if is_layer_skipped(prefix, self.ignored_layers):
+ if is_layer_skipped(prefix=prefix,
+ ignored_layers=self.ignored_layers,
+ fused_mapping=self.packed_modules_mapping):
return UnquantizedLinearMethod()
return Fp8LinearMethod(self)
elif isinstance(layer, FusedMoE):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment