[Bugfix] fix qwen3 moe fp8 accuracy issue (#23031)

Signed-off-by: Jinzhen Lin <jinzhen.ljz@antgroup.com>

[Bugfix] fix qwen3 moe fp8 accuracy issue (#23031)
Signed-off-by: Jinzhen Lin <jinzhen.ljz@antgroup.com>
a258ad8b · Jinzhen Lin · GitHub · bf7f470b · a258ad8b
Unverified commit a258ad8b, authored Aug 17, 2025 by Jinzhen Lin; committed by GitHub Aug 16, 2025.
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 4 additions and 0 deletions

vllm/model_executor/layers/quantization/fp8.py +4 -0

No files found.
--- a/vllm/model_executor/layers/quantization/fp8.py
+++ b/vllm/model_executor/layers/quantization/fp8.py
@@ -125,6 +125,10 @@ class Fp8Config(QuantizationConfig):
        ignored_layers = cls.get_from_keys_or(config, ["ignored_layers"], None)
        weight_block_size = cls.get_from_keys_or(config, ["weight_block_size"],
                                                 None)
+        if not ignored_layers:
+            ignored_layers = cls.get_from_keys_or(config,
+                                                  ["modules_to_not_convert"],
+                                                  None)
        return cls(is_checkpoint_fp8_serialized=is_checkpoint_fp8_serialized,
                   activation_scheme=activation_scheme,
                   ignored_layers=ignored_layers,