Unverified commit 99795d61, authored by Even Zhou, committed by GitHub

[Bugfix] fix w8a8_int8 load issue (#8308)
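On NPU, report quant_model_description.json as the w8a8_int8 quantization config filename so the checkpoint's quantization parameters can be located, and pass the model's packed_modules_mapping through get_quant_config into W8A8Int8Config.from_config.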


Co-authored-by: ronnie_zheng <zl19940307@163.com>
parent fe5086fd
```diff
@@ -231,7 +231,10 @@ class W8A8Int8Config(QuantizationConfig):
     @classmethod
     def get_config_filenames(cls) -> List[str]:
-        return []
+        filenames = []
+        if _is_npu:
+            filenames.append("quant_model_description.json")
+        return filenames

     @classmethod
     def from_config(cls, config: Dict[str, Any]) -> W8A8Int8Config:
```
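For context on this hunk: get_config_filenames lists the standalone JSON files a quantization method may store its config in, and on NPU the w8a8_int8 config lives in quant_model_description.json; returning `[]` meant the loader never looked for that file. Below is a minimal sketch of how such a hook is typically consumed — `find_quant_config_file`, `load_quant_config`, and `model_dir` are illustrative names assumed here, not SGLang's actual loader API:

```python
import json
import os
from typing import List, Optional


def find_quant_config_file(model_dir: str, filenames: List[str]) -> Optional[str]:
    # Hypothetical helper: return the first quant-config file that the
    # QuantizationConfig class declares and that actually exists on disk.
    for name in filenames:
        path = os.path.join(model_dir, name)
        if os.path.isfile(path):
            return path
    # With the pre-fix behavior (an empty filename list on NPU), this
    # always returns None, so quant_model_description.json is never read.
    return None


def load_quant_config(model_dir: str, filenames: List[str]) -> dict:
    path = find_quant_config_file(model_dir, filenames)
    if path is None:
        return {}
    with open(path) as f:
        return json.load(f)
```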
```diff
@@ -229,6 +229,8 @@ def get_quant_config(
             f"Unsupported quantization config"
             f" found for {model_config.quantization} in {f}."
         )
+    elif model_config.quantization == "w8a8_int8":
+        config["packed_modules_mapping"] = packed_modules_mapping
     return quant_cls.from_config(config)
```
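The second hunk seeds the parsed config dict with the model's packed_modules_mapping (e.g. how a fused qkv_proj maps onto the q_proj/k_proj/v_proj shards stored in the checkpoint) before from_config runs. A hedged sketch of the consuming side under that assumption — the class body below is illustrative, not SGLang's source:

```python
from typing import Any, Dict, List


class W8A8Int8ConfigSketch:
    # Illustrative stand-in for W8A8Int8Config; assumed, not SGLang source.

    def __init__(self, packed_modules_mapping: Dict[str, List[str]]) -> None:
        # Fused modules (e.g. qkv_proj) are stored as separate int8 shards;
        # the mapping tells the weight loader how to regroup them.
        self.packed_modules_mapping = packed_modules_mapping

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "W8A8Int8ConfigSketch":
        # Without the hunk above, this key is absent from `config` and
        # fused-module weights cannot be matched to their shards.
        return cls(config.get("packed_modules_mapping", {}))


# Usage mirroring get_quant_config()'s call site:
config: Dict[str, Any] = {}
config["packed_modules_mapping"] = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]}
quant_config = W8A8Int8ConfigSketch.from_config(config)
print(quant_config.packed_modules_mapping)
```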