Unverified commit 99795d61, authored by Even Zhou, committed by GitHub

[Bugfix] fix w8a8_int8 load issue (#8308)
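On NPU, report quant_model_description.json as the w8a8_int8 quantization config filename so the checkpoint's quantization parameters can be located, and pass the model's packed_modules_mapping through get_quant_config into W8A8Int8Config.from_config.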


Co-authored-by: ronnie_zheng <zl19940307@163.com>
parent fe5086fd
```diff
@@ -231,7 +231,10 @@ class W8A8Int8Config(QuantizationConfig):
     @classmethod
     def get_config_filenames(cls) -> List[str]:
-        return []
+        filenames = []
+        if _is_npu:
+            filenames.append("quant_model_description.json")
+        return filenames

     @classmethod
     def from_config(cls, config: Dict[str, Any]) -> W8A8Int8Config:
```
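For context on this hunk: get_config_filenames lists the standalone JSON files a quantization method may store its config in, and on NPU the w8a8_int8 config lives in quant_model_description.json; returning `[]` meant the loader never looked for that file. Below is a minimal sketch of how such a hook is typically consumed — `find_quant_config_file`, `load_quant_config`, and `model_dir` are illustrative names assumed here, not SGLang's actual loader API:

```python
import json
import os
from typing import List, Optional


def find_quant_config_file(model_dir: str, filenames: List[str]) -> Optional[str]:
    # Hypothetical helper: return the first quant-config file that the
    # QuantizationConfig class declares and that actually exists on disk.
    for name in filenames:
        path = os.path.join(model_dir, name)
        if os.path.isfile(path):
            return path
    # With the pre-fix behavior (an empty filename list on NPU), this
    # always returns None, so quant_model_description.json is never read.
    return None


def load_quant_config(model_dir: str, filenames: List[str]) -> dict:
    path = find_quant_config_file(model_dir, filenames)
    if path is None:
        return {}
    with open(path) as f:
        return json.load(f)
```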
```diff
@@ -229,6 +229,8 @@ def get_quant_config(
             f"Unsupported quantization config"
             f" found for {model_config.quantization} in {f}."
         )
+    elif model_config.quantization == "w8a8_int8":
+        config["packed_modules_mapping"] = packed_modules_mapping
     return quant_cls.from_config(config)
```
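The second hunk seeds the parsed config dict with the model's packed_modules_mapping (e.g. how a fused qkv_proj maps onto the q_proj/k_proj/v_proj shards stored in the checkpoint) before from_config runs. A hedged sketch of the consuming side under that assumption — the class body below is illustrative, not SGLang's source:

```python
from typing import Any, Dict, List


class W8A8Int8ConfigSketch:
    # Illustrative stand-in for W8A8Int8Config; assumed, not SGLang source.

    def __init__(self, packed_modules_mapping: Dict[str, List[str]]) -> None:
        # Fused modules (e.g. qkv_proj) are stored as separate int8 shards;
        # the mapping tells the weight loader how to regroup them.
        self.packed_modules_mapping = packed_modules_mapping

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "W8A8Int8ConfigSketch":
        # Without the hunk above, this key is absent from `config` and
        # fused-module weights cannot be matched to their shards.
        return cls(config.get("packed_modules_mapping", {}))


# Usage mirroring get_quant_config()'s call site:
config: Dict[str, Any] = {}
config["packed_modules_mapping"] = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]}
quant_config = W8A8Int8ConfigSketch.from_config(config)
print(quant_config.packed_modules_mapping)
```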