Unverified commit 9b94d6ec, authored by Andy Chen and committed by GitHub
Browse files

Enable 4bit bnb prequant MOE (#21548)


Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
parent 1891a265
...@@ -427,14 +427,10 @@ class BitsAndBytesModelLoader(BaseModelLoader): ...@@ -427,14 +427,10 @@ class BitsAndBytesModelLoader(BaseModelLoader):
elif isinstance(module, FusedMoE) and hasattr( elif isinstance(module, FusedMoE) and hasattr(
module.quant_method, "quant_config"): module.quant_method, "quant_config"):
# TODO: support FusedMoE with prequant and 8bit. # TODO: support FusedMoE with prequant and 8bit.
if self.pre_quant: if self.pre_quant and self.load_8bit:
raise ValueError(
"Prequant BitsAndBytes models with FusedMoE is not "
"supported yet.")
if self.load_8bit:
raise ValueError( raise ValueError(
"BitsAndBytes 8bit quantization with FusedMoE is not " "Prequant BitsAndBytes 8bit models with FusedMoE "
"supported yet.") "is not supported yet.")
# Get the corresponding weight name using module name and # Get the corresponding weight name using module name and
# expert_params_mapping. # expert_params_mapping.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment