Unverified commit 69dd878b authored by Li Hui, committed by GitHub

Fix shared experts fusion error (#6289)

parent 22630ca2
@@ -1903,7 +1903,8 @@ class DeepseekV2ForCausalLM(nn.Module):
         if self.n_share_experts_fusion > 0:
             weights_list = list(weights)
             weights_dict = dict(weights_list)
-            if self.quant_config is None or self.quant_config.get_name() == "w8a8_int8":
+            if self.quant_config is not None:
+                if self.quant_config.get_name() == "w8a8_int8":
                 suffix_list = [
                     "down_proj.weight",
                     "down_proj.weight_scale",
@@ -1912,7 +1913,10 @@ class DeepseekV2ForCausalLM(nn.Module):
                     "up_proj.weight",
                     "up_proj.weight_scale",
                 ]
-            else:
+                elif (
+                    self.quant_config.get_name() == "fp8"
+                    or self.quant_config.get_name() == "blockwise_int8"
+                ):
                 suffix_list = [
                     "down_proj.weight",
                     "down_proj.weight_scale_inv",
@@ -1921,6 +1925,28 @@ class DeepseekV2ForCausalLM(nn.Module):
                     "up_proj.weight",
                     "up_proj.weight_scale_inv",
                 ]
+                elif self.quant_config.get_name() == "awq":
+                    suffix_list = [
+                        "down_proj.qweight",
+                        "down_proj.qzeros",
+                        "down_proj.scales",
+                        "gate_proj.qweight",
+                        "gate_proj.qzeros",
+                        "gate_proj.scales",
+                        "up_proj.qweight",
+                        "up_proj.qzeros",
+                        "up_proj.scales",
+                    ]
+                else:
+                    raise ValueError(
+                        f"Unsupported shared expert fusion for quantization: {self.quant_config.get_name()}."
+                    )
+            else:
+                suffix_list = [
+                    "down_proj.weight",
+                    "gate_proj.weight",
+                    "up_proj.weight",
+                ]
             names_to_remove = []
             moe_layers = (
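For context, the patch dispatches on self.quant_config.get_name() to decide which per-projection tensor names must be cloned when the shared experts are fused into the routed experts, and it now raises an explicit error for unsupported quantization schemes. The snippet below is a minimal, self-contained sketch of that dispatch, not code from the repository: the helper name shared_expert_suffixes and the table _SUFFIXES_BY_QUANT are invented for illustration, while the method names and suffix strings are taken from the diff above.

# Minimal sketch (assumed names, not the repository's API): map the active
# quantization method to the weight suffixes copied for each fused shared expert.
from typing import List, Optional

_PROJECTIONS = ("down_proj", "gate_proj", "up_proj")

# Suffix templates per quantization scheme, mirroring the branches in the diff.
_SUFFIXES_BY_QUANT = {
    "w8a8_int8": ("{p}.weight", "{p}.weight_scale"),
    "fp8": ("{p}.weight", "{p}.weight_scale_inv"),
    "blockwise_int8": ("{p}.weight", "{p}.weight_scale_inv"),
    "awq": ("{p}.qweight", "{p}.qzeros", "{p}.scales"),
    None: ("{p}.weight",),  # unquantized checkpoints carry plain weights only
}


def shared_expert_suffixes(quant_name: Optional[str]) -> List[str]:
    """Return the tensor-name suffixes to duplicate for one shared expert."""
    if quant_name not in _SUFFIXES_BY_QUANT:
        # Same failure mode as the patched branch: reject unsupported schemes.
        raise ValueError(
            f"Unsupported shared expert fusion for quantization: {quant_name}."
        )
    return [
        template.format(p=proj)
        for proj in _PROJECTIONS
        for template in _SUFFIXES_BY_QUANT[quant_name]
    ]


# Example: shared_expert_suffixes("w8a8_int8") yields the same six entries as the
# w8a8_int8 branch above; shared_expert_suffixes(None) yields the three plain weights.

Keeping the mapping in one place makes the unsupported-scheme error explicit, which is the behavior the patch adds for quantization methods other than w8a8_int8, fp8, blockwise_int8, and awq.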