"git@developer.sourcefind.cn:change/sglang.git" did not exist on "e7bc600304e98fa54184f4d7331b4e68016890b4"
Unverified commit 69dd878b, authored by Li Hui, committed by GitHub
Browse files

Fix shared experts fusion error (#6289)

parent 22630ca2
...@@ -1903,23 +1903,49 @@ class DeepseekV2ForCausalLM(nn.Module): ...@@ -1903,23 +1903,49 @@ class DeepseekV2ForCausalLM(nn.Module):
if self.n_share_experts_fusion > 0: if self.n_share_experts_fusion > 0:
weights_list = list(weights) weights_list = list(weights)
weights_dict = dict(weights_list) weights_dict = dict(weights_list)
if self.quant_config is None or self.quant_config.get_name() == "w8a8_int8": if self.quant_config is not None:
suffix_list = [ if self.quant_config.get_name() == "w8a8_int8":
"down_proj.weight", suffix_list = [
"down_proj.weight_scale", "down_proj.weight",
"gate_proj.weight", "down_proj.weight_scale",
"gate_proj.weight_scale", "gate_proj.weight",
"up_proj.weight", "gate_proj.weight_scale",
"up_proj.weight_scale", "up_proj.weight",
] "up_proj.weight_scale",
]
elif (
self.quant_config.get_name() == "fp8"
or self.quant_config.get_name() == "blockwise_int8"
):
suffix_list = [
"down_proj.weight",
"down_proj.weight_scale_inv",
"gate_proj.weight",
"gate_proj.weight_scale_inv",
"up_proj.weight",
"up_proj.weight_scale_inv",
]
elif self.quant_config.get_name() == "awq":
suffix_list = [
"down_proj.qweight",
"down_proj.qzeros",
"down_proj.scales",
"gate_proj.qweight",
"gate_proj.qzeros",
"gate_proj.scales",
"up_proj.qweight",
"up_proj.qzeros",
"up_proj.scales",
]
else:
raise ValueError(
f"Unsupported shared expert fusion for quantization: {self.quant_config.get_name()}."
)
else: else:
suffix_list = [ suffix_list = [
"down_proj.weight", "down_proj.weight",
"down_proj.weight_scale_inv",
"gate_proj.weight", "gate_proj.weight",
"gate_proj.weight_scale_inv",
"up_proj.weight", "up_proj.weight",
"up_proj.weight_scale_inv",
] ]
names_to_remove = [] names_to_remove = []
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment