Unverified commit ab01cd14, authored by wuyaoxuehun and committed by GitHub
Browse files

[BugFix] Fix glm4_moe_mtp load weights bug (#28805)


Signed-off-by: wuyaoxuehun <798143193@qq.com>
parent 577bb34f
...@@ -256,13 +256,12 @@ class Glm4MoeMTP(nn.Module, SupportsPP, Glm4MixtureOfExperts): ...@@ -256,13 +256,12 @@ class Glm4MoeMTP(nn.Module, SupportsPP, Glm4MixtureOfExperts):
params_dict = dict(self.named_parameters()) params_dict = dict(self.named_parameters())
loaded_params: set[str] = set() loaded_params: set[str] = set()
spec_layer = self.model.mtp_start_layer_idx
for name, loaded_weight in weights: for name, loaded_weight in weights:
if name == "lm_head.weight": if name == "lm_head.weight":
name = f"model.layers.{spec_layer}.shard_head.head.weight" spec_layer = self.model.mtp_start_layer_idx
name = f"model.layers.{spec_layer}.shared_head.head.weight"
elif name == "model.embed_tokens.weight": elif name == "model.embed_tokens.weight":
# This name is same with local model, rewriting is not needed. spec_layer = self.model.mtp_start_layer_idx
pass
else: else:
spec_layer = get_spec_layer_idx_from_weight_name(self.config, name) spec_layer = get_spec_layer_idx_from_weight_name(self.config, name)
if spec_layer is None: if spec_layer is None:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment