"tests/vscode:/vscode.git/clone" did not exist on "3b7178cfa4a317922d4aef9dd3b2647b8d950e7d"
Commit 82c0bf76 authored by 王敏
Browse files

[fix]解决glm4 moe + mtp精度异常

parent 4d70732e
...@@ -215,10 +215,17 @@ class Glm4MoeMTP(nn.Module, SupportsPP): ...@@ -215,10 +215,17 @@ class Glm4MoeMTP(nn.Module, SupportsPP):
params_dict = dict(self.named_parameters()) params_dict = dict(self.named_parameters())
loaded_params: set[str] = set() loaded_params: set[str] = set()
for name, loaded_weight in weights: for name, loaded_weight in weights:
if name == "lm_head.weight":
spec_layer = self.model.mtp_start_layer_idx
name = f"model.layers.{spec_layer}.shared_head.head.weight"
elif name == "model.embed_tokens.weight":
spec_layer = self.model.mtp_start_layer_idx
else:
spec_layer = get_spec_layer_idx_from_weight_name(self.config, name) spec_layer = get_spec_layer_idx_from_weight_name(self.config, name)
if spec_layer is None: if spec_layer is None:
continue continue
name = self._rewrite_spec_layer_name(spec_layer, name) name = self._rewrite_spec_layer_name(spec_layer, name)
for (param_name, weight_name, shard_id) in stacked_params_mapping: for (param_name, weight_name, shard_id) in stacked_params_mapping:
# Skip non-stacked layers and experts (experts handled below). # Skip non-stacked layers and experts (experts handled below).
if weight_name not in name: if weight_name not in name:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment