"docs/vscode:/vscode.git/clone" did not exist on "d6fa1be3a8ef71fa16f74afdc5d07d27cbf725b1"
Unverified Commit f5dfa075 authored by noiji's avatar noiji Committed by GitHub
Browse files

[Bugfix] Skip loading extra parameters for modelopt Qwen3 MoE model (#19598)

Signed-off-by: noiji <>
parent 022c58b8
...@@ -386,6 +386,11 @@ class Qwen3MoeModel(nn.Module): ...@@ -386,6 +386,11 @@ class Qwen3MoeModel(nn.Module):
("gate_up_proj", "up_proj", 1), ("gate_up_proj", "up_proj", 1),
] ]
# Skip loading extra parameters for GPTQ/modelopt models.
ignore_suffixes = (".bias", "_bias", ".k_scale", "_k_scale",
".v_scale", "_v_scale", ".weight_scale",
"_weight_scale", ".input_scale", "_input_scale")
# Params for weights, fp8 weight scales, fp8 activation scales # Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id) # (param_name, weight_name, expert_id, shard_id)
expert_params_mapping = FusedMoE.make_expert_params_mapping( expert_params_mapping = FusedMoE.make_expert_params_mapping(
...@@ -410,10 +415,11 @@ class Qwen3MoeModel(nn.Module): ...@@ -410,10 +415,11 @@ class Qwen3MoeModel(nn.Module):
if "mlp.experts" in name: if "mlp.experts" in name:
continue continue
name = name.replace(weight_name, param_name) name = name.replace(weight_name, param_name)
# Skip loading extra bias for GPTQ models.
if ((name.endswith(".bias") or name.endswith("_bias")) # Skip loading extra parameters for GPTQ/modelopt models.
and name not in params_dict): if name.endswith(ignore_suffixes) and name not in params_dict:
continue continue
# Skip layers on other devices. # Skip layers on other devices.
if is_pp_missing_parameter(name, self): if is_pp_missing_parameter(name, self):
continue continue
...@@ -433,9 +439,9 @@ class Qwen3MoeModel(nn.Module): ...@@ -433,9 +439,9 @@ class Qwen3MoeModel(nn.Module):
# Skip layers on other devices. # Skip layers on other devices.
if is_pp_missing_parameter(name, self): if is_pp_missing_parameter(name, self):
continue continue
# Skip loading extra bias for GPTQ models. # Skip loading extra parameters for GPTQ/modelopt models.
if ((name.endswith(".bias") or name.endswith("_bias")) if name.endswith(
and name not in params_dict): ignore_suffixes) and name not in params_dict:
continue continue
param = params_dict[name] param = params_dict[name]
weight_loader = param.weight_loader weight_loader = param.weight_loader
...@@ -446,9 +452,9 @@ class Qwen3MoeModel(nn.Module): ...@@ -446,9 +452,9 @@ class Qwen3MoeModel(nn.Module):
expert_id=expert_id) expert_id=expert_id)
break break
else: else:
# Skip loading extra bias for GPTQ models. # Skip loading extra parameters for GPTQ/modelopt models.
if ((name.endswith(".bias") or name.endswith("_bias")) if name.endswith(
and name not in params_dict): ignore_suffixes) and name not in params_dict:
continue continue
# Skip layers on other devices. # Skip layers on other devices.
if is_pp_missing_parameter(name, self): if is_pp_missing_parameter(name, self):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment