Commit 51af08ac authored by zhuwenwen
Browse files

update deepseek_v2.py

parent cf360956
...@@ -1039,10 +1039,7 @@ class DeepseekV2ForCausalLM(nn.Module, SupportsPP, MixtureOfExperts): ...@@ -1039,10 +1039,7 @@ class DeepseekV2ForCausalLM(nn.Module, SupportsPP, MixtureOfExperts):
if is_pp_missing_parameter(name_mapped, self): if is_pp_missing_parameter(name_mapped, self):
continue continue
try: param = params_dict[name_mapped]
param = params_dict[name_mapped]
except Exception as e:
continue
# We should ask the weight loader to return success or not # We should ask the weight loader to return success or not
# here since otherwise we may skip experts with other # here since otherwise we may skip experts with other
# available replicas. # available replicas.
...@@ -1076,7 +1073,10 @@ class DeepseekV2ForCausalLM(nn.Module, SupportsPP, MixtureOfExperts): ...@@ -1076,7 +1073,10 @@ class DeepseekV2ForCausalLM(nn.Module, SupportsPP, MixtureOfExperts):
if is_pp_missing_parameter(name, self): if is_pp_missing_parameter(name, self):
continue continue
param = params_dict[name] try:
param = params_dict[name]
except Exception as e:
continue
weight_loader = getattr(param, "weight_loader", weight_loader = getattr(param, "weight_loader",
default_weight_loader) default_weight_loader)
weight_loader(param, loaded_weight) weight_loader(param, loaded_weight)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment