Commit 262ddd0d authored by khluu
Browse files

[cherry-pick][Bugfix] Fix EP weight filter breaking EPLB and NVFP4 accuracy #37322


Signed-off-by: khluu <khluu000@gmail.com>
parent e60c1674
...@@ -319,6 +319,13 @@ class DefaultModelLoader(BaseModelLoader): ...@@ -319,6 +319,13 @@ class DefaultModelLoader(BaseModelLoader):
and parallel_config.enable_ep_weight_filter and parallel_config.enable_ep_weight_filter
): ):
return return
# When EPLB is enabled, redundant physical expert slots may map to
# logical experts that belong to other ranks in the default partition.
# The weight loader needs to see ALL logical expert weights so it can
# populate these redundant slots. Skip the filter entirely.
if parallel_config.enable_eplb:
return
num_experts = model_config.get_num_experts() num_experts = model_config.get_num_experts()
if num_experts <= 0: if num_experts <= 0:
......
...@@ -73,4 +73,9 @@ def should_skip_weight( ...@@ -73,4 +73,9 @@ def should_skip_weight(
if eid is None: if eid is None:
# Not an expert weight (dense / shared-expert / embedding) → keep. # Not an expert weight (dense / shared-expert / embedding) → keep.
return False return False
# Only skip heavy weight tensors, never scale/metadata tensors.
# Scale tensors are tiny and some backends need them from ALL experts
# (e.g. FlashInfer NVFP4 computes a global max of activation scales).
if not weight_name.endswith(".weight"):
return False
return eid not in local_expert_ids return eid not in local_expert_ids
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment