Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
262ddd0d
Commit
262ddd0d
authored
Mar 18, 2026
by
khluu
Browse files
[cherry-pick][Bugfix] Fix EP weight filter breaking EPLB and NVFP4 accuracy #37322
Signed-off-by:
khluu
<
khluu000@gmail.com
>
parent
e60c1674
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
12 additions
and
0 deletions
+12
-0
vllm/model_executor/model_loader/default_loader.py
vllm/model_executor/model_loader/default_loader.py
+7
-0
vllm/model_executor/model_loader/ep_weight_filter.py
vllm/model_executor/model_loader/ep_weight_filter.py
+5
-0
No files found.
vllm/model_executor/model_loader/default_loader.py
View file @
262ddd0d
...
@@ -319,6 +319,13 @@ class DefaultModelLoader(BaseModelLoader):
...
@@ -319,6 +319,13 @@ class DefaultModelLoader(BaseModelLoader):
and
parallel_config
.
enable_ep_weight_filter
and
parallel_config
.
enable_ep_weight_filter
):
):
return
return
# When EPLB is enabled, redundant physical expert slots may map to
# logical experts that belong to other ranks in the default partition.
# The weight loader needs to see ALL logical expert weights so it can
# populate these redundant slots. Skip the filter entirely.
if
parallel_config
.
enable_eplb
:
return
num_experts
=
model_config
.
get_num_experts
()
num_experts
=
model_config
.
get_num_experts
()
if
num_experts
<=
0
:
if
num_experts
<=
0
:
...
...
vllm/model_executor/model_loader/ep_weight_filter.py
View file @
262ddd0d
...
@@ -73,4 +73,9 @@ def should_skip_weight(
...
@@ -73,4 +73,9 @@ def should_skip_weight(
if
eid
is
None
:
if
eid
is
None
:
# Not an expert weight (dense / shared-expert / embedding) → keep.
# Not an expert weight (dense / shared-expert / embedding) → keep.
return
False
return
False
# Only skip heavy weight tensors, never scale/metadata tensors.
# Scale tensors are tiny and some backends need them from ALL experts
# (e.g. FlashInfer NVFP4 computes a global max of activation scales).
if
not
weight_name
.
endswith
(
".weight"
):
return
False
return
eid
not
in
local_expert_ids
return
eid
not
in
local_expert_ids
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment