Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
c9d3c6e6
Unverified
Commit
c9d3c6e6
authored
Apr 24, 2026
by
Itay Alroy
Committed by
GitHub
Apr 24, 2026
Browse files
fused_moe: treat NIXL EP as batched experts (#40412)
Signed-off-by:
Itay Alroy
<
ialroy@nvidia.com
>
parent
51adca74
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
12 additions
and
9 deletions
+12
-9
vllm/model_executor/layers/fused_moe/config.py
vllm/model_executor/layers/fused_moe/config.py
+8
-0
vllm/model_executor/layers/fused_moe/layer.py
vllm/model_executor/layers/fused_moe/layer.py
+2
-4
vllm/model_executor/layers/fused_moe/oracle/mxfp4.py
vllm/model_executor/layers/fused_moe/oracle/mxfp4.py
+1
-1
vllm/model_executor/layers/fused_moe/oracle/nvfp4.py
vllm/model_executor/layers/fused_moe/oracle/nvfp4.py
+1
-4
No files found.
vllm/model_executor/layers/fused_moe/config.py
View file @
c9d3c6e6
...
...
@@ -993,6 +993,10 @@ class FusedMoEParallelConfig:
def
use_batched_activation_format
(
self
):
return
self
.
use_deepep_ll_kernels
or
self
.
use_nixl_ep_kernels
@
property
def
needs_round_robin_routing_tables
(
self
):
return
self
.
use_deepep_ll_kernels
or
self
.
use_nixl_ep_kernels
@
property
def
use_ag_rs_all2all_kernels
(
self
):
return
(
...
...
@@ -1294,3 +1298,7 @@ class FusedMoEConfig:
@
property
def
use_nixl_ep_kernels
(
self
):
return
self
.
moe_parallel_config
.
use_nixl_ep_kernels
@
property
def
needs_round_robin_routing_tables
(
self
):
return
self
.
moe_parallel_config
.
needs_round_robin_routing_tables
vllm/model_executor/layers/fused_moe/layer.py
View file @
c9d3c6e6
...
...
@@ -180,8 +180,7 @@ def determine_expert_placement_strategy(
return
"linear"
if
(
moe_parallel_config
.
use_all2all_kernels
and
not
moe_parallel_config
.
use_deepep_ll_kernels
and
not
moe_parallel_config
.
use_nixl_ep_kernels
and
not
moe_parallel_config
.
needs_round_robin_routing_tables
):
logger
.
warning
(
"Round-robin expert placement currently only supports "
...
...
@@ -687,8 +686,7 @@ class FusedMoE(PluggableLayer):
# Currently routing_tables are only needed for round-robin expert placement
# with DeepEP-ll or NIXL EP all2all backends.
if
self
.
expert_placement_strategy
!=
"round_robin"
or
(
not
self
.
moe_parallel_config
.
use_deepep_ll_kernels
and
not
self
.
moe_parallel_config
.
use_nixl_ep_kernels
not
self
.
moe_parallel_config
.
needs_round_robin_routing_tables
):
return
None
...
...
vllm/model_executor/layers/fused_moe/oracle/mxfp4.py
View file @
c9d3c6e6
...
...
@@ -884,7 +884,7 @@ def make_mxfp4_moe_kernel(
experts
,
shared_experts
=
(
shared_experts
if
moe_config
.
moe_parallel_config
.
use_
deepep_ll_kernels
if
moe_config
.
moe_parallel_config
.
use_
batched_activation_format
else
None
),
inplace
=
(
...
...
vllm/model_executor/layers/fused_moe/oracle/nvfp4.py
View file @
c9d3c6e6
...
...
@@ -168,10 +168,7 @@ def select_nvfp4_moe_backend(
NvFp4MoeBackend
.
EMULATION
,
]
# NOTE(rob): this is kind of a hack. We need to peek into
# the prepare-finalize selection to determine if we are using
# the batched or standard expert format.
use_batched
=
config
.
moe_parallel_config
.
use_deepep_ll_kernels
use_batched
=
config
.
moe_parallel_config
.
use_batched_activation_format
activation_format
=
(
mk
.
FusedMoEActivationFormat
.
BatchedExperts
if
use_batched
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment