Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f02b3269
Unverified
Commit
f02b3269
authored
Apr 14, 2026
by
Hexiang Wang
Committed by
GitHub
Apr 14, 2026
Browse files
[PluggableLayer][3/N] Apply PluggableLayer to moe-related layers. (#33556)
Signed-off-by:
whx-sjtu
<
2952154980@qq.com
>
parent
e1e318af
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
6 additions
and
13 deletions
+6
-13
vllm/model_executor/layers/fused_moe/layer.py
vllm/model_executor/layers/fused_moe/layer.py
+4
-11
vllm/model_executor/models/transformers/moe.py
vllm/model_executor/models/transformers/moe.py
+2
-2
No files found.
vllm/model_executor/layers/fused_moe/layer.py
View file @
f02b3269
...
@@ -18,7 +18,7 @@ from vllm.distributed import (
...
@@ -18,7 +18,7 @@ from vllm.distributed import (
)
)
from
vllm.distributed.eplb.eplb_state
import
EplbLayerState
,
EplbState
from
vllm.distributed.eplb.eplb_state
import
EplbLayerState
,
EplbState
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.custom_op
import
CustomOp
from
vllm.model_executor.custom_op
import
PluggableLayer
from
vllm.model_executor.layers.fused_moe.activation
import
MoEActivation
from
vllm.model_executor.layers.fused_moe.activation
import
MoEActivation
from
vllm.model_executor.layers.fused_moe.config
import
(
from
vllm.model_executor.layers.fused_moe.config
import
(
FusedMoEConfig
,
FusedMoEConfig
,
...
@@ -213,8 +213,8 @@ def get_compressed_expert_map(expert_map: torch.Tensor) -> str:
...
@@ -213,8 +213,8 @@ def get_compressed_expert_map(expert_map: torch.Tensor) -> str:
# --8<-- [start:fused_moe]
# --8<-- [start:fused_moe]
@
CustomOp
.
register
(
"fused_moe"
)
@
PluggableLayer
.
register
(
"fused_moe"
)
class
FusedMoE
(
CustomOp
):
class
FusedMoE
(
PluggableLayer
):
"""FusedMoE layer for MoE models.
"""FusedMoE layer for MoE models.
This layer contains both MergedColumnParallel weights (gate_up_proj /
This layer contains both MergedColumnParallel weights (gate_up_proj /
...
@@ -1532,7 +1532,7 @@ class FusedMoE(CustomOp):
...
@@ -1532,7 +1532,7 @@ class FusedMoE(CustomOp):
"""
"""
return
self
.
runner
.
maybe_all_reduce_tensor_model_parallel
(
final_hidden_states
)
return
self
.
runner
.
maybe_all_reduce_tensor_model_parallel
(
final_hidden_states
)
def
forward
_native
(
def
forward
(
self
,
self
,
hidden_states
:
torch
.
Tensor
,
hidden_states
:
torch
.
Tensor
,
router_logits
:
torch
.
Tensor
,
router_logits
:
torch
.
Tensor
,
...
@@ -1548,13 +1548,6 @@ class FusedMoE(CustomOp):
...
@@ -1548,13 +1548,6 @@ class FusedMoE(CustomOp):
self
.
_expert_map
if
not
self
.
rocm_aiter_fmoe_enabled
else
self
.
expert_mask
self
.
_expert_map
if
not
self
.
rocm_aiter_fmoe_enabled
else
self
.
expert_mask
)
)
def
forward_cuda
(
self
,
hidden_states
:
torch
.
Tensor
,
router_logits
:
torch
.
Tensor
,
)
->
torch
.
Tensor
|
tuple
[
torch
.
Tensor
,
torch
.
Tensor
]:
return
self
.
forward_native
(
hidden_states
,
router_logits
)
@
classmethod
@
classmethod
def
make_expert_params_mapping
(
def
make_expert_params_mapping
(
cls
,
cls
,
...
...
vllm/model_executor/models/transformers/moe.py
View file @
f02b3269
...
@@ -24,7 +24,7 @@ import torch.nn as nn
...
@@ -24,7 +24,7 @@ import torch.nn as nn
from
vllm.config.utils
import
getattr_iter
from
vllm.config.utils
import
getattr_iter
from
vllm.distributed
import
get_dp_group
,
get_ep_group
from
vllm.distributed
import
get_dp_group
,
get_ep_group
from
vllm.forward_context
import
ForwardContext
,
get_forward_context
from
vllm.forward_context
import
ForwardContext
,
get_forward_context
from
vllm.model_executor.custom_op
import
CustomOp
from
vllm.model_executor.custom_op
import
PluggableLayer
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.models.interfaces
import
MixtureOfExperts
from
vllm.model_executor.models.interfaces
import
MixtureOfExperts
from
vllm.model_executor.models.utils
import
maybe_prefix
from
vllm.model_executor.models.utils
import
maybe_prefix
...
@@ -38,7 +38,7 @@ if TYPE_CHECKING:
...
@@ -38,7 +38,7 @@ if TYPE_CHECKING:
# --8<-- [start:transformers_fused_moe]
# --8<-- [start:transformers_fused_moe]
@
CustomOp
.
register
(
"transformers_fused_moe"
)
@
PluggableLayer
.
register
(
"transformers_fused_moe"
)
class
TransformersFusedMoE
(
FusedMoE
):
class
TransformersFusedMoE
(
FusedMoE
):
"""Custom FusedMoE for the Transformers modeling backend."""
"""Custom FusedMoE for the Transformers modeling backend."""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment