"docs/vscode:/vscode.git/clone" did not exist on "c6c9122d50a836046a0bd5360bd98f29f4e70667"
Unverified commit f02b3269, authored by Hexiang Wang, committed by GitHub
Browse files

[PluggableLayer][3/N] Apply PluggableLayer to moe-related layers. (#33556)


Signed-off-by: whx-sjtu <2952154980@qq.com>
parent e1e318af
......@@ -18,7 +18,7 @@ from vllm.distributed import (
)
from vllm.distributed.eplb.eplb_state import EplbLayerState, EplbState
from vllm.logger import init_logger
-from vllm.model_executor.custom_op import CustomOp
+from vllm.model_executor.custom_op import PluggableLayer
from vllm.model_executor.layers.fused_moe.activation import MoEActivation
from vllm.model_executor.layers.fused_moe.config import (
FusedMoEConfig,
......@@ -213,8 +213,8 @@ def get_compressed_expert_map(expert_map: torch.Tensor) -> str:
# --8<-- [start:fused_moe]
-@CustomOp.register("fused_moe")
-class FusedMoE(CustomOp):
+@PluggableLayer.register("fused_moe")
+class FusedMoE(PluggableLayer):
"""FusedMoE layer for MoE models.
This layer contains both MergedColumnParallel weights (gate_up_proj /
......@@ -1532,7 +1532,7 @@ class FusedMoE(CustomOp):
"""
return self.runner.maybe_all_reduce_tensor_model_parallel(final_hidden_states)
-def forward_native(
+def forward(
self,
hidden_states: torch.Tensor,
router_logits: torch.Tensor,
......@@ -1548,13 +1548,6 @@ class FusedMoE(CustomOp):
self._expert_map if not self.rocm_aiter_fmoe_enabled else self.expert_mask
)
-def forward_cuda(
-self,
-hidden_states: torch.Tensor,
-router_logits: torch.Tensor,
-) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-return self.forward_native(hidden_states, router_logits)
@classmethod
def make_expert_params_mapping(
cls,
......
......@@ -24,7 +24,7 @@ import torch.nn as nn
from vllm.config.utils import getattr_iter
from vllm.distributed import get_dp_group, get_ep_group
from vllm.forward_context import ForwardContext, get_forward_context
-from vllm.model_executor.custom_op import CustomOp
+from vllm.model_executor.custom_op import PluggableLayer
from vllm.model_executor.layers.fused_moe import FusedMoE
from vllm.model_executor.models.interfaces import MixtureOfExperts
from vllm.model_executor.models.utils import maybe_prefix
......@@ -38,7 +38,7 @@ if TYPE_CHECKING:
# --8<-- [start:transformers_fused_moe]
-@CustomOp.register("transformers_fused_moe")
+@PluggableLayer.register("transformers_fused_moe")
class TransformersFusedMoE(FusedMoE):
"""Custom FusedMoE for the Transformers modeling backend."""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment