"vscode:/vscode.git/clone" did not exist on "81eaff625c0e832cd6b72f9ddd90feabd2277b1c"
Unverified commit b66b0d6a, authored by Rabi Mishra, committed by GitHub
Browse files

fix(rocm): Enable non-gated MoE (is_act_and_mul=False) support on ROCm (#32244)


Signed-off-by: rabi <ramishra@redhat.com>
parent 03da3b52
......@@ -448,9 +448,13 @@ class FusedMoE(CustomOp):
)
# ROCm aiter shared experts fusion
self.rocm_aiter_fmoe_enabled = rocm_aiter_ops.is_fused_moe_enabled()
# AITER only supports gated activations (silu/gelu), so disable it
# for non-gated MoE (is_act_and_mul=False)
self.rocm_aiter_fmoe_enabled = (
rocm_aiter_ops.is_fused_moe_enabled() and is_act_and_mul
)
self.aiter_fmoe_shared_expert_enabled = (
rocm_aiter_ops.is_fusion_moe_shared_experts_enabled()
rocm_aiter_ops.is_fusion_moe_shared_experts_enabled() and is_act_and_mul
)
self.num_fused_shared_experts = (
......@@ -619,9 +623,9 @@ class FusedMoE(CustomOp):
# for heuristic purposes, so it must be initialized first.
self.quant_method: FusedMoEMethodBase = _get_quant_method()
if not self.moe_config.is_act_and_mul and not current_platform.is_cuda():
if not self.moe_config.is_act_and_mul and not current_platform.is_cuda_alike():
raise NotImplementedError(
"is_act_and_mul=False is supported only for CUDA for now"
"is_act_and_mul=False is supported only for CUDA and ROCm for now"
)
if self.enable_eplb and not self.quant_method.supports_eplb:
......
......@@ -8,6 +8,7 @@ from torch.nn import Module
import vllm.envs as envs
import vllm.model_executor.layers.fused_moe.modular_kernel as mk
from vllm._aiter_ops import rocm_aiter_ops
from vllm.logger import init_logger
from vllm.model_executor.custom_op import CustomOp
from vllm.model_executor.layers.fused_moe.config import (
......@@ -56,6 +57,12 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
use_ep=self.moe.moe_parallel_config.use_ep,
use_dp=self.moe.moe_parallel_config.dp_size > 1,
)
# AITER only supports gated activations (silu/gelu), so disable it
# for non-gated MoE (is_act_and_mul=False)
self.rocm_aiter_moe_enabled = (
rocm_aiter_ops.is_fused_moe_enabled() and moe.is_act_and_mul
)
self.kernel: mk.FusedMoEModularKernel | None = None
@property
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment