Unverified commit c6df05eb, authored by Zhiwei and committed by GitHub
Browse files

[ROCm] [Fused Moe EP] Use binary expert mask for aiter fused moe kernel (#29773)


Signed-off-by: ZhiweiYan-96 <zhiwei.yan@amd.com>
parent d726a7b0
...@@ -520,6 +520,10 @@ class FusedMoE(CustomOp): ...@@ -520,6 +520,10 @@ class FusedMoE(CustomOp):
self._init_aiter_shared_experts_topK_buffer( self._init_aiter_shared_experts_topK_buffer(
vllm_config=vllm_config, dp_size=dp_size_ vllm_config=vllm_config, dp_size=dp_size_
) )
if self.use_ep and self.rocm_aiter_fmoe_enabled:
assert self.expert_mask is None or torch.all(
(expert_mask == 0) | (expert_mask == 1)
), "Aiter Fused MoE kernel only supports expert_map with 0 and 1s."
assert intermediate_size % self.tp_size == 0 assert intermediate_size % self.tp_size == 0
self.hidden_size = hidden_size self.hidden_size = hidden_size
......
...@@ -633,6 +633,7 @@ class QuarkOCP_MX_MoEMethod(QuarkMoEMethod): ...@@ -633,6 +633,7 @@ class QuarkOCP_MX_MoEMethod(QuarkMoEMethod):
topk_ids=topk_ids, topk_ids=topk_ids,
activation=activation, activation=activation,
quant_config=self.moe_quant_config, quant_config=self.moe_quant_config,
expert_map=expert_map,
) )
else: else:
from vllm.model_executor.layers.fused_moe import fused_experts from vllm.model_executor.layers.fused_moe import fused_experts
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment