Commit 63f1c793 authored by zhuwenwen
Browse files

feat: moe_align_block_size 更新lightop 接口,加入对ep的支持

parent dfe1a844
...@@ -17,7 +17,8 @@ def moe_align_block_size( ...@@ -17,7 +17,8 @@ def moe_align_block_size(
num_experts: int, num_experts: int,
expert_map: Optional[torch.Tensor] = None, expert_map: Optional[torch.Tensor] = None,
pad_sorted_ids: bool = False, pad_sorted_ids: bool = False,
num_token: Optional[int] = None num_token: Optional[int] = None,
expert_mask: Optional[torch.Tensor] = None
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
""" """
Aligns the token distribution across experts to be compatible with block Aligns the token distribution across experts to be compatible with block
...@@ -94,14 +95,17 @@ def moe_align_block_size( ...@@ -94,14 +95,17 @@ def moe_align_block_size(
dtype=torch.int32, dtype=torch.int32,
device=topk_ids.device) device=topk_ids.device)
if envs.VLLM_USE_LIGHTOP: if envs.VLLM_USE_LIGHTOP or expert_mask is not None:
from lightop import op as op from lightop import op as op
op.moe_align_block_size(topk_ids, num_experts, block_size, sorted_ids, op.moe_align_block_size(topk_ids, num_experts, block_size, sorted_ids,
expert_ids, num_tokens_post_pad, None) expert_ids, num_tokens_post_pad,
expert_map = expert_map,
expert_mask = expert_mask,
num_local_tokens = None)
else: else:
ops.moe_align_block_size(topk_ids, num_experts, block_size, sorted_ids, ops.moe_align_block_size(topk_ids, num_experts, block_size, sorted_ids,
expert_ids, num_tokens_post_pad) expert_ids, num_tokens_post_pad)
if expert_map is not None: if expert_map is not None:
expert_ids = expert_map[expert_ids] expert_ids = expert_map[expert_ids]
return sorted_ids, expert_ids, num_tokens_post_pad return sorted_ids, expert_ids, num_tokens_post_pad
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment