update moe_sum and moe_align

1277ff09 · zhuwenwen · 7e68a7fe · 1277ff09 · 1277ff09
Commit 1277ff09 authored Oct 13, 2025 by zhuwenwen
2 changed files
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -1918,12 +1918,13 @@ def fused_experts_impl(
        #             ops.moe_sum(intermediate_cache3.view(*intermediate_cache3.size()),
        #                     out_hidden_states[begin_chunk_idx:end_chunk_idx]) * routed_scaling_factor
        else:
-            if envs.VLLM_USE_OPT_MOE_SUM:
-                moe_reduce_dispatch(intermediate_cache3.view(*intermediate_cache3.size()), out_hidden_states[begin_chunk_idx:end_chunk_idx], begin_chunk_idx, end_chunk_idx)
-            elif envs.VLLM_USE_LIGHTOP:
+            if envs.VLLM_USE_LIGHTOP:
+                from lightop import op as op
                op.moe_sum(input=intermediate_cache3.view(*intermediate_cache3.size()),
                    output=out_hidden_states[begin_chunk_idx:end_chunk_idx], bias=None, 
                    expert_mask=None, num_local_tokens=None, factor=None)
+            elif envs.VLLM_USE_OPT_MOE_SUM:
+                moe_reduce_dispatch(intermediate_cache3.view(*intermediate_cache3.size()), out_hidden_states[begin_chunk_idx:end_chunk_idx], begin_chunk_idx, end_chunk_idx)
            else:
                ops.moe_sum(intermediate_cache3.view(*intermediate_cache3.size()),
                                out_hidden_states[begin_chunk_idx:end_chunk_idx])

--- a/vllm/model_executor/layers/fused_moe/moe_align_block_size.py
+++ b/vllm/model_executor/layers/fused_moe/moe_align_block_size.py
@@ -240,8 +240,16 @@ def moe_align_block_size(
                                expert_mask = expert_mask,
                                num_local_tokens = None)
    else:
-        ops.moe_align_block_size(topk_ids, num_experts, block_size, sorted_ids,
-                                 expert_ids, num_tokens_post_pad)
+        if envs.VLLM_USE_LIGHTOP:  # lightop path: this kernel takes the extra expert_* / num_local_tokens kwargs
+            from lightop import op as op
+            op.moe_align_block_size(topk_ids, num_experts, block_size, sorted_ids,
+                                expert_ids, num_tokens_post_pad,
+                                expert_map = None,  # remapping is applied to expert_ids after this call
+                                expert_mask = None,
+                                num_local_tokens = None)
+        else:  # fallback: the `ops` kernel that was called unconditionally before this change
+            ops.moe_align_block_size(topk_ids, num_experts, block_size, sorted_ids,
+                                    expert_ids, num_tokens_post_pad)
        if expert_map is not None:
            expert_ids = expert_map[expert_ids]