Fix the break in FlashInferFusedMoE: pass a StandardDispatchOutput to apply_with_router_logits (#10356)

Co-authored-by: Ho-Ren (Jack) Chuang <horenchuang@bytedance.com>

Fix the break in FlashInferFusedMoE: pass a StandardDispatchOutput to apply_with_router_logits (#10356)
Co-authored-by: Ho-Ren (Jack) Chuang <horenchuang@bytedance.com>
4aa39d72 · chenqianfzh · GitHub · b4c2c421 · 4aa39d72
Unverified commit 4aa39d72, authored Sep 11, 2025 by chenqianfzh; committed by GitHub on Sep 11, 2025.
Hide whitespace changes
Inline Side-by-side

Showing with 4 additions and 2 deletions

python/sglang/srt/layers/moe/fused_moe_triton/layer.py python/sglang/srt/layers/moe/fused_moe_triton/layer.py +4 -2

No files found.
--- a/python/sglang/srt/layers/moe/fused_moe_triton/layer.py
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/layer.py
@@ -26,6 +26,7 @@ from sglang.srt.layers.moe import (
 from sglang.srt.layers.moe.token_dispatcher.standard import (
    CombineInput,
    StandardDispatcher,
+    StandardDispatchOutput,
 )
 from sglang.srt.layers.moe.topk import TopKOutput, TopKOutputChecker
 from sglang.srt.layers.quantization.base_config import (
@@ -981,8 +982,9 @@ class FlashInferFusedMoE(FusedMoE):
        # Matrix multiply.
        final_hidden_states = self.quant_method.apply_with_router_logits(
            layer=self,
-            x=hidden_states,
+            dispatch_output=StandardDispatchOutput(
-            topk_output=topk_output,
+                hidden_states=hidden_states, topk_output=topk_output
+            ),
        )
        if self.reduce_results and (self.moe_tp_size > 1 or self.moe_ep_size > 1):