Unverified commit 4aa39d72, authored by chenqianfzh, committed by GitHub
Browse files

fix the break in FlashInferFusedMoE (#10356)


Co-authored-by: Ho-Ren (Jack) Chuang <horenchuang@bytedance.com>
parent b4c2c421
......@@ -26,6 +26,7 @@ from sglang.srt.layers.moe import (
from sglang.srt.layers.moe.token_dispatcher.standard import (
CombineInput,
StandardDispatcher,
StandardDispatchOutput,
)
from sglang.srt.layers.moe.topk import TopKOutput, TopKOutputChecker
from sglang.srt.layers.quantization.base_config import (
......@@ -981,8 +982,9 @@ class FlashInferFusedMoE(FusedMoE):
# Matrix multiply.
final_hidden_states = self.quant_method.apply_with_router_logits(
layer=self,
x=hidden_states,
topk_output=topk_output,
dispatch_output=StandardDispatchOutput(
hidden_states=hidden_states, topk_output=topk_output
),
)
if self.reduce_results and (self.moe_tp_size > 1 or self.moe_ep_size > 1):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment