Revert "feat: update grouped_topk to support softmax and sigmoid" (#4505)

3ded4b21 · Ke Bao · GitHub · f4d7ab7a · 3ded4b21
Unverified Commit 3ded4b21 authored Mar 18, 2025 by Ke Bao Committed by GitHub Mar 17, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 3 additions and 10 deletions

python/sglang/srt/layers/moe/topk.py python/sglang/srt/layers/moe/topk.py +3 -10

No files found.
--- a/python/sglang/srt/layers/moe/topk.py
+++ b/python/sglang/srt/layers/moe/topk.py
@@ -88,6 +88,7 @@ def fused_topk(
    return topk_weights, topk_ids
+# This is used by the DeepSeek V2/V3/R1 series models
 @torch.compile(dynamic=True, backend=get_compiler_backend())
 def grouped_topk(
    hidden_states: torch.Tensor,
@@ -96,17 +97,10 @@ def grouped_topk(
    renormalize: bool,
    num_expert_group: int = 0,
    topk_group: int = 0,
-    scoring_func: str = "softmax",
 ):
    assert hidden_states.shape[0] == gating_output.shape[0], "Number of tokens mismatch"
-    if scoring_func == "softmax":
+    scores = torch.softmax(gating_output, dim=-1)
-        scores = torch.softmax(gating_output, dim=-1)
-    elif scoring_func == "sigmoid":
-        scores = gating_output.sigmoid()
-    else:
-        raise ValueError(f"Scoring function '{scoring_func}' is not supported.")
    num_token = scores.shape[0]
    group_scores = (
        scores.view(num_token, num_expert_group, -1).max(dim=-1).values
@@ -130,7 +124,6 @@ def grouped_topk(
    return topk_weights.to(torch.float32), topk_ids.to(torch.int32)
-# DeepSeek V2/V3/R1 uses biased_grouped_top
 @torch.compile(dynamic=True, backend=get_compiler_backend())
 def biased_grouped_topk(
    hidden_states: torch.Tensor,
@@ -185,7 +178,7 @@ def select_experts(
    correction_bias: Optional[torch.Tensor] = None,
    torch_native: bool = False,
 ):
-    # DeepSeek V2/V3/R1 uses biased_grouped_top
+    # DeepSeek V2 uses grouped_topk
    if use_grouped_topk:
        assert topk_group is not None
        assert num_expert_group is not None