Unverified commit 1ebe1d6d authored by Ke Bao, committed by GitHub
Browse files

Optimize MoE topk with torch compile (#3236)

parent 7811bfda
...@@ -17,6 +17,8 @@ from typing import Callable, Optional ...@@ -17,6 +17,8 @@ from typing import Callable, Optional
import torch import torch
import torch.nn.functional as F import torch.nn.functional as F
from sglang.srt.utils import get_compiler_backend
def fused_topk_native( def fused_topk_native(
hidden_states: torch.Tensor, hidden_states: torch.Tensor,
...@@ -74,6 +76,7 @@ def fused_topk( ...@@ -74,6 +76,7 @@ def fused_topk(
# This is used by the Deepseek-V2 model # This is used by the Deepseek-V2 model
@torch.compile(dynamic=True, backend=get_compiler_backend())
def grouped_topk( def grouped_topk(
hidden_states: torch.Tensor, hidden_states: torch.Tensor,
gating_output: torch.Tensor, gating_output: torch.Tensor,
...@@ -108,6 +111,7 @@ def grouped_topk( ...@@ -108,6 +111,7 @@ def grouped_topk(
return topk_weights.to(torch.float32), topk_ids.to(torch.int32) return topk_weights.to(torch.float32), topk_ids.to(torch.int32)
@torch.compile(dynamic=True, backend=get_compiler_backend())
def biased_grouped_topk( def biased_grouped_topk(
hidden_states: torch.Tensor, hidden_states: torch.Tensor,
gating_output: torch.Tensor, gating_output: torch.Tensor,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment