Unverified Commit 2f427491 authored by Li Hui's avatar Li Hui Committed by GitHub
Browse files

Fix topk inference performance regression (#6474)

parent d8189660
...@@ -264,6 +264,8 @@ def biased_grouped_topk( ...@@ -264,6 +264,8 @@ def biased_grouped_topk(
# TODO merge into kernel for this branch # TODO merge into kernel for this branch
topk_ids = topk_ids_logical_to_physical(topk_ids, expert_location_dispatch_info) topk_ids = topk_ids_logical_to_physical(topk_ids, expert_location_dispatch_info)
# TODO will fuse this into kernel, thus use slow manual operation now # TODO will fuse this into kernel, thus use slow manual operation now
if num_token_non_padded is None:
return topk_weights, topk_ids
torch.compile( torch.compile(
_mask_topk_ids_padded_region, dynamic=True, backend=get_compiler_backend() _mask_topk_ids_padded_region, dynamic=True, backend=get_compiler_backend()
)(topk_ids, num_token_non_padded) )(topk_ids, num_token_non_padded)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment