[Bugfix] Avoid unnecessary reduce-scatter call in prepare_mlp (#9169)

98457c04 · Huaixin Chang · GitHub · 0fc8bf2c · 98457c04
Unverified commit 98457c04, authored Aug 14, 2025 by Huaixin Chang; committed by GitHub Aug 13, 2025
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 4 additions and 0 deletions

python/sglang/srt/layers/dp_attention.py python/sglang/srt/layers/dp_attention.py +4 -0

No files found.
--- a/python/sglang/srt/layers/dp_attention.py
+++ b/python/sglang/srt/layers/dp_attention.py
@@ -292,6 +292,10 @@ def _dp_gather_via_all_gather(
    forward_batch: ForwardBatch,
    is_partial: bool,
 ):
+    if get_attention_tp_size() == 1:
+        get_tp_group().all_gather_into_tensor(global_tokens, local_tokens)
+        return
+
    if not is_partial:
        if get_attention_tp_rank() != 0:
            local_tokens.fill_(0)