Unverified Commit 92cbef59 authored by Elfie Guo's avatar Elfie Guo Committed by GitHub
Browse files

[bug fix] Ensure local token and global token buffers are pointing to different storage (#8785)

parent b3359dc9
......@@ -264,9 +264,10 @@ def _dp_gather_via_all_reduce(
assert global_tokens.is_contiguous()
if local_tokens.shape[0] > 0 and (is_partial or get_attention_tp_rank() == 0):
assert (
local_tokens.untyped_storage() is not global_tokens.untyped_storage()
), "aliasing between global_tokens and local_tokens not allowed"
if local_tokens.untyped_storage() is global_tokens.untyped_storage():
# dp_gather is an in-place operation and requires input and output tensors to not be aliased.
# so we create a separate buffer if they share the same storage.
global_tokens = torch.empty_like(global_tokens)
memcpy_triton(
global_tokens, local_tokens, 0, local_start_pos, local_num_tokens, False
......@@ -347,9 +348,10 @@ def dp_scatter(
assert local_tokens.is_contiguous()
assert global_tokens.is_contiguous()
if local_tokens.shape[0] > 0:
assert (
local_tokens.untyped_storage() is not global_tokens.untyped_storage()
), "aliasing between local_tokens and global_tokens not allowed"
if local_tokens.untyped_storage() is global_tokens.untyped_storage():
# dp_scatter is an in-place operation and requires input and output tensors to not be aliased.
# so we create a separate buffer if they share the same storage.
local_tokens = torch.empty_like(local_tokens)
memcpy_triton(
local_tokens, global_tokens, 0, local_start_pos, local_num_tokens, True
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment