"git@developer.sourcefind.cn:change/sglang.git" did not exist on "45360b2fa9337b41576a1434d302663ec7fbc51d"
Unverified commit 4455b26e authored by DavidChan, committed by GitHub

[Bug fix] Fix the crash when enabling dp-attention on a single card (#3958)

parent c553e160
@@ -848,12 +848,12 @@ class DeepseekV2AttentionMLA(nn.Module):
 def all_gather(
     input_tensor: torch.Tensor, forward_batch: ForwardBatch, rank, world_size, group
 ):
-    if world_size == 1:
-        return input_tensor
     all_lens = forward_batch.global_num_tokens_cpu
     max_len = max(forward_batch.global_num_tokens_cpu)
+    if world_size == 1:
+        return input_tensor, 0, all_lens[0]
     padded_tensor = torch.nn.functional.pad(
         input_tensor, (0, 0, 0, max_len - input_tensor.shape[0])
     )
...
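
Why the old early return crashed: the multi-card path of all_gather returns three values (the gathered tensor plus what appear to be the start and end indices of the local slice), so call sites unpack a 3-tuple; on a single card the old code handed back a bare tensor and that unpacking failed. Below is a minimal, self-contained sketch of the patched behaviour, not the sglang source; the function name and the stubbed multi-card branch are illustrative only.

import torch

def all_gather_sketch(input_tensor, global_num_tokens, rank, world_size):
    # Illustrative stand-in for the patched helper (names here are hypothetical).
    all_lens = global_num_tokens          # tokens contributed by each rank
    max_len = max(global_num_tokens)      # computed before the early return, as in the patch
    if world_size == 1:
        # The fix: keep the same 3-value return shape as the multi-card path
        # instead of returning a bare tensor.
        return input_tensor, 0, all_lens[0]
    # Multi-card path elided: the real code pads each rank's tensor to
    # max_len, all-gathers across the group, and slices out the valid rows.
    raise NotImplementedError("multi-card path omitted from this sketch")

# Single-card call site: unpacking three values no longer crashes.
hidden = torch.randn(5, 16)
gathered, start, end = all_gather_sketch(hidden, [5], rank=0, world_size=1)
assert gathered is hidden and (start, end) == (0, 5)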