Avoid redundant hidden_states.clone() calls (#7705)

cb432f17 · Cheng Wan · GitHub · 1964c325 · cb432f17 · cb432f17
Unverified commit cb432f17, authored Jul 04, 2025 by Cheng Wan; committed by GitHub on Jul 04, 2025
Showing 2 changed files with 2 additions and 7 deletions

python/sglang/srt/layers/logits_processor.py +2 -2

python/sglang/srt/models/deepseek_v2.py +0 -5

No files found.
--- a/python/sglang/srt/layers/logits_processor.py
+++ b/python/sglang/srt/layers/logits_processor.py
@@ -436,8 +436,8 @@ class LogitsProcessor(nn.Module):
        if self.do_tensor_parallel_all_gather_dp_attn:
            logits_metadata.compute_dp_attention_metadata(hidden_states)
            hidden_states, local_hidden_states = (
-                logits_metadata.gathered_buffer,
+                torch.empty_like(logits_metadata.gathered_buffer),
-                hidden_states.clone(),
+                hidden_states,
            )
            dp_gather_replicate(hidden_states, local_hidden_states, logits_metadata)

--- a/python/sglang/srt/models/deepseek_v2.py
+++ b/python/sglang/srt/models/deepseek_v2.py
@@ -1840,11 +1840,6 @@ class DeepseekV2DecoderLayer(nn.Module):
            hidden_states, residual, forward_batch
        )
-        if self.enable_dp_attention and self.speculative_algorithm.is_eagle():
-            # NOTE: this line resolves the degradation of MTP reception rate for non-zero DP ranks.
-            # See discussion here (https://github.com/sgl-project/sglang/pull/6081#discussion_r2147452251).
-            hidden_states = hidden_states.clone()
        return hidden_states, residual
    def op_comm_prepare_attn(