Unverified Commit b730aa6b authored by 996_icu's avatar 996_icu Committed by GitHub
Browse files

[EAGLE] Fix some boundary situations when retracting reqs and a req's max token = 1 (#2939)


Co-authored-by: josephyou <josephyou@tencent.com>
parent 60b2a44a
...@@ -1112,6 +1112,8 @@ class ScheduleBatch: ...@@ -1112,6 +1112,8 @@ class ScheduleBatch:
self.has_grammar = any(req.grammar for req in self.reqs) self.has_grammar = any(req.grammar for req in self.reqs)
self.sampling_info.filter_batch(keep_indices, new_indices) self.sampling_info.filter_batch(keep_indices, new_indices)
if self.spec_info:
self.spec_info.filter_batch(new_indices)
def merge_batch(self, other: "ScheduleBatch"): def merge_batch(self, other: "ScheduleBatch"):
# Penalizer orchestrator must be merged before Batch.reqs is merged. This is because # Penalizer orchestrator must be merged before Batch.reqs is merged. This is because
......
...@@ -228,6 +228,14 @@ class EAGLEDraftInput(SpecInfo): ...@@ -228,6 +228,14 @@ class EAGLEDraftInput(SpecInfo):
assert len(batch.extend_lens) == 1 assert len(batch.extend_lens) == 1
batch.input_ids = torch.concat((batch.input_ids[1:], self.verified_id)) batch.input_ids = torch.concat((batch.input_ids[1:], self.verified_id))
def filter_batch(
    self,
    new_indices: torch.Tensor,
):
    """Shrink cached speculative-decoding state after the batch is filtered.

    Keeps only the first ``len(new_indices)`` rows of each cached tensor.
    NOTE(review): this assumes the surviving requests occupy a contiguous
    prefix (i.e. retracted requests were taken from the tail) — confirm
    against ``ScheduleBatch.filter_batch`` callers.

    Args:
        new_indices: indices of the requests kept in the batch; only its
            length is used here.
    """
    keep = len(new_indices)
    self.sample_output = self.sample_output[:keep]
    self.hidden_states = self.hidden_states[:keep]
    self.verified_id = self.verified_id[:keep]
def prepare_for_decode(self, batch: ScheduleBatch): def prepare_for_decode(self, batch: ScheduleBatch):
prob = self.sample_output # shape: (b * top_k, vocab) or (b, vocab) prob = self.sample_output # shape: (b * top_k, vocab) or (b, vocab)
top = torch.topk(prob, self.topk, dim=-1) top = torch.topk(prob, self.topk, dim=-1)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment