[feat]支持mtp模型full_cuda_graph

bd58c289 · 王敏 · 89eecc55 · bd58c289
Commit bd58c289 authored Aug 07, 2025 by 王敏
Hide whitespace changes
Inline Side-by-side

Showing with 2 additions and 2 deletions

vllm/v1/spec_decode/eagle.py vllm/v1/spec_decode/eagle.py +2 -2

No files found.
--- a/vllm/v1/spec_decode/eagle.py
+++ b/vllm/v1/spec_decode/eagle.py
@@ -232,7 +232,7 @@ class EagleProposer:
        sample_hidden_states = last_hidden_states[last_token_indices]
        logits = self.model.compute_logits(sample_hidden_states, None)

-        draft_token_ids = torch.argmax(logits, dim=-1)
+        draft_token_ids = logits.argmax(dim=-1)

        # Early exit if there is only one draft token to be generated.
        if self.num_speculative_tokens == 1:
@@ -380,7 +380,7 @@ class EagleProposer:
            logits = self.model.compute_logits(last_hidden_states[:batch_size],
                                               None)

-            # # TODO(wenlong): get more than one token for tree attention
+            # TODO(wenlong): get more than one token for tree attention
            draft_token_ids = logits.argmax(dim=-1)
            draft_token_ids_list.append(draft_token_ids)