fix: 修复丢弃MTP代码报错

0cf05716 · jujl1 · c1795786 · 0cf05716
Commit 0cf05716 authored Jan 09, 2026 by jujl1
Hide whitespace changes
Inline Side-by-side

Showing with 3 additions and 4 deletions

vllm/v1/worker/gpu_model_runner.py vllm/v1/worker/gpu_model_runner.py +3 -4

No files found.
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -1643,9 +1643,6 @@ class GPUModelRunnerBase(LoRAModelRunnerMixin):
        # Mask out the sampled tokens that should not be sampled.
        for i in discard_sampled_tokens_req_indices:
            valid_sampled_token_ids[i].clear()
-        if spec_token_ids is not None:
-            for i in discard_sampled_tokens_req_indices:
-                spec_token_ids[i].clear()
        # Cache the sampled tokens in the model runner, so that the scheduler
        # doesn't need to send them back.
@@ -1685,7 +1682,9 @@ class GPUModelRunnerBase(LoRAModelRunnerMixin):
                spec_decode_metadata,
                attn_metadata,
            )
+        if spec_token_ids is not None:
+            for i in discard_sampled_tokens_req_indices:
+                spec_token_ids[i].clear()
        # Clear KVConnector state after all KVs are generated.
        if has_kv_transfer_group():
            get_kv_transfer_group().clear_connector_metadata()