fix: 修复丢弃MTP代码报错

794553fd · jujl1 · 4b752578 · 794553fd
Commit 794553fd authored Jan 09, 2026 by jujl1
Show whitespace changes
Inline Side-by-side

Showing with 3 additions and 4 deletions

vllm/v1/worker/gpu_model_runner.py vllm/v1/worker/gpu_model_runner.py +3 -4

No files found.
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -1638,9 +1638,6 @@ class GPUModelRunnerBase(LoRAModelRunnerMixin):
        # Mask out the sampled tokens that should not be sampled.
        for i in discard_sampled_tokens_req_indices:
            valid_sampled_token_ids[i].clear()
-        if spec_token_ids is not None:
-            for i in discard_sampled_tokens_req_indices:
-                spec_token_ids[i].clear()

        # Cache the sampled tokens in the model runner, so that the scheduler
        # doesn't need to send them back.
@@ -1680,7 +1677,9 @@ class GPUModelRunnerBase(LoRAModelRunnerMixin):
                spec_decode_metadata,
                attn_metadata,
            )
-
+        if spec_token_ids is not None:
+            for i in discard_sampled_tokens_req_indices:
+                spec_token_ids[i].clear()
        # Clear KVConnector state after all KVs are generated.
        if has_kv_transfer_group():
            get_kv_transfer_group().clear_connector_metadata()