Merge branch 'v0.9.2-dev-tc_opt' into 'v0.9.2-dev'

fix(v1)：修复抢占恢复时 BlockTable 溢出

See merge request dcutoolkit/deeplearing/vllm!398

Merge branch 'v0.9.2-dev-tc_opt' into 'v0.9.2-dev'
fix(v1)：修复抢占恢复时 BlockTable 溢出
See merge request dcutoolkit/deeplearing/vllm!398
56d6c689 · zhuwenwen · f35ea024 · 87ce2db9 · 56d6c689
Commit 56d6c689 authored Jan 29, 2026 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing with 7 additions and 10 deletions

vllm/v1/worker/gpu_model_runner.py vllm/v1/worker/gpu_model_runner.py +7 -10

No files found.
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -539,19 +539,16 @@ class GPUModelRunnerBase(LoRAModelRunnerMixin):
                # The request is not in the persistent batch.
                # The request was either preempted and resumed later, or was not
                # scheduled in the previous step and needs to be added again.
-                if not is_last_rank:
-                    req_state = self.requests[req_id]
-                    self.input_batch.add_request(req_state)
-                    req_index = self.input_batch.req_id_to_index.get(req_id)
-                else:
                req_ids_to_add.append(req_id)
                continue

            # Update the persistent batch.
            self.input_batch.num_computed_tokens_cpu[req_index] = (
                num_computed_tokens)
+            if resumed_from_preemption:
+                self.input_batch.block_table.add_row(new_block_ids, req_index)
+            else:
                self.input_batch.block_table.append_row(new_block_ids, req_index)
-
            # For the last rank, we don't need to update the token_ids_cpu
            # because the sampled tokens are already cached.
            if not is_last_rank: