Commit 87ce2db9 authored by laibao
Browse files

fix(v1):修复抢占恢复时 BlockTable 溢出

  - 缺失请求统一延后重新加入,避免同一轮重复写入
  - 抢占恢复用覆盖写入(add_row),正常运行用增量追加(append_row)
  - 保持正常请求的追加语义不变
parent cedfe391
...@@ -539,19 +539,16 @@ class GPUModelRunnerBase(LoRAModelRunnerMixin): ...@@ -539,19 +539,16 @@ class GPUModelRunnerBase(LoRAModelRunnerMixin):
# The request is not in the persistent batch. # The request is not in the persistent batch.
# The request was either preempted and resumed later, or was not # The request was either preempted and resumed later, or was not
# scheduled in the previous step and needs to be added again. # scheduled in the previous step and needs to be added again.
if not is_last_rank: req_ids_to_add.append(req_id)
req_state = self.requests[req_id] continue
self.input_batch.add_request(req_state)
req_index = self.input_batch.req_id_to_index.get(req_id)
else:
req_ids_to_add.append(req_id)
continue
# Update the persistent batch. # Update the persistent batch.
self.input_batch.num_computed_tokens_cpu[req_index] = ( self.input_batch.num_computed_tokens_cpu[req_index] = (
num_computed_tokens) num_computed_tokens)
self.input_batch.block_table.append_row(new_block_ids, req_index) if resumed_from_preemption:
self.input_batch.block_table.add_row(new_block_ids, req_index)
else:
self.input_batch.block_table.append_row(new_block_ids, req_index)
# For the last rank, we don't need to update the token_ids_cpu # For the last rank, we don't need to update the token_ids_cpu
# because the sampled tokens are already cached. # because the sampled tokens are already cached.
if not is_last_rank: if not is_last_rank:
...@@ -3699,4 +3696,4 @@ class GPUModelRunnerMTP(GPUModelRunnerBase): ...@@ -3699,4 +3696,4 @@ class GPUModelRunnerMTP(GPUModelRunnerBase):
if envs.VLLM_USE_ZERO_MTP: if envs.VLLM_USE_ZERO_MTP:
GPUModelRunner=GPUModelRunnerMTP GPUModelRunner=GPUModelRunnerMTP
else: else:
GPUModelRunner=GPUModelRunnerBase GPUModelRunner=GPUModelRunnerBase
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment