"vllm/executor/mp_distributed_executor.py" did not exist on "dd793d1de59b5efad25f4794b68cb935824c7a11"
Unverified commit 067da2d1, authored by Nick Hill, committed by GitHub
Browse files

[Core] Simplify setting new_token_ids in CachedRequestData (#26388)


Signed-off-by: Nick Hill <nhill@redhat.com>
parent 046118b9
......@@ -712,7 +712,6 @@ class Scheduler(SchedulerInterface):
num_computed_tokens: list[int] = []
num_output_tokens: list[int] = []
use_connector = self.connector is not None
for req in itertools.chain(running_reqs, resumed_reqs):
req_id = req.request_id
req_ids.append(req_id)
......@@ -729,16 +728,11 @@ class Scheduler(SchedulerInterface):
req.num_computed_tokens : req.num_computed_tokens + num_tokens
]
new_token_ids.append(token_ids)
elif use_connector:
# When using a KVConnector, we add a placeholder to avoid index
# out of bounds errors. TODO: Remove this once the KVConnector
# is updated to handle token IDs properly.
new_token_ids.append([])
new_block_ids.append(
req_to_new_blocks[req_id].get_block_ids(allow_none=True)
)
num_computed_tokens.append(req.num_computed_tokens)
num_output_tokens.append(len(req.output_token_ids))
num_output_tokens.append(req.num_output_tokens)
# Because resumed_reqs is usually empty, it is more efficient to do
# in-place appending so that we don't need to allocate a new list.
resumed_from_preemption = [False] * len(running_reqs)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment