Unverified commit 31c5d0a1, authored by Woosuk Kwon, committed by GitHub
Browse files

[Optimize] Don't send token ids when kv connector is not used (#20586)


Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
parent afb7cff1
...@@ -621,6 +621,7 @@ class Scheduler(SchedulerInterface): ...@@ -621,6 +621,7 @@ class Scheduler(SchedulerInterface):
new_block_ids: list[tuple[list[int], ...]] = [] new_block_ids: list[tuple[list[int], ...]] = []
num_computed_tokens: list[int] = [] num_computed_tokens: list[int] = []
use_connector = self.connector is not None
for req in itertools.chain(running_reqs, resumed_reqs): for req in itertools.chain(running_reqs, resumed_reqs):
req_id = req.request_id req_id = req.request_id
req_ids.append(req_id) req_ids.append(req_id)
...@@ -635,7 +636,10 @@ class Scheduler(SchedulerInterface): ...@@ -635,7 +636,10 @@ class Scheduler(SchedulerInterface):
token_ids = req.all_token_ids[req.num_computed_tokens:req. token_ids = req.all_token_ids[req.num_computed_tokens:req.
num_computed_tokens + num_tokens] num_computed_tokens + num_tokens]
new_token_ids.append(token_ids) new_token_ids.append(token_ids)
else: elif use_connector:
# When using a KVConnector, we add a placeholder to avoid index
# out of bounds errors. TODO: Remove this once the KVConnector
# is updated to handle token IDs properly.
new_token_ids.append([]) new_token_ids.append([])
new_block_ids.append(req_to_new_block_ids[req_id]) new_block_ids.append(req_to_new_block_ids[req_id])
num_computed_tokens.append(req.num_computed_tokens) num_computed_tokens.append(req.num_computed_tokens)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment