Unverified commit 624a1e47, authored by Woosuk Kwon, committed by GitHub
Browse files

[V1][Minor] Minor optimizations for update_from_output (#12454)


Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
parent 372bf089
...@@ -411,6 +411,10 @@ class Scheduler: ...@@ -411,6 +411,10 @@ class Scheduler:
num_scheduled_tokens = scheduler_output.num_scheduled_tokens num_scheduled_tokens = scheduler_output.num_scheduled_tokens
new_running: List[Request] = [] new_running: List[Request] = []
outputs: List[EngineCoreOutput] = [] outputs: List[EngineCoreOutput] = []
# NOTE(woosuk): As len(self.running) can be up to 1K or more, the below
# loop can be a performance bottleneck. We should do our best to avoid
# expensive operations inside the loop.
for request in self.running: for request in self.running:
req_id = request.request_id req_id = request.request_id
request.num_computed_tokens += num_scheduled_tokens[req_id] request.num_computed_tokens += num_scheduled_tokens[req_id]
...@@ -421,6 +425,8 @@ class Scheduler: ...@@ -421,6 +425,8 @@ class Scheduler:
cached_encoder_input_ids = ( cached_encoder_input_ids = (
self.encoder_cache_manager.get_cached_input_ids(request)) self.encoder_cache_manager.get_cached_input_ids(request))
# OPTIMIZATION: Avoid list(set) if the set is empty.
if cached_encoder_input_ids:
for input_id in list(cached_encoder_input_ids): for input_id in list(cached_encoder_input_ids):
start_pos = request.mm_positions[input_id]["offset"] start_pos = request.mm_positions[input_id]["offset"]
num_tokens = request.mm_positions[input_id]["length"] num_tokens = request.mm_positions[input_id]["length"]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment