Commit 820d7815 (unverified), authored by Nick Hill, committed by GitHub
Browse files

[Core] Minor structured-output related scheduler optimization (#34765)


Signed-off-by: Nick Hill <nickhill123@gmail.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
parent ab6f3487
......@@ -945,7 +945,7 @@ class Scheduler(SchedulerInterface):
request.num_tokens + request.num_output_placeholders
)
scheduler_output.has_structured_output_requests |= (
-request.use_structured_output
+request.use_structured_output and not request.is_prefill_chunk
)
# NOTE: _free_encoder_inputs relies on num_computed_tokens, which
......@@ -1232,14 +1232,14 @@ class Scheduler(SchedulerInterface):
) -> GrammarOutput | None:
# Collect list of scheduled request ids that use structured output.
# The corresponding rows of the bitmask will be in this order.
# PERF: in case of chunked prefill,
# request might not include any new tokens.
# Therefore, we might introduce some additional
# cycle to fill in the bitmask, which could be a big no-op.
if not scheduler_output.has_structured_output_requests:
return None
structured_output_request_ids = [
req_id
for req_id in scheduler_output.num_scheduled_tokens
-if (req := self.requests.get(req_id)) and req.use_structured_output
+if (req := self.requests.get(req_id))
+and (req.use_structured_output and not req.is_prefill_chunk)
]
if not structured_output_request_ids:
return None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment