[Core] Minor structured-output related scheduler optimization (#34765)

Signed-off-by: Nick Hill <nickhill123@gmail.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>

[Core] Minor structured-output related scheduler optimization (#34765)
Signed-off-by: Nick Hill <nickhill123@gmail.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
820d7815 · Nick Hill · GitHub · ab6f3487 · 820d7815
Unverified commit 820d7815, authored Feb 21, 2026 by Nick Hill; committed by GitHub on Feb 21, 2026.
Hide whitespace changes
Inline Side-by-side

Showing with 6 additions and 6 deletions

vllm/v1/core/sched/scheduler.py vllm/v1/core/sched/scheduler.py +6 -6

No files found.
--- a/vllm/v1/core/sched/scheduler.py
+++ b/vllm/v1/core/sched/scheduler.py
@@ -945,7 +945,7 @@ class Scheduler(SchedulerInterface):
                request.num_tokens + request.num_output_placeholders
            )
            scheduler_output.has_structured_output_requests |= (
-                request.use_structured_output
+                request.use_structured_output and not request.is_prefill_chunk
            )

            # NOTE: _free_encoder_inputs relies on num_computed_tokens, which
@@ -1232,14 +1232,14 @@ class Scheduler(SchedulerInterface):
    ) -> GrammarOutput | None:
        # Collect list of scheduled request ids that use structured output.
        # The corresponding rows of the bitmask will be in this order.
-        # PERF: in case of chunked prefill,
-        # request might not include any new tokens.
-        # Therefore, we might introduce some additional
-        # cycle to fill in the bitmask, which could be a big no-op.
+        if not scheduler_output.has_structured_output_requests:
+            return None
+
        structured_output_request_ids = [
            req_id
            for req_id in scheduler_output.num_scheduled_tokens
-            if (req := self.requests.get(req_id)) and req.use_structured_output
+            if (req := self.requests.get(req_id))
+            and (req.use_structured_output and not req.is_prefill_chunk)
        ]
        if not structured_output_request_ids:
            return None