[BugFix] Don’t compute reorder threshold when there are no attention groups (#27861)

933cdea4 · Huamin Li · GitHub · 3933f18a · 933cdea4
Unverified Commit 933cdea4 authored Oct 31, 2025 by Huamin Li Committed by GitHub Oct 31, 2025
Show whitespace changes
Inline Side-by-side

Showing with 5 additions and 0 deletions

vllm/v1/worker/gpu_model_runner.py vllm/v1/worker/gpu_model_runner.py +5 -0

No files found.
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -4149,6 +4149,11 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
            group.get_metadata_builder().reorder_batch_threshold
            for group in self._attn_group_iterator()
        ]
+        # With no attention groups (e.g. an attention-free model) the list is
+        # empty and reduce() would raise, so leave reordering disabled.
+        if len(reorder_batch_thresholds) == 0:
+            self.reorder_batch_threshold = None
+            return
        self.reorder_batch_threshold = reduce(min_none_high, reorder_batch_thresholds)

    def _find_compatible_block_sizes(