Unverified commit 933cdea4, authored by Huamin Li, committed by GitHub
Browse files

[BugFix] Don’t compute reorder threshold when there are no attention groups (#27861)

parent 3933f18a
......@@ -4149,6 +4149,11 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
group.get_metadata_builder().reorder_batch_threshold
for group in self._attn_group_iterator()
]
# If there are no attention groups (attention-free model) or no backend
# reports a threshold, leave reordering disabled.
if len(reorder_batch_thresholds) == 0:
self.reorder_batch_threshold = None
return
self.reorder_batch_threshold = reduce(min_none_high, reorder_batch_thresholds)
def _find_compatible_block_sizes(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment