Unverified commit fb4983e1, authored by Ning Xie, committed by GitHub
Browse files

[Misc] add reorder_batch AttentionMetadataBuilder (#23798)


Signed-off-by: Andy Xie <andy.xning@gmail.com>
parent 379ea282
...@@ -212,6 +212,23 @@ class AttentionMetadataBuilder(abc.ABC, Generic[M]): ...@@ -212,6 +212,23 @@ class AttentionMetadataBuilder(abc.ABC, Generic[M]):
""" """
raise NotImplementedError raise NotImplementedError
def reorder_batch(self, input_batch: "InputBatch",
                  scheduler_output: "SchedulerOutput") -> bool:
    """
    Update the order of requests in the batch based on the attention
    backend's needs.

    For example, some attention backends (namely MLA) may want to separate
    requests based on whether the attention computation will be
    compute-bound or memory-bound.

    Args:
        input_batch: input batch
        scheduler_output: scheduler output.

    Returns:
        True if the batch was modified, False otherwise.

    Raises:
        NotImplementedError: always, in this implementation; no reordering
            is performed here.
    """
    raise NotImplementedError
def build_for_cudagraph_capture( def build_for_cudagraph_capture(
self, common_attn_metadata: CommonAttentionMetadata) -> M: self, common_attn_metadata: CommonAttentionMetadata) -> M:
""" """
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment