Unverified commit 372bf089, authored by Isotr0py, committed by GitHub
Browse files

[Bugfix] Fix missing seq_start_loc in xformers prefill metadata (#12464)


Signed-off-by: Isotr0py <2037008807@qq.com>
parent 5204ff5c
...@@ -199,6 +199,8 @@ class XFormersMetadata(AttentionMetadata, PagedAttentionMetadata): ...@@ -199,6 +199,8 @@ class XFormersMetadata(AttentionMetadata, PagedAttentionMetadata):
# Compute some attn_metadata fields which default to None # Compute some attn_metadata fields which default to None
query_start_loc = (None if self.query_start_loc is None else query_start_loc = (None if self.query_start_loc is None else
self.query_start_loc[:self.num_prefills + 1]) self.query_start_loc[:self.num_prefills + 1])
seq_start_loc = (None if self.seq_start_loc is None else
self.seq_start_loc[:self.num_prefills + 1])
slot_mapping = (None if self.slot_mapping is None else slot_mapping = (None if self.slot_mapping is None else
self.slot_mapping[:self.num_prefill_tokens]) self.slot_mapping[:self.num_prefill_tokens])
seq_lens = (None if self.seq_lens is None else seq_lens = (None if self.seq_lens is None else
...@@ -225,6 +227,7 @@ class XFormersMetadata(AttentionMetadata, PagedAttentionMetadata): ...@@ -225,6 +227,7 @@ class XFormersMetadata(AttentionMetadata, PagedAttentionMetadata):
max_prefill_seq_len=self.max_prefill_seq_len, max_prefill_seq_len=self.max_prefill_seq_len,
max_decode_seq_len=0, max_decode_seq_len=0,
query_start_loc=query_start_loc, query_start_loc=query_start_loc,
seq_start_loc=seq_start_loc,
context_lens_tensor=context_lens_tensor, context_lens_tensor=context_lens_tensor,
block_tables=block_tables, block_tables=block_tables,
use_cuda_graph=False, use_cuda_graph=False,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment