"ssh:/git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "e202dd2736bc575b11250b15311512d19d3225d5"
Unverified commit e53dfd3e, authored by Lily Liu, committed by GitHub
Browse files

[Kernel] Fix Flashinfer Correctness (#7284)

parent 6d944202
...@@ -127,6 +127,7 @@ class FlashInferMetadata(AttentionMetadata): ...@@ -127,6 +127,7 @@ class FlashInferMetadata(AttentionMetadata):
raise ValueError( raise ValueError(
f"Only {supported_head_sizes} are supported for head_dim,", f"Only {supported_head_sizes} are supported for head_dim,",
f"received {self.head_dim}.") f"received {self.head_dim}.")
self.is_profile_run = is_block_tables_empty(self.block_tables)
def begin_forward(self): def begin_forward(self):
if self.num_prefill_tokens > 0: if self.num_prefill_tokens > 0:
...@@ -140,11 +141,14 @@ class FlashInferMetadata(AttentionMetadata): ...@@ -140,11 +141,14 @@ class FlashInferMetadata(AttentionMetadata):
assert self.paged_kv_last_page_len is not None assert self.paged_kv_last_page_len is not None
batch_size = self.query_start_loc.shape[0] - 1 batch_size = self.query_start_loc.shape[0] - 1
assert batch_size >= 0 assert batch_size >= 0
# The prefill stage does not read kv cache. # The profile run does not read kv cache.
# Both paged_kv_indices and paged_kv_last_page_len are empty. # Both paged_kv_indices and paged_kv_last_page_len are empty.
# paged_kv_indptr is a zero tensor with size batch_size + 1. # paged_kv_indptr is a zero tensor with size batch_size + 1.
self.paged_kv_indptr = torch.zeros(batch_size + 1, if self.is_profile_run:
device=self.device) self.paged_kv_indptr = torch.zeros(batch_size + 1,
device=self.device)
else:
self.paged_kv_indptr = self.paged_kv_indptr.to(self.device)
self.paged_kv_last_page_len = self.paged_kv_last_page_len.to( self.paged_kv_last_page_len = self.paged_kv_last_page_len.to(
self.device) self.device)
self.paged_kv_indices = self.paged_kv_indices.to(self.device) self.paged_kv_indices = self.paged_kv_indices.to(self.device)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment