"vscode:/vscode.git/clone" did not exist on "05c50a82b82c108ad963fec4e572e1a888e62962"
Unverified commit 912788c0, authored by Chang Su, committed by GitHub
Browse files

perf: optimize local_block_table memory allocation (#6273)

parent 0f75b907
......@@ -1165,7 +1165,6 @@ class FlashAttentionBackend(AttentionBackend):
max_virtual_batches = max_bs * (
(max_seq_len + attn_chunk_size - 1) // attn_chunk_size
)
- max_blocks_per_seq = (max_seq_len + attn_chunk_size - 1) // attn_chunk_size
max_pages_per_block = (attn_chunk_size + page_size - 1) // page_size
self.decode_cuda_graph_local_attn_metadata = {
......@@ -1177,7 +1176,7 @@ class FlashAttentionBackend(AttentionBackend):
),
"local_block_table": torch.zeros(
max_virtual_batches,
- max_blocks_per_seq * max_pages_per_block,
+ max_pages_per_block,
dtype=torch.int32,
device=self.device,
),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment