"server/vscode:/vscode.git/clone" did not exist on "379c5c4da2494a15fb82b3b1a39fa454cb73df44"
Commit 912788c0 (unverified), authored by Chang Su and committed by GitHub
Browse files

perf: optimize local_block_table memory allocation (#6273)

parent 0f75b907
......@@ -1165,7 +1165,6 @@ class FlashAttentionBackend(AttentionBackend):
max_virtual_batches = max_bs * (
(max_seq_len + attn_chunk_size - 1) // attn_chunk_size
)
- max_blocks_per_seq = (max_seq_len + attn_chunk_size - 1) // attn_chunk_size
max_pages_per_block = (attn_chunk_size + page_size - 1) // page_size
self.decode_cuda_graph_local_attn_metadata = {
......@@ -1177,7 +1176,7 @@ class FlashAttentionBackend(AttentionBackend):
),
"local_block_table": torch.zeros(
max_virtual_batches,
- max_blocks_per_seq * max_pages_per_block,
+ max_pages_per_block,
dtype=torch.int32,
device=self.device,
),
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment