Commit 3cd98472 authored by zhuwenwen
Browse files

Merge branch 'optimize-block-tables' into 'v0.9.2-dev'

[fix]优化 block_tables 的生成逻辑,增加对混合情况的检测,确保在存在空和非空块时正确计算最大块长度。

See merge request dcutoolkit/deeplearing/vllm!191
parents 30ed0f33 4932783d
...@@ -246,12 +246,33 @@ class CommonMetadataBuilder(AttentionMetadataBuilder[TAttentionMetadata]): ...@@ -246,12 +246,33 @@ class CommonMetadataBuilder(AttentionMetadataBuilder[TAttentionMetadata]):
device, non_blocking=True) device, non_blocking=True)
else: else:
block_tables = make_tensor_with_pad( has_empty: bool = any(len(bt) == 0 for bt in self.block_tables)
self.block_tables, has_non_empty = any(len(bt) > 0 for bt in self.block_tables)
pad=0, max_block_length = 0
dtype=torch.int, if has_empty and has_non_empty:
device=device, for inter_data in self.input_builder.inter_data_list:
) block_tables = inter_data.block_tables
if block_tables:
for seq_id in inter_data.seq_ids:
if seq_id in block_tables:
block_table = block_tables[seq_id]
max_block_length = max(max_block_length, len(block_table))
if max_block_length >0:
block_tables = make_tensor_with_pad(
self.block_tables,
pad=0,
dtype=torch.int,
device=device,
max_len=max_block_length,
)
else:
block_tables = make_tensor_with_pad(
self.block_tables,
pad=0,
dtype=torch.int,
device=device,
)
assert max_query_len > 0, "query_lens: {}".format(query_lens) assert max_query_len > 0, "query_lens: {}".format(query_lens)
assert device is not None assert device is not None
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment