Commit 3cd98472 authored by zhuwenwen
Browse files

Merge branch 'optimize-block-tables' into 'v0.9.2-dev'

[fix]优化 block_tables 的生成逻辑,增加对混合情况的检测,确保在存在空和非空块时正确计算最大块长度。

See merge request dcutoolkit/deeplearing/vllm!191
parents 30ed0f33 4932783d
......@@ -246,12 +246,33 @@ class CommonMetadataBuilder(AttentionMetadataBuilder[TAttentionMetadata]):
device, non_blocking=True)
else:
block_tables = make_tensor_with_pad(
self.block_tables,
pad=0,
dtype=torch.int,
device=device,
)
has_empty: bool = any(len(bt) == 0 for bt in self.block_tables)
has_non_empty = any(len(bt) > 0 for bt in self.block_tables)
max_block_length = 0
if has_empty and has_non_empty:
for inter_data in self.input_builder.inter_data_list:
block_tables = inter_data.block_tables
if block_tables:
for seq_id in inter_data.seq_ids:
if seq_id in block_tables:
block_table = block_tables[seq_id]
max_block_length = max(max_block_length, len(block_table))
if max_block_length >0:
block_tables = make_tensor_with_pad(
self.block_tables,
pad=0,
dtype=torch.int,
device=device,
max_len=max_block_length,
)
else:
block_tables = make_tensor_with_pad(
self.block_tables,
pad=0,
dtype=torch.int,
device=device,
)
assert max_query_len > 0, "query_lens: {}".format(query_lens)
assert device is not None
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment