Unverified commit 5c8f2adf, authored by Jie Luo and committed by GitHub
Browse files

[Bugfix] Fix block size in block_table with PCP (#29094)


Signed-off-by: Livinfly <luojie3m@gmail.com>
parent ed8e6843
...@@ -84,7 +84,7 @@ class BlockTable: ...@@ -84,7 +84,7 @@ class BlockTable:
self.pcp_world_size = get_pcp_group().world_size self.pcp_world_size = get_pcp_group().world_size
self.pcp_rank = get_pcp_group().rank_in_group self.pcp_rank = get_pcp_group().rank_in_group
except AssertionError: except AssertionError:
# DCP might not be initialized in testing # PCP might not be initialized in testing
self.pcp_world_size = 1 self.pcp_world_size = 1
self.pcp_rank = 0 self.pcp_rank = 0
try: try:
...@@ -268,6 +268,11 @@ class MultiGroupBlockTable: ...@@ -268,6 +268,11 @@ class MultiGroupBlockTable:
# (max_model_len//dcp_world_size) tokens in kvcache, # (max_model_len//dcp_world_size) tokens in kvcache,
# so the block_size which used for calc max_num_blocks_per_req # so the block_size which used for calc max_num_blocks_per_req
# must be multiplied by dcp_world_size. # must be multiplied by dcp_world_size.
try:
pcp_world_size = get_pcp_group().world_size
except AssertionError:
# PCP might not be initialized in testing
pcp_world_size = 1
try: try:
dcp_world_size = get_dcp_group().world_size dcp_world_size = get_dcp_group().world_size
except AssertionError: except AssertionError:
...@@ -280,12 +285,14 @@ class MultiGroupBlockTable: ...@@ -280,12 +285,14 @@ class MultiGroupBlockTable:
f"must match block_sizes length ({len(block_sizes)})" f"must match block_sizes length ({len(block_sizes)})"
) )
total_cp_world_size = dcp_world_size * pcp_world_size
self.block_tables = [ self.block_tables = [
BlockTable( BlockTable(
block_size, block_size,
max_num_reqs, max_num_reqs,
max( max(
cdiv(max_model_len, block_size * dcp_world_size), cdiv(max_model_len, block_size * total_cp_world_size),
1 + num_speculative_tokens, 1 + num_speculative_tokens,
), ),
max_num_batched_tokens, max_num_batched_tokens,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment