Unverified commit e21aa1df, authored by Hongbo Xu, committed by GitHub
Browse files

[PD] Add different TP sizes support for no-MLA models (#6793)


Co-authored-by: shangmingc <csmthu@gmail.com>
Co-authored-by: Shangming Cai <caishangming@linux.alibaba.com>
parent f3cbd245
@@ -27,6 +27,8 @@ class KVArgs:
decode_tp_size: int
# for pp prefill
prefill_pp_size: int
kv_head_num: int
page_size: int
class KVPoll:
......
@@ -122,6 +122,9 @@ class PrefillBootstrapQueue:
kv_args.kv_data_ptrs = kv_data_ptrs
kv_args.kv_data_lens = kv_data_lens
kv_args.kv_item_lens = kv_item_lens
if not self.is_mla_backend:
kv_args.kv_head_num = self.token_to_kv_pool.head_num
kv_args.page_size = self.token_to_kv_pool.page_size
kv_args.aux_data_ptrs, kv_args.aux_data_lens, kv_args.aux_item_lens = (
self.metadata_buffers.get_buf_infos()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment