Unverified commit d98a4913 authored by Shangming Cai, committed by GitHub

[PD] Refactor parallel sizes and add pp support for mooncake (#8571)


Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com>
parent 08f8f490
@@ -25,10 +25,13 @@ class KVArgs:
     gpu_id: int
     # for different tp
     decode_tp_size: int
-    # for pp prefill
-    prefill_pp_size: int
     kv_head_num: int
     page_size: int
+    # for pp prefill
+    prefill_pp_size: int
+    pp_rank: int
+    # for system dp
+    system_dp_rank: int


 class KVPoll:
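For context, a rough sketch of why KVArgs now groups prefill_pp_size with the new pp_rank and system_dp_rank fields: when the prefill engine runs pipeline parallelism, each PP stage holds the KV cache for only a slice of the layers, so the transfer side needs to know which stage owns which layers. The helper and numbers below are hypothetical, not the mooncake backend's actual code, and assume an even layer split across stages.

# Hypothetical illustration only; field names come from KVArgs, but
# num_layers and this helper are invented for the example.
def layer_to_prefill_stage(layer_id: int, num_layers: int, prefill_pp_size: int) -> int:
    """Map a model layer to the prefill PP stage that holds its KV cache,
    assuming layers are split evenly across pipeline stages."""
    layers_per_stage = num_layers // prefill_pp_size
    return min(layer_id // layers_per_stage, prefill_pp_size - 1)

# Example: a 32-layer model prefilled with prefill_pp_size=4 puts layer 20 on
# stage 2, so that layer's KV must be pulled from the worker with pp_rank == 2.
assert layer_to_prefill_stage(20, num_layers=32, prefill_pp_size=4) == 2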
@@ -44,6 +44,7 @@ from sglang.srt.disaggregation.utils import (
     poll_and_all_reduce,
     prepare_abort,
 )
+from sglang.srt.layers.dp_attention import get_attention_tp_size
 from sglang.srt.managers.schedule_batch import FINISH_ABORT, ScheduleBatch
 from sglang.srt.mem_cache.allocator import BaseTokenToKVPoolAllocator
 from sglang.srt.mem_cache.base_prefix_cache import BasePrefixCache
@@ -184,9 +185,13 @@ class DecodePreallocQueue:
         kv_args_class = get_kv_class(self.transfer_backend, KVClassType.KVARGS)
         kv_args = kv_args_class()
-        attn_tp_size = self.tp_size // self.dp_size
+        attn_tp_size = get_attention_tp_size()
         kv_args.engine_rank = self.tp_rank % (attn_tp_size)
         kv_args.decode_tp_size = attn_tp_size
+        # Note(shangming): pp is not supported on the decode side yet, so its rank is fixed to 0
+        kv_args.pp_rank = 0
+        kv_args.system_dp_rank = self.scheduler.dp_rank
+        kv_args.prefill_pp_size = self.prefill_pp_size
         kv_data_ptrs, kv_data_lens, kv_item_lens = (
             self.token_to_kv_pool.get_contiguous_buf_infos()
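A standalone sketch of the decode-side rank arithmetic in this hunk. The real code now calls get_attention_tp_size() from sglang.srt.layers.dp_attention; the stand-in below uses the tp_size // dp_size formula it replaces, which only matches the simple case where attention TP groups coincide with DP groups. The function name and numbers are made up for illustration.

# Hypothetical sketch; not the DecodePreallocQueue implementation.
def engine_rank_for(tp_rank: int, tp_size: int, dp_size: int) -> int:
    # Stand-in for get_attention_tp_size() in the simple (non-DP-attention) case.
    attn_tp_size = tp_size // dp_size
    # Same modulo used above: the engine rank is the position within the
    # local attention TP group, not the global tp_rank.
    return tp_rank % attn_tp_size

# Example: tp_size=8 split into dp_size=2 attention groups gives
# attn_tp_size=4, so global tp_rank 6 maps to engine_rank 2 within its group.
assert engine_rank_for(tp_rank=6, tp_size=8, dp_size=2) == 2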
@@ -103,6 +103,8 @@ class PrefillBootstrapQueue:
         kv_args_class = get_kv_class(self.transfer_backend, KVClassType.KVARGS)
         kv_args = kv_args_class()
         kv_args.engine_rank = self.tp_rank
+        kv_args.pp_rank = self.pp_rank
+        kv_args.system_dp_rank = self.scheduler.dp_rank
         kv_args.decode_tp_size = self.decode_tp_size // self.decode_dp_size
         kv_args.prefill_pp_size = self.pp_size
         kv_data_ptrs, kv_data_lens, kv_item_lens = (
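To illustrate the prefill-side values with made-up numbers: the prefill worker advertises its own pipeline position and the per-DP-group TP size of the decode cluster it will ship KV to. This is a sketch of the arithmetic only, not the surrounding class.

# Hypothetical numbers, mirroring the assignments in the hunk above.
decode_tp_size, decode_dp_size = 16, 4   # decode cluster: 16 TP ranks, 4 DP groups
per_group_decode_tp = decode_tp_size // decode_dp_size
assert per_group_decode_tp == 4          # each decode DP replica spans 4 TP ranks

pp_size, pp_rank = 2, 1                  # this prefill worker is stage 1 of 2
# These are the values a PrefillBootstrapQueue-like component would copy into
# kv_args.decode_tp_size, kv_args.prefill_pp_size, and kv_args.pp_rank.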