Unverified Commit 71fc7b7f authored by Rain H, committed by GitHub

[Fix] KV-cache eviction mismatch across PP ranks in DeepSeek V3/R1 (#10214)

parent 9ab72f98
@@ -1260,6 +1260,16 @@ class ModelRunner:
// self.server_args.page_size
* self.server_args.page_size
)
# Different PP ranks may hold different numbers of layers, so take the
# minimum max_total_num_tokens across ranks to keep KV-cache capacity consistent.
if self.pp_size > 1:
tensor = torch.tensor(self.max_total_num_tokens, dtype=torch.int64)
torch.distributed.all_reduce(
tensor,
op=torch.distributed.ReduceOp.MIN,
group=get_world_group().cpu_group,
)
self.max_total_num_tokens = tensor.item()
# create token size for hybrid cache
if self.is_hybrid:
self.set_num_token_hybrid()
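For illustration, here is a minimal, self-contained sketch of the min-reduction pattern used in the hunk above: each pipeline-parallel rank computes its own KV-cache token capacity, then all ranks agree on the smallest value so eviction decisions match. The process-group setup (gloo over torchrun) and the helper name `sync_max_total_num_tokens` are illustrative assumptions, not SGLang's actual initialization.

```python
# Sketch only: reduce a per-rank scalar to the group-wide minimum.
import torch
import torch.distributed as dist


def sync_max_total_num_tokens(local_max_tokens: int, group=None) -> int:
    """Return the minimum of local_max_tokens across all ranks in `group`."""
    tensor = torch.tensor(local_max_tokens, dtype=torch.int64)
    dist.all_reduce(tensor, op=dist.ReduceOp.MIN, group=group)
    return int(tensor.item())


if __name__ == "__main__":
    # Launch with: torchrun --nproc-per-node=2 this_script.py
    dist.init_process_group(backend="gloo")
    rank = dist.get_rank()
    # Pretend each rank derived a different capacity from its layer count.
    local_capacity = 100_000 - 10_000 * rank
    agreed = sync_max_total_num_tokens(local_capacity)
    print(f"rank {rank}: local={local_capacity} agreed={agreed}")
    dist.destroy_process_group()
```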