Commit 3344fbec authored by lizhigong's avatar lizhigong
Browse files

Receive MTP draft token IDs and req_ids on PP rank 0 from PP rank 1

parent a183111e
......@@ -401,6 +401,13 @@ class V1ZeroModelRunner(GPUModelRunner):
return self.kv_connector_no_forward(scheduler_output)
if get_pp_group().is_first_rank and self.last_draft_token_ids != None:
recv_draft_dict = get_pp_group().recv_tensor_dict(
all_gather_group=get_tp_group())
self.last_draft_token_ids = recv_draft_dict['draft_token_ids']
self.last_sampled_req_ids = recv_draft_dict['sampled_req_ids ']
# Prepare the decoder inputs.
(attn_metadata, attention_cuda_graphs, logits_indices,
spec_decode_metadata,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment