Unverified commit eb429b88, authored by Shangming Cai, committed by GitHub
Browse files

[PD] Respect sampling_params.max_new_tokens when PD disaggregation is activated (#7598)


Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com>
parent 49538d11
...@@ -604,9 +604,21 @@ class DecodeTransferQueue: ...@@ -604,9 +604,21 @@ class DecodeTransferQueue:
: decode_req.req.top_logprobs_num : decode_req.req.top_logprobs_num
].tolist() ].tolist()
) )
if hasattr(decode_req.kv_receiver, "clear"): if hasattr(decode_req.kv_receiver, "clear"):
decode_req.kv_receiver.clear() decode_req.kv_receiver.clear()
# special handling for sampling_params.max_new_tokens == 1
if decode_req.req.sampling_params.max_new_tokens == 1:
# finish immediately
decode_req.req.check_finished()
self.scheduler.stream_output(
[decode_req.req], decode_req.req.return_logprob
)
self.tree_cache.cache_finished_req(decode_req.req)
else:
transferred_reqs.append(decode_req.req) transferred_reqs.append(decode_req.req)
indices_to_remove.add(i) indices_to_remove.add(i)
elif poll in [ elif poll in [
KVPoll.Bootstrapping, KVPoll.Bootstrapping,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment