"tests/pipelines/vscode:/vscode.git/clone" did not exist on "8e8954bd15d3a3c36fbbaa37c978a8b5b8379f9f"
Unverified commit 41a11335, authored by Shangming Cai, committed by GitHub
Browse files

Fix potential eos bug on decode instance when PD is enabled (#12206)


Signed-off-by: Shangming Cai <csmthu@gmail.com>
parent a1f2dc90
...@@ -773,8 +773,12 @@ class DecodeTransferQueue: ...@@ -773,8 +773,12 @@ class DecodeTransferQueue:
indices_to_remove.add(i) indices_to_remove.add(i)
decode_req.req.time_stats.wait_queue_entry_time = time.perf_counter() decode_req.req.time_stats.wait_queue_entry_time = time.perf_counter()
# special handling for sampling_params.max_new_tokens == 1 # special handling for corner cases
if decode_req.req.sampling_params.max_new_tokens == 1: should_finish = (
decode_req.req.sampling_params.max_new_tokens == 1
or output_id in decode_req.req.eos_token_ids
)
if should_finish:
# finish immediately # finish immediately
decode_req.req.time_stats.forward_entry_time = ( decode_req.req.time_stats.forward_entry_time = (
decode_req.req.time_stats.completion_time decode_req.req.time_stats.completion_time
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment