"vllm/vscode:/vscode.git/clone" did not exist on "e03940762b43812fccd3c214bda60201cff9d16a"
Unverified commit 045b61dd, authored by Xuwei, committed by GitHub
Browse files

fix: resolve token loss bug in final packet (#2985)


Signed-off-by: Xuwei Li <lixuwei.xy@gmail.com>
parent 08cb08c1
......@@ -97,20 +97,18 @@ class DecodeWorkerHandler(BaseWorkerHandler):
num_output_tokens_so_far = 0
async for res in stream_source:
finish_reason = res["meta_info"]["finish_reason"]
if finish_reason:
out = {"token_ids": [], "finish_reason": finish_reason["type"]}
else:
try:
next_total_toks = len(res["output_ids"])
except KeyError:
raise ValueError(
f"Missing 'output_ids' in response. This often happens when using skip_tokenizer_init=True. "
f"Missing 'output_ids' in response. This often happens when using skip_tokenizer_init=False. "
f"If you're using ModelType.CHAT or custom model configurations, you may need to modify "
f"the tokenization/detokenization logic in your handler. Response keys: {list(res.keys())}"
)
out = {"token_ids": res["output_ids"][num_output_tokens_so_far:]}
num_output_tokens_so_far = next_total_toks
finish_reason = res["meta_info"]["finish_reason"]
if finish_reason:
out = {"token_ids": [], "finish_reason": finish_reason["type"]}
yield out
......@@ -212,9 +212,6 @@ class HandlerBase:
request_id, model_name
)
yield final_out
else:
yield {"finish_reason": "stop", "token_ids": []}
break
if not res.outputs:
yield {"finish_reason": "error", "token_ids": []}
......
......@@ -60,12 +60,6 @@ class BaseWorkerHandler(ABC):
async for res in gen:
# res is vllm's RequestOutput
# This is the expected way for a request to end.
# The new token ID will be eos, don't forward it.
if res.finished:
yield {"finish_reason": "stop", "token_ids": []}
break
if not res.outputs:
yield {"finish_reason": "error", "token_ids": []}
break
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment