Unverified commit 26908d95, authored by uylnap and committed by GitHub

* fix(detokenizer_manager.py): fix truncated decoded output (#586)


Co-authored-by: hnyls2002 <hnyls2002@gmail.com>
parent c0982ac5
@@ -8,7 +8,7 @@ from sglang.global_config import global_config
 from sglang.lang.chat_template import get_chat_template_by_model_path
 from sglang.lang.interpreter import StreamExecutor
 from sglang.lang.ir import SglSamplingParams
-from sglang.utils import find_printable_text, http_request
+from sglang.utils import http_request


 class RuntimeEndpoint(BaseBackend):
@@ -187,11 +187,11 @@ class RuntimeEndpoint(BaseBackend):
                 if chunk == "data: [DONE]":
                     break
                 data = json.loads(chunk[5:].strip("\n"))
-                text = find_printable_text(data["text"][pos:])
+                chunk_text = data["text"][pos:]
+                incomplete_text = data["incomplete_text"]
                 meta_info = data["meta_info"]
-                pos += len(text)
-                incomplete_text = data["text"][pos:]
-                yield text, meta_info
+                pos += len(chunk_text)
+                yield chunk_text, meta_info

         if len(incomplete_text) > 0:
             yield incomplete_text, meta_info
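
With this change the RuntimeEndpoint stream yields each printable chunk_text as it arrives and, once the data stream ends, yields the leftover incomplete_text exactly once, so a consumer that simply concatenates everything it receives gets the full decoded output. A minimal consumer sketch under those assumptions; the collect_stream helper and fake_stream values are illustrative and not part of this commit:

from typing import Dict, Iterable, Tuple

def collect_stream(stream: Iterable[Tuple[str, Dict]]) -> str:
    # The generator above yields (text, meta_info) pairs, including a final
    # (incomplete_text, meta_info) pair when an undecoded tail remains, so
    # joining every piece reproduces the complete output.
    return "".join(piece for piece, _meta in stream)

# Illustrative stand-in for the real SSE-backed generator.
fake_stream = [("Hello, ", {}), ("wor", {}), ("ld!", {})]
assert collect_stream(fake_stream) == "Hello, world!"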
@@ -11,7 +11,7 @@ from sglang.srt.hf_transformers_utils import get_tokenizer
 from sglang.srt.managers.controller.infer_batch import FINISH_MATCHED_STR
 from sglang.srt.managers.io_struct import BatchStrOut, BatchTokenIDOut
 from sglang.srt.server_args import PortArgs, ServerArgs
-from sglang.utils import get_exception_traceback, graceful_registry
+from sglang.utils import find_printable_text, get_exception_traceback, graceful_registry

 asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
@@ -55,9 +55,13 @@ class DetokenizerManager:
             # Trim stop str
             # TODO(lmzheng): handle the case where multiple stop strs are hit
             output_strs = []
+            incomplete_strs = []
             for i in range(len(recv_obj.rids)):
                 new_text = read_texts[i][len(surr_texts[i]) :]
-                output_strs.append(recv_obj.decoded_texts[i] + new_text)
+                complete_new_text = find_printable_text(new_text)
+                incomplete_new_text = new_text[len(complete_new_text) :]
+                output_strs.append(recv_obj.decoded_texts[i] + complete_new_text)
+                incomplete_strs.append(incomplete_new_text)

                 if isinstance(recv_obj.finished_reason[i], FINISH_MATCHED_STR):
                     pos = output_strs[i].find(recv_obj.finished_reason[i].matched)
@@ -67,7 +71,8 @@ class DetokenizerManager:
             self.send_to_tokenizer.send_pyobj(
                 BatchStrOut(
                     rids=recv_obj.rids,
-                    output_str=output_strs,
+                    output_strs=output_strs,
+                    incomplete_strs=incomplete_strs,
                     meta_info=recv_obj.meta_info,
                     finished_reason=recv_obj.finished_reason,
                 )
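
This is the core of the fix: the DetokenizerManager now splits each request's newly decoded text into a printable prefix (via find_printable_text) and the remaining incomplete tail, and ships both downstream instead of letting the client re-derive printability and drop the tail. A minimal sketch of the splitting idea follows; the stand-in heuristic for find_printable_text (cut at the last space unless the text ends with a newline) is an assumption, since the real helper lives in sglang.utils and is not shown in this diff:

def find_printable_text(text: str) -> str:
    # Stand-in for sglang.utils.find_printable_text (assumed behavior:
    # return the longest prefix that will not change once more tokens arrive).
    if text.endswith("\n"):
        return text
    return text[: text.rfind(" ") + 1]

def split_new_text(decoded_so_far: str, new_text: str) -> tuple[str, str]:
    # Mirrors the loop body above: the printable part is appended to the
    # running output, the rest is reported separately as incomplete.
    complete = find_printable_text(new_text)
    incomplete = new_text[len(complete):]
    return decoded_so_far + complete, incomplete

output_str, incomplete_str = split_new_text("The answer", " is fort")
assert output_str == "The answer is "   # printable part only
assert incomplete_str == "fort"         # kept back until more tokens arrive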
@@ -122,7 +122,8 @@ class BatchTokenIDOut:
 @dataclass
 class BatchStrOut:
     rids: List[str]
-    output_str: List[str]
+    output_strs: List[str]
+    incomplete_strs: List[str]
     meta_info: List[Dict]
     finished_reason: List[BaseFinishReason]
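
BatchStrOut remains a batch of parallel lists, one entry per request id, so the new incomplete_strs field must line up with rids and output_strs. A self-contained sketch of that invariant; the mirror dataclass and example values are illustrative only, and finished_reason is simplified because BaseFinishReason is not shown in this diff:

from dataclasses import dataclass
from typing import Dict, List

@dataclass
class BatchStrOutSketch:
    # Mirrors the fields added above.
    rids: List[str]
    output_strs: List[str]
    incomplete_strs: List[str]
    meta_info: List[Dict]
    finished_reason: List[object]

batch = BatchStrOutSketch(
    rids=["req-0", "req-1"],
    output_strs=["Hello ", "The answer is "],
    incomplete_strs=["wor", "fort"],   # undecoded tails, one per request
    meta_info=[{}, {}],
    finished_reason=[None, None],
)
assert len(batch.incomplete_strs) == len(batch.rids)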
@@ -316,7 +316,8 @@ class TokenizerManager:
                 recv_obj.meta_info[i]["id"] = rid
                 out_dict = {
-                    "text": recv_obj.output_str[i],
+                    "text": recv_obj.output_strs[i],
+                    "incomplete_text": recv_obj.incomplete_strs[i],
                     "meta_info": recv_obj.meta_info[i],
                 }
                 state.out_list.append(out_dict)
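
Each out_dict handed back to a waiting request now carries both the printable text and the incomplete_text tail, and their concatenation is the full decoded output at that point; this is what the RuntimeEndpoint stream above relies on. A small sketch of the reassembly, where the full_text_so_far helper and the literal out_dict are illustrative:

from typing import Dict

def full_text_so_far(out_dict: Dict) -> str:
    # "text" is the printable prefix, "incomplete_text" the undecoded tail;
    # together they cover everything decoded for the request so far.
    return out_dict["text"] + out_dict["incomplete_text"]

out_dict = {"text": "The answer is ", "incomplete_text": "fort", "meta_info": {}}
assert full_text_so_far(out_dict) == "The answer is fort"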