Unverified Commit 0877f1e7 authored by Liangsheng Yin, committed by GitHub

Fix streaming (#600)

parent 5304b4ef
@@ -180,7 +180,6 @@ class RuntimeEndpoint(BaseBackend):
         self._assert_success(res)
         pos = 0
-        incomplete_text = ""
         for chunk in res.iter_lines(decode_unicode=False):
             chunk = chunk.decode("utf-8")
             if chunk and chunk.startswith("data:"):
@@ -188,14 +187,10 @@ class RuntimeEndpoint(BaseBackend):
                     break
                 data = json.loads(chunk[5:].strip("\n"))
                 chunk_text = data["text"][pos:]
-                incomplete_text = data["incomplete_text"]
                 meta_info = data["meta_info"]
                 pos += len(chunk_text)
                 yield chunk_text, meta_info
-        if len(incomplete_text) > 0:
-            yield incomplete_text, meta_info
 
     def select(
         self,
         s: StreamExecutor,
...
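With this change the backend no longer receives a separate `incomplete_text` field: once a request finishes, the server sends the complete text, so the client's cumulative-slicing loop covers everything. A minimal sketch of the consumption pattern after the fix (the URL, payload shape, and `[DONE]` sentinel are assumptions for illustration, not the exact API):

```python
import json

import requests


def stream_generate(url: str, payload: dict):
    """Consume the SSE stream, turning cumulative text into deltas."""
    pos = 0
    with requests.post(url, json=payload, stream=True) as res:
        for chunk in res.iter_lines(decode_unicode=False):
            chunk = chunk.decode("utf-8")
            if not (chunk and chunk.startswith("data:")):
                continue
            if chunk == "data: [DONE]":  # assumed end-of-stream sentinel
                break
            data = json.loads(chunk[5:].strip("\n"))
            delta = data["text"][pos:]  # data["text"] is cumulative
            pos += len(delta)
            yield delta, data["meta_info"]
```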
@@ -55,13 +55,11 @@ class DetokenizerManager:
             # Trim stop str
             # TODO(lmzheng): handle the case where multiple stop strs are hit
             output_strs = []
-            incomplete_strs = []
             for i in range(len(recv_obj.rids)):
                 new_text = read_texts[i][len(surr_texts[i]) :]
-                complete_new_text = find_printable_text(new_text)
-                incomplete_new_text = new_text[len(complete_new_text) :]
-                output_strs.append(recv_obj.decoded_texts[i] + complete_new_text)
-                incomplete_strs.append(incomplete_new_text)
+                if recv_obj.finished_reason[i] is None:
+                    new_text = find_printable_text(new_text)
+                output_strs.append(recv_obj.decoded_texts[i] + new_text)
 
                 if isinstance(recv_obj.finished_reason[i], FINISH_MATCHED_STR):
                     pos = output_strs[i].find(recv_obj.finished_reason[i].matched)
@@ -72,7 +70,6 @@ class DetokenizerManager:
                 BatchStrOut(
                     rids=recv_obj.rids,
                     output_strs=output_strs,
-                    incomplete_strs=incomplete_strs,
                     meta_info=recv_obj.meta_info,
                     finished_reason=recv_obj.finished_reason,
                 )
...
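`find_printable_text` withholds a tail that may still change as more tokens arrive (for example, a partial word, or a U+FFFD replacement character left by an incomplete byte sequence). After this change the holdback applies only while the request is unfinished; the final chunk flushes the tail, which is why the separate `incomplete_strs` channel can be dropped. A rough sketch of such a heuristic, assuming behavior similar to Hugging Face's `TextStreamer` rather than the exact implementation:

```python
def find_printable_text(text: str) -> str:
    """Return the stable prefix of `text`; the rest is held back."""
    if text.endswith("\n"):
        return text  # a newline flushes everything
    if text.endswith("\ufffd"):
        return text[:-1]  # hold back an undecodable partial character
    # Hold back the (possibly incomplete) last word.
    return text[: text.rfind(" ") + 1]
```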
@@ -123,7 +123,6 @@ class BatchTokenIDOut:
 class BatchStrOut:
     rids: List[str]
     output_strs: List[str]
-    incomplete_strs: List[str]
     meta_info: List[Dict]
     finished_reason: List[BaseFinishReason]
...
@@ -317,7 +317,6 @@ class TokenizerManager:
                 recv_obj.meta_info[i]["id"] = rid
                 out_dict = {
                     "text": recv_obj.output_strs[i],
-                    "incomplete_text": recv_obj.incomplete_strs[i],
                     "meta_info": recv_obj.meta_info[i],
                 }
                 state.out_list.append(out_dict)
...
@@ -164,7 +164,7 @@ async def v1_completions(tokenizer_manager, raw_request: Request):
                 logprobs = None
                 delta = text[len(stream_buffer) :]
-                stream_buffer = content["text"]
+                stream_buffer = stream_buffer + delta
                 choice_data = CompletionResponseStreamChoice(
                     index=0,
                     text=delta,
@@ -323,7 +323,7 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
                 text = content["text"]
                 delta = text[len(stream_buffer) :]
-                stream_buffer = text
+                stream_buffer = stream_buffer + delta
                 choice_data = ChatCompletionResponseStreamChoice(
                     index=0,
                     delta=DeltaMessage(content=delta),
...
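Accumulating `stream_buffer` from the emitted deltas, rather than overwriting it with the server's latest cumulative text, keeps the buffer equal to exactly what has been sent to the client, so the final chunk, which now carries any previously held-back tail, is emitted once and in full. A toy illustration of the invariant (names are illustrative):

```python
def emit_deltas(cumulative_texts):
    """Yield only the newly generated suffix of each cumulative snapshot."""
    stream_buffer = ""
    for text in cumulative_texts:
        delta = text[len(stream_buffer) :]
        stream_buffer = stream_buffer + delta  # track what was emitted
        if delta:
            yield delta


# The held-back tail arrives all at once on the final chunk:
assert list(emit_deltas(["Hel", "Hello, wor", "Hello, world!"])) == [
    "Hel",
    "lo, wor",
    "ld!",
]
```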