Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
Qwen_lmdeploy
Commits
eb3b4dc9
Unverified
Commit
eb3b4dc9
authored
Oct 18, 2023
by
AllentDan
Committed by
GitHub
Oct 18, 2023
Browse files
avoid split chinese characters during decoding (#566)
parent
9c3634ec
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
21 additions
and
1 deletion
+21
-1
lmdeploy/serve/async_engine.py
lmdeploy/serve/async_engine.py
+10
-0
lmdeploy/serve/turbomind/chatbot.py
lmdeploy/serve/turbomind/chatbot.py
+6
-1
lmdeploy/turbomind/chat.py
lmdeploy/turbomind/chat.py
+5
-0
No files found.
lmdeploy/serve/async_engine.py
View file @
eb3b4dc9
...
...
@@ -156,6 +156,11 @@ class AsyncEngine:
# decode res
response
=
self
.
tokenizer
.
decode
(
res
.
tolist
(),
offset
=
response_size
)
# utf-8 char at the end means it's a potential unfinished
# byte sequence, continue to concatenate it with the next
# sequence and decode them together
if
response
.
endswith
(
'�'
):
continue
# response, history token len,
# input token len, gen token len
yield
GenOut
(
response
,
self
.
steps
[
str
(
session_id
)],
...
...
@@ -249,6 +254,11 @@ class AsyncEngine:
# decode res
response
=
self
.
tokenizer
.
decode
(
res
.
tolist
(),
offset
=
response_size
)
# utf-8 char at the end means it's a potential unfinished
# byte sequence, continue to concatenate it with the next
# sequence and decode them together
if
response
.
endswith
(
'�'
):
continue
# response, history len, input len, generation len
yield
GenOut
(
response
,
self
.
steps
[
str
(
session_id
)],
len
(
input_ids
),
tokens
,
finish_reason
)
...
...
lmdeploy/serve/turbomind/chatbot.py
View file @
eb3b4dc9
...
...
@@ -657,8 +657,13 @@ class Chatbot:
continue
output_str
=
postprocess
(
output_ids
,
np
.
array
([[
n_token
]],
dtype
=
np
.
uint32
))
n_token
=
output_ids
.
shape
[
-
1
]
text
=
output_str
[
0
].
decode
()
# utf-8 char at the end means it's a potential unfinished
# byte sequence, continue to concatenate it with the next
# sequence and decode them together
if
text
.
endswith
(
'�'
):
continue
n_token
=
output_ids
.
shape
[
-
1
]
if
display
:
print
(
text
,
end
=
''
,
flush
=
True
)
session
.
response
+=
text
...
...
lmdeploy/turbomind/chat.py
View file @
eb3b4dc9
...
...
@@ -145,6 +145,11 @@ def main(model_path,
res
,
tokens
=
outputs
[
0
]
# decode res
response
=
tokenizer
.
decode
(
res
.
tolist
(),
offset
=
response_size
)
# utf-8 char at the end means it's a potential unfinished
# byte sequence, continue to concatenate it with the next
# sequence and decode them together
if
response
.
endswith
(
'�'
):
continue
response
=
valid_str
(
response
)
print
(
f
'
{
response
}
'
,
end
=
''
,
flush
=
True
)
response_size
=
tokens
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment