Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
Qwen_lmdeploy
Commits
eb3b4dc9
Unverified
Commit
eb3b4dc9
authored
Oct 18, 2023
by
AllentDan
Committed by
GitHub
Oct 18, 2023
Browse files
avoid split chinese characters during decoding (#566)
parent
9c3634ec
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
21 additions
and
1 deletion
+21
-1
lmdeploy/serve/async_engine.py
lmdeploy/serve/async_engine.py
+10
-0
lmdeploy/serve/turbomind/chatbot.py
lmdeploy/serve/turbomind/chatbot.py
+6
-1
lmdeploy/turbomind/chat.py
lmdeploy/turbomind/chat.py
+5
-0
No files found.
lmdeploy/serve/async_engine.py
View file @
eb3b4dc9
...
...
@@ -156,6 +156,11 @@ class AsyncEngine:
# decode res
response
=
self
.
tokenizer
.
decode
(
res
.
tolist
(),
offset
=
response_size
)
# utf-8 char at the end means it's a potential unfinished
# byte sequence, continue to concatenate it with the next
# sequence and decode them together
if
response
.
endswith
(
'�'
):
continue
# response, history token len,
# input token len, gen token len
yield
GenOut
(
response
,
self
.
steps
[
str
(
session_id
)],
...
...
@@ -249,6 +254,11 @@ class AsyncEngine:
# decode res
response
=
self
.
tokenizer
.
decode
(
res
.
tolist
(),
offset
=
response_size
)
# utf-8 char at the end means it's a potential unfinished
# byte sequence, continue to concatenate it with the next
# sequence and decode them together
if
response
.
endswith
(
'�'
):
continue
# response, history len, input len, generation len
yield
GenOut
(
response
,
self
.
steps
[
str
(
session_id
)],
len
(
input_ids
),
tokens
,
finish_reason
)
...
...
lmdeploy/serve/turbomind/chatbot.py
View file @
eb3b4dc9
...
...
@@ -657,8 +657,13 @@ class Chatbot:
continue
output_str
=
postprocess
(
output_ids
,
np
.
array
([[
n_token
]],
dtype
=
np
.
uint32
))
n_token
=
output_ids
.
shape
[
-
1
]
text
=
output_str
[
0
].
decode
()
# utf-8 char at the end means it's a potential unfinished
# byte sequence, continue to concatenate it with the next
# sequence and decode them together
if
text
.
endswith
(
'�'
):
continue
n_token
=
output_ids
.
shape
[
-
1
]
if
display
:
print
(
text
,
end
=
''
,
flush
=
True
)
session
.
response
+=
text
...
...
lmdeploy/turbomind/chat.py
View file @
eb3b4dc9
...
...
@@ -145,6 +145,11 @@ def main(model_path,
res
,
tokens
=
outputs
[
0
]
# decode res
response
=
tokenizer
.
decode
(
res
.
tolist
(),
offset
=
response_size
)
# utf-8 char at the end means it's a potential unfinished
# byte sequence, continue to concatenate it with the next
# sequence and decode them together
if
response
.
endswith
(
'�'
):
continue
response
=
valid_str
(
response
)
print
(
f
'
{
response
}
'
,
end
=
''
,
flush
=
True
)
response_size
=
tokens
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment