OpenDAS / Lmdeploy · Commit 68296844 (unverified)
Authored Aug 14, 2023 by Lyu Han; committed by GitHub on Aug 14, 2023
Fix TIS client got-no-space-result side effect brought by PR #197 (#222)

* rollback
* rollback chatbot.py
Parent: af517a4a
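Read together, the two files tell one story. PR #197 appears to have made the Triton Inference Server (TIS) client's postprocessing incremental (adding a per-session sequence_offset so that each streaming step detokenized only the newly generated tokens) and cut the postprocessing model down to a single CPU instance. This commit rolls both changes back: the client again detokenizes the whole generated sequence from the fixed input offset on every step, and config.pbtxt restores count: 16.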
Showing 2 changed files with 14 additions and 17 deletions.
lmdeploy/serve/turbomind/chatbot.py (+13, -16)
lmdeploy/serve/turbomind/triton_models/postprocessing/config.pbtxt (+1, -1)
lmdeploy/serve/turbomind/chatbot.py
@@ -26,7 +26,6 @@ class Session:
     request_id: str = ''
     histories: str = ''  # history conversations of the session
     sequence_length: int = 0  # the total generated token number in the session
-    sequence_offset: int = 0  # the new generated token offset in the session
     prompt: str = ''
     response: str = ''
     status: int = None  # status of the session
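This hunk leaves the session state as a plain dataclass with no postprocessing offset. A minimal sketch of the resulting class, assuming only the fields visible in the hunk (the real class may define further fields above request_id):

    from dataclasses import dataclass


    @dataclass
    class Session:
        """Per-session state of the TIS client (abridged to the diff's fields)."""
        request_id: str = ''
        histories: str = ''  # history conversations of the session
        sequence_length: int = 0  # the total generated token number in the session
        prompt: str = ''
        response: str = ''
        status: int = None  # status of the session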
@@ -599,15 +598,14 @@ class Chatbot:
         Yields:
             tuple: status, text, generated token number
         """
-        session.sequence_offset = n_input_token + preseq_length
-        sentinel = n_input_token + preseq_length
+        offset = n_input_token + preseq_length
         status, res, n_token = None, '', 0
         while True:
             result = res_queue.get()
             if result is None:
                 status = StatusCode.TRITON_STREAM_END
                 res = session.response
-                n_token = session.sequence_length - sentinel
+                n_token = session.sequence_length - offset
                 session.status = StatusCode.TRITON_STREAM_END
                 break
             if 'errcode' in result:
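The loop above is a plain producer/consumer pattern: a callback thread pushes streaming results onto res_queue and enqueues None as an end-of-stream sentinel, and the generated token count is the final sequence length minus the fixed input offset. A self-contained sketch of that shape, assuming the producer enqueues (sequence_length, text) tuples (the real callback enqueues Triton InferResult objects, and StatusCode here is a stand-in for lmdeploy's enum):

    import queue
    from enum import Enum


    class StatusCode(Enum):  # stand-in for lmdeploy's StatusCode
        TRITON_STREAM_ING = 0
        TRITON_STREAM_END = 1


    def consume(res_queue: queue.Queue, offset: int):
        """Drain streaming results until the producer enqueues the None sentinel."""
        text, sequence_length = '', offset
        while True:
            result = res_queue.get()
            if result is None:  # end-of-stream sentinel
                break
            sequence_length, text = result
            # tokens generated so far = total sequence length - fixed input offset
            yield StatusCode.TRITON_STREAM_ING, text, sequence_length - offset
        yield StatusCode.TRITON_STREAM_END, text, sequence_length - offset


    # usage: a producer thread would fill the queue; here it is pre-loaded
    q = queue.Queue()
    for item in [(12, 'Hel'), (14, 'Hello'), None]:
        q.put(item)
    for status, text, n_token in consume(q, offset=10):
        print(status.name, repr(text), n_token)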
@@ -630,31 +628,30 @@ class Chatbot:
                 output_ids = result.as_numpy('output_ids')

                 session.sequence_length = sequence_length.squeeze()
-                new_token_length = sequence_length - session.sequence_offset
+                sequence_length = sequence_length - offset
                 last_token_id = output_ids[-1][-1][session.sequence_length - 1]
                 if last_token_id == eos_id:
                     session.sequence_length = session.sequence_length - 1
-                    new_token_length = new_token_length - 1
+                    sequence_length = sequence_length - 1

                 output_ids = output_ids.reshape((1, 1, output_ids.shape[-1]))
-                new_token_length = new_token_length.reshape(
-                    (1, new_token_length.shape[-1]))
+                sequence_length = sequence_length.reshape(
+                    (1, sequence_length.shape[-1]))

                 if profile_generation:
                     yield (StatusCode.TRITON_STREAM_ING,
                            'postprocessing is ignored during profiling '
-                           'token generation', new_token_length.squeeze())
+                           'token generation', sequence_length.squeeze())
                     continue
-                output_str = postprocess(
-                    output_ids[:, :, session.sequence_offset:],
-                    new_token_length)
-                session.sequence_offset = session.sequence_length
+                output_str = postprocess(output_ids[:, :, offset:],
+                                         sequence_length)
                 text = output_str[0].decode()
                 if display:
-                    print(text, end='', flush=True)
-                session.response += text
+                    new_text = text[len(session.response):]
+                    print(new_text, end='', flush=True)
+                session.response = text
                 yield (StatusCode.TRITON_STREAM_ING, session.response,
-                       session.sequence_offset - sentinel)
+                       sequence_length.squeeze())
             except Exception as e:
                 logger.error(f'catch exception: {e}')
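One detail of the rolled-back display path deserves a note: since detokenization is no longer incremental, each step re-decodes everything past the fixed input offset and prints only the suffix that has not been shown yet. A minimal sketch of that pattern, assuming the decoded text only ever grows between steps (decoded_steps is a hypothetical stand-in for the per-step postprocess output):

    def stream_display(decoded_steps):
        """Print only the not-yet-shown suffix of a growing decoded string."""
        response = ''
        for text in decoded_steps:
            new_text = text[len(response):]  # the part not displayed yet
            print(new_text, end='', flush=True)
            response = text  # replace, not append: text is the full decode
        print()
        return response


    # usage: each element is the full sequence decoded from the fixed offset
    stream_display(['Hel', 'Hello, wor', 'Hello, world!'])

Re-decoding the full sequence each step trades redundant work for correct text, which is plausibly the "got-no-space-result" side effect named in the commit title: decoding only the newly generated tokens can drop the leading space that tokenizers such as SentencePiece attach to the first token of a word.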
lmdeploy/serve/turbomind/triton_models/postprocessing/config.pbtxt
@@ -23,7 +23,7 @@ output [
 instance_group [
   {
-    count: 1
+    count: 16
     kind: KIND_CPU
   }
 ]
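For context, instance_group in a Triton model configuration sets how many copies of the model serve requests in parallel on the listed device, so restoring count: 16 lets up to sixteen postprocessing (detokenization) calls run concurrently on CPU rather than queueing behind a single instance. Whether the single instance contributed to the reported side effect is not stated in the commit message.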