Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenych
chat_demo
Commits
75ac58c8
Commit
75ac58c8
authored
Aug 06, 2024
by
chenych
Browse files
Modify stream chat
parent
4c5a8a74
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
10 deletions
+10
-10
llm_service/inferencer.py
llm_service/inferencer.py
+10
-10
No files found.
llm_service/inferencer.py
View file @
75ac58c8
...
...
@@ -216,7 +216,7 @@ def hf_inference(bind_port, model, tokenizer, stream_chat):
def
vllm_inference
(
bind_port
,
model
,
tokenizer
,
sampling_params
,
stream_chat
):
'''启动 Web 服务器,接收 HTTP 请求,并通过调用本地的 LLM 推理服务生成响应. '''
import
uuid
import
json
from
typing
import
AsyncGenerator
from
fastapi.responses
import
StreamingResponse
...
...
@@ -241,22 +241,22 @@ def vllm_inference(bind_port, model, tokenizer, sampling_params, stream_chat):
async
def
stream_results
()
->
AsyncGenerator
[
bytes
,
None
]:
final_output
=
None
async
for
request_output
in
results_generator
:
final_output
=
request_output
#
final_output = request_output
text_outputs
=
[
output
.
text
for
output
in
request_output
.
outputs
]
ret
=
{
"text"
:
text_outputs
}
print
(
ret
)
#
yield (json.dumps(ret) + "\0").encode("utf-8")
yield
(
json
.
dumps
(
ret
)
+
"
\0
"
).
encode
(
"utf-8"
)
# yield web.json_response({'text': text_outputs})
assert
final_output
is
not
None
return
[
output
.
text
for
output
in
final_output
.
outputs
]
#
assert final_output is not None
#
return [output.text for output in final_output.outputs]
if
stream_chat
:
logger
.
info
(
"****************** in chat stream *****************"
)
#
return StreamingResponse(stream_results())
text
=
await
stream_results
()
output_text
=
substitution
(
text
)
logger
.
debug
(
'问题:{} 回答:{}
\n
timecost {} '
.
format
(
prompt
,
output_text
,
time
.
time
()
-
start
))
return
web
.
json_response
({
'text'
:
output_text
})
return
StreamingResponse
(
stream_results
())
#
text = await stream_results()
#
output_text = substitution(text)
#
logger.debug('问题:{} 回答:{} \ntimecost {} '.format(prompt, output_text, time.time() - start))
#
return web.json_response({'text': output_text})
# Non-streaming case
logger
.
info
(
"****************** in chat ******************"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment