chenych / chat_demo

Commit 8f65b603
Authored Aug 01, 2024 by Rayyyyy
Parent bb0a99c2

Fix bug
Showing 1 changed file with 5 additions and 6 deletions.
llm_service/inferencer.py (view file @ 8f65b603)
```diff
@@ -206,16 +206,15 @@ def llm_inference(args):
     logger.info(f"Get params: model_path {model_path}, use_vllm {use_vllm}, tensor_parallel_size {tensor_parallel_size}, stream_chat {stream_chat}")
     model, tokenzier, sampling_params = init_model(model_path, use_vllm, tensor_parallel_size)
+    llm_infer = LLMInference(model, tokenzier, sampling_params, use_vllm=use_vllm, stream_chat=stream_chat)

     async def inference(request):
         start = time.time()
         input_json = await request.json()
-        llm_infer = LLMInference(model, tokenzier, sampling_params, use_vllm=use_vllm, stream_chat=stream_chat)
         prompt = input_json['query']
         history = input_json['history']
         if stream_chat:
```
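The visible change moves the `LLMInference` construction out of the per-request `inference` handler, so the wrapper is built once when `llm_inference` sets up the service rather than on every request. Below is a minimal, self-contained sketch of that pattern; `ExpensiveEngine`, `make_handler`, and `FakeRequest` are hypothetical stand-ins (the real code uses `LLMInference` and the web framework's request object), included only to show that the handler's closure reuses a single instance.

```python
import asyncio

class ExpensiveEngine:
    """Stand-in for LLMInference; counts constructions to show reuse."""
    instances = 0

    def __init__(self):
        ExpensiveEngine.instances += 1  # incremented once per construction

    def generate(self, prompt):
        return f"echo: {prompt}"

def make_handler():
    engine = ExpensiveEngine()  # built once, like llm_infer after the fix

    async def inference(request):
        payload = await request.json()      # same shape as the diff's handler
        return engine.generate(payload["query"])  # reuses the shared engine

    return inference

class FakeRequest:
    """Stand-in for the framework request with an awaitable .json()."""
    def __init__(self, payload):
        self._payload = payload

    async def json(self):
        return self._payload

async def main():
    handler = make_handler()
    for i in range(3):
        print(await handler(FakeRequest({"query": f"hi {i}", "history": []})))
    print("engines constructed:", ExpensiveEngine.instances)  # prints 1

asyncio.run(main())
```

Re-creating the wrapper on every request repeats whatever setup work its constructor does; building it once and letting the handler close over it keeps the per-request path down to parsing the payload and running generation.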