Commit 8f65b603 authored by Rayyyyy

Fix bug

parent bb0a99c2
@@ -206,16 +206,15 @@ def llm_inference(args):
     logger.info(f"Get params: model_path {model_path}, use_vllm {use_vllm}, tensor_parallel_size {tensor_parallel_size}, stream_chat {stream_chat}")
     model, tokenzier, sampling_params = init_model(model_path, use_vllm, tensor_parallel_size)
-    async def inference(request):
-        start = time.time()
-        input_json = await request.json()
     llm_infer = LLMInference(model,
                              tokenzier,
                              sampling_params,
                              use_vllm=use_vllm,
                              stream_chat=stream_chat)
+    async def inference(request):
+        start = time.time()
+        input_json = await request.json()
         prompt = input_json['query']
         history = input_json['history']
         if stream_chat:
......
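The fix moves the async def inference handler definition below the LLMInference construction, so the handler's closure captures llm_infer only after it exists, and the wrapper is built once rather than being referenced before assignment. A minimal sketch of the resulting pattern follows; the diff does not show the serving framework or the LLMInference internals, so aiohttp is an assumption and the LLMInference class below is a placeholder, not the repo's actual implementation.

# Hypothetical sketch of the pattern after this commit. aiohttp is an
# assumption about the serving framework; LLMInference here is a
# placeholder standing in for the repo's own class.
import time

from aiohttp import web


class LLMInference:
    """Placeholder for the repo's LLMInference wrapper."""

    def __init__(self, model, tokenzier, sampling_params,
                 use_vllm=False, stream_chat=False):
        # "tokenzier" (sic) matches the variable name used in the diff.
        self.model = model
        self.tokenzier = tokenzier
        self.sampling_params = sampling_params
        self.use_vllm = use_vllm
        self.stream_chat = stream_chat

    def chat(self, prompt, history):
        # Real generation (vLLM or plain HF decoding) would happen here.
        return f"echo: {prompt}"


def llm_inference(model, tokenzier, sampling_params, use_vllm, stream_chat):
    # Build the inference wrapper once, before the handler is defined,
    # so every request reuses it via the closure below.
    llm_infer = LLMInference(model, tokenzier, sampling_params,
                             use_vllm=use_vllm, stream_chat=stream_chat)

    async def inference(request: web.Request) -> web.Response:
        start = time.time()
        input_json = await request.json()
        prompt = input_json['query']
        history = input_json['history']
        answer = llm_infer.chat(prompt, history)
        return web.json_response({'answer': answer,
                                  'latency': time.time() - start})

    app = web.Application()
    app.router.add_post('/inference', inference)
    return app

Serving it would then be something like web.run_app(llm_inference(...), port=8000); the route path and response fields above are illustrative, not taken from the repo.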