Unverified commit 84164f58, authored by Yuhao Tsui and committed by GitHub
Browse files

Update completions.py

parent 52fa671c
......@@ -85,8 +85,12 @@ async def generate(request: Request, input: OllamaGenerateCompletionRequest):
return check_link_response(request, inner())
else:
complete_response = ""
-async for token in interface.inference(input.prompt, id):
-    complete_response += token
+async for res in interface.inference(input.prompt, id):
+    if isinstance(res, RawUsage):
+        raw_usage = res
+    else:
+        token, finish_reason = res
+        complete_response += token
response = OllamaGenerationResponse(
model=config.model_name,
created_at=str(datetime.now()),
......@@ -187,8 +191,12 @@ async def chat(request: Request, input: OllamaChatCompletionRequest):
complete_response = ""
eval_count = 0
-async for token in interface.inference(prompt, id):
-    complete_response += token
+async for res in interface.inference(prompt, id):
+    if isinstance(res, RawUsage):
+        raw_usage = res
+    else:
+        token, finish_reason = res
+        complete_response += token
eval_count += 1
end_time = time()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment