Unverified commit 84164f58, authored by Yuhao Tsui, committed by GitHub
Browse files

Update completions.py

parent 52fa671c
...@@ -85,8 +85,12 @@ async def generate(request: Request, input: OllamaGenerateCompletionRequest): ...@@ -85,8 +85,12 @@ async def generate(request: Request, input: OllamaGenerateCompletionRequest):
return check_link_response(request, inner()) return check_link_response(request, inner())
else: else:
complete_response = "" complete_response = ""
async for token in interface.inference(input.prompt, id): async for res in interface.inference(input.prompt, id):
complete_response += token if isinstance(res, RawUsage):
raw_usage = res
else:
token, finish_reason = res
complete_response += token
response = OllamaGenerationResponse( response = OllamaGenerationResponse(
model=config.model_name, model=config.model_name,
created_at=str(datetime.now()), created_at=str(datetime.now()),
...@@ -187,8 +191,12 @@ async def chat(request: Request, input: OllamaChatCompletionRequest): ...@@ -187,8 +191,12 @@ async def chat(request: Request, input: OllamaChatCompletionRequest):
complete_response = "" complete_response = ""
eval_count = 0 eval_count = 0
async for token in interface.inference(prompt, id): async for res in interface.inference(prompt, id):
complete_response += token if isinstance(res, RawUsage):
raw_usage = res
else:
token, finish_reason = res
complete_response += token
eval_count += 1 eval_count += 1
end_time = time() end_time = time()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment