Commit 6d45871d authored by ceerrep

fix: workaround return dummy usage

parent db61375f
@@ -5,7 +5,7 @@ from fastapi import APIRouter
 from fastapi.requests import Request
 from ktransformers.server.utils.create_interface import get_interface
 from ktransformers.server.schemas.assistants.streaming import chat_stream_response
-from ktransformers.server.schemas.endpoints.chat import ChatCompletionCreate,ChatCompletionChunk,ChatCompletionObject
+from ktransformers.server.schemas.endpoints.chat import ChatCompletionCreate,ChatCompletionChunk,ChatCompletionObject, Usage
 from ktransformers.server.backend.base import BackendInterfaceBase
 from ktransformers.server.config.config import Config
@@ -34,6 +34,7 @@ async def chat_completion(request:Request,create:ChatCompletionCreate):
         return chat_stream_response(request,inner())
     else:
         comp = ChatCompletionObject(id=id,object='chat.completion.chunk',created=int(time()))
+        comp.usage = Usage(completion_tokens=1, prompt_tokens=1, total_tokens=2)
         async for token in interface.inference(input_message,id):
             comp.append_token(token)
         return comp
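Context on the workaround: the added line fills the OpenAI-style usage field with placeholder counts (1 prompt token, 1 completion token, 2 total) so that clients which expect usage on non-streaming responses do not fail, since real token accounting is not wired up in this path. Below is a minimal sketch, assuming Usage is a Pydantic model mirroring the OpenAI usage object; the field names come from the constructor call in the diff, but the actual definition in ktransformers.server.schemas.endpoints.chat may differ.

# Sketch only: an assumed shape for the Usage schema, inferred from the
# keyword arguments used in the diff (not copied from ktransformers).
from pydantic import BaseModel

class Usage(BaseModel):
    prompt_tokens: int       # dummy value 1 in this workaround
    completion_tokens: int   # dummy value 1 in this workaround
    total_tokens: int        # dummy value 2 in this workaround

# With the workaround applied, a non-streaming response would serialize
# roughly as:
# {
#   "id": "<uuid>",
#   "object": "chat.completion.chunk",
#   "created": 1710000000,
#   "usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2},
#   ...
# }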