Unverified commit 57cf99b9, authored by fade_away, committed by GitHub
Browse files

bug-fix: always use stream mode to enable persistent batching (#346)


Co-authored-by: sleepwalker <just_for_singing@foxmail.com>
parent e4701226
...@@ -278,7 +278,7 @@ async def generate(request: GenerateRequest, raw_request: Request = None): ...@@ -278,7 +278,7 @@ async def generate(request: GenerateRequest, raw_request: Request = None):
generation = VariableInterface.async_engine.generate( generation = VariableInterface.async_engine.generate(
request.prompt, request.prompt,
request.instance_id, request.instance_id,
stream_response=request.stream, stream_response=True, # always use stream to enable batching
sequence_start=request.sequence_start, sequence_start=request.sequence_start,
sequence_end=request.sequence_end, sequence_end=request.sequence_end,
request_output_len=request.request_output_len, request_output_len=request.request_output_len,
...@@ -303,12 +303,14 @@ async def generate(request: GenerateRequest, raw_request: Request = None): ...@@ -303,12 +303,14 @@ async def generate(request: GenerateRequest, raw_request: Request = None):
return StreamingResponse(stream_results()) return StreamingResponse(stream_results())
else: else:
ret = {} ret = {}
text = ''
tokens = 0
finish_reason = None
async for out in generation: async for out in generation:
ret = { text += out.response
'text': out.response, tokens += out.generate_token_len
'tokens': out.generate_token_len, finish_reason = out.finish_reason
'finish_reason': out.finish_reason ret = {'text': text, 'tokens': tokens, 'finish_reason': finish_reason}
}
return JSONResponse(ret) return JSONResponse(ret)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment