Unverified Commit 57cf99b9 authored by fade_away, committed by GitHub
Browse files

bug-fix: always use stream mode to enable persistent batching (#346)


Co-authored-by: sleepwalker <just_for_singing@foxmail.com>
parent e4701226
......@@ -278,7 +278,7 @@ async def generate(request: GenerateRequest, raw_request: Request = None):
generation = VariableInterface.async_engine.generate(
request.prompt,
request.instance_id,
stream_response=request.stream,
stream_response=True, # always use stream to enable batching
sequence_start=request.sequence_start,
sequence_end=request.sequence_end,
request_output_len=request.request_output_len,
......@@ -303,12 +303,14 @@ async def generate(request: GenerateRequest, raw_request: Request = None):
return StreamingResponse(stream_results())
else:
ret = {}
text = ''
tokens = 0
finish_reason = None
async for out in generation:
ret = {
'text': out.response,
'tokens': out.generate_token_len,
'finish_reason': out.finish_reason
}
text += out.response
tokens += out.generate_token_len
finish_reason = out.finish_reason
ret = {'text': text, 'tokens': tokens, 'finish_reason': finish_reason}
return JSONResponse(ret)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment