Unverified Commit 40148041 authored by Wen Sun, committed by GitHub

Ensure Usage Data in Streaming Responses Aligns with vLLM’s Implementation (#3814)

parent ad46550d
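
The change is identical in the `/v1/completions` and `/v1/chat/completions` streaming paths: the final usage chunk now reuses the request id from `content["meta_info"]["id"]` (so it matches the ids on the preceding generation chunks) instead of minting a fresh UUID, and it is serialized with `exclude_none=True` alone, dropping `exclude_unset=True`, which was also stripping defaulted fields that vLLM includes. A minimal sketch of the Pydantic behavior behind the serialization change (the `Chunk` model and its fields below are hypothetical, not taken from the patch):

```python
from typing import Optional

from pydantic import BaseModel


class Chunk(BaseModel):
    id: str
    object: str = "chat.completion.chunk"  # defaulted, not set by the caller
    usage: Optional[dict] = None


c = Chunk(id="cmpl-123")

# exclude_unset=True omits every field the caller did not pass explicitly,
# so the defaulted "object" field vanishes from the payload:
print(c.model_dump_json(exclude_unset=True, exclude_none=True))
# {"id":"cmpl-123"}

# exclude_none=True alone keeps defaults and drops only None-valued fields,
# matching the shape vLLM emits:
print(c.model_dump_json(exclude_none=True))
# {"id":"cmpl-123","object":"chat.completion.chunk"}
```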
@@ -820,13 +820,13 @@ async def v1_completions(tokenizer_manager, raw_request: Request):
             )
             final_usage_chunk = CompletionStreamResponse(
-                id=str(uuid.uuid4().hex),
+                id=content["meta_info"]["id"],
                 choices=[],
                 model=request.model,
                 usage=usage,
             )
             final_usage_data = final_usage_chunk.model_dump_json(
-                exclude_unset=True, exclude_none=True
+                exclude_none=True
             )
             yield f"data: {final_usage_data}\n\n"
     except ValueError as e:
@@ -1495,13 +1495,13 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
             )
             final_usage_chunk = ChatCompletionStreamResponse(
-                id=str(uuid.uuid4().hex),
+                id=content["meta_info"]["id"],
                 choices=[],
                 model=request.model,
                 usage=usage,
             )
             final_usage_data = final_usage_chunk.model_dump_json(
-                exclude_unset=True, exclude_none=True
+                exclude_none=True
            )
             yield f"data: {final_usage_data}\n\n"
     except ValueError as e:
...
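
From the client side, the final usage chunk appears when streaming with `stream_options={"include_usage": True}`: it carries the token counts, an empty `choices` list, and, after this patch, the same id as the generation chunks. A hypothetical check against a local server (the base URL, API key, and model name are placeholders):

```python
import openai

client = openai.OpenAI(base_url="http://localhost:30000/v1", api_key="EMPTY")

stream = client.chat.completions.create(
    model="default",
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,
    stream_options={"include_usage": True},
)

for chunk in stream:
    if chunk.usage is not None:
        # Final chunk: empty choices, usage populated, id shared with
        # the earlier generation chunks.
        print(chunk.id, chunk.usage.prompt_tokens, chunk.usage.completion_tokens)
```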