Unverified Commit 40148041 authored by Wen Sun, committed by GitHub

Ensure Usage Data in Streaming Responses Aligns with vLLM’s Implementation (#3814)

parent ad46550d
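
The change is identical in the `/v1/completions` and `/v1/chat/completions` streaming paths: the final usage chunk now reuses the request id from `content["meta_info"]["id"]` (so it matches the ids on the preceding generation chunks) instead of minting a fresh UUID, and it is serialized with `exclude_none=True` alone, dropping `exclude_unset=True`, which was also stripping defaulted fields that vLLM includes. A minimal sketch of the Pydantic behavior behind the serialization change (the `Chunk` model and its fields below are hypothetical, not taken from the patch):

```python
from typing import Optional

from pydantic import BaseModel


class Chunk(BaseModel):
    id: str
    object: str = "chat.completion.chunk"  # defaulted, not set by the caller
    usage: Optional[dict] = None


c = Chunk(id="cmpl-123")

# exclude_unset=True omits every field the caller did not pass explicitly,
# so the defaulted "object" field vanishes from the payload:
print(c.model_dump_json(exclude_unset=True, exclude_none=True))
# {"id":"cmpl-123"}

# exclude_none=True alone keeps defaults and drops only None-valued fields,
# matching the shape vLLM emits:
print(c.model_dump_json(exclude_none=True))
# {"id":"cmpl-123","object":"chat.completion.chunk"}
```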
@@ -820,13 +820,13 @@ async def v1_completions(tokenizer_manager, raw_request: Request):
             )
             final_usage_chunk = CompletionStreamResponse(
-                id=str(uuid.uuid4().hex),
+                id=content["meta_info"]["id"],
                 choices=[],
                 model=request.model,
                 usage=usage,
             )
             final_usage_data = final_usage_chunk.model_dump_json(
-                exclude_unset=True, exclude_none=True
+                exclude_none=True
             )
             yield f"data: {final_usage_data}\n\n"
     except ValueError as e:
@@ -1495,13 +1495,13 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
             )
             final_usage_chunk = ChatCompletionStreamResponse(
-                id=str(uuid.uuid4().hex),
+                id=content["meta_info"]["id"],
                 choices=[],
                 model=request.model,
                 usage=usage,
             )
             final_usage_data = final_usage_chunk.model_dump_json(
-                exclude_unset=True, exclude_none=True
+                exclude_none=True
            )
             yield f"data: {final_usage_data}\n\n"
     except ValueError as e:
...
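
From the client side, the final usage chunk appears when streaming with `stream_options={"include_usage": True}`: it carries the token counts, an empty `choices` list, and, after this patch, the same id as the generation chunks. A hypothetical check against a local server (the base URL, API key, and model name are placeholders):

```python
import openai

client = openai.OpenAI(base_url="http://localhost:30000/v1", api_key="EMPTY")

stream = client.chat.completions.create(
    model="default",
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,
    stream_options={"include_usage": True},
)

for chunk in stream:
    if chunk.usage is not None:
        # Final chunk: empty choices, usage populated, id shared with
        # the earlier generation chunks.
        print(chunk.id, chunk.usage.prompt_tokens, chunk.usage.completion_tokens)
```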