Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
bff2e5f1
Unverified
Commit
bff2e5f1
authored
Sep 17, 2025
by
Andrew Xia
Committed by
GitHub
Sep 17, 2025
Browse files
[gpt-oss][2] fix types for streaming (#24556)
Signed-off-by:
Andrew Xia
<
axia@meta.com
>
parent
3c068c63
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
104 additions
and
96 deletions
+104
-96
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+5
-4
vllm/entrypoints/openai/protocol.py
vllm/entrypoints/openai/protocol.py
+33
-4
vllm/entrypoints/openai/serving_responses.py
vllm/entrypoints/openai/serving_responses.py
+66
-88
No files found.
vllm/entrypoints/openai/api_server.py
View file @
bff2e5f1
...
...
@@ -27,7 +27,6 @@ from fastapi import APIRouter, Depends, FastAPI, Form, HTTPException, Request
from
fastapi.exceptions
import
RequestValidationError
from
fastapi.middleware.cors
import
CORSMiddleware
from
fastapi.responses
import
JSONResponse
,
Response
,
StreamingResponse
from
openai
import
BaseModel
from
prometheus_client
import
make_asgi_app
from
prometheus_fastapi_instrumentator
import
Instrumentator
from
starlette.concurrency
import
iterate_in_threadpool
...
...
@@ -67,7 +66,9 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
RerankRequest
,
RerankResponse
,
ResponsesRequest
,
ResponsesResponse
,
ScoreRequest
,
ScoreResponse
,
TokenizeRequest
,
ScoreResponse
,
StreamingResponsesResponse
,
TokenizeRequest
,
TokenizeResponse
,
TranscriptionRequest
,
TranscriptionResponse
,
...
...
@@ -481,8 +482,8 @@ async def show_version():
async
def
_convert_stream_to_sse_events
(
generator
:
AsyncGenerator
[
BaseModel
,
None
]
)
->
AsyncGenerator
[
str
,
None
]:
generator
:
AsyncGenerator
[
StreamingResponsesResponse
,
None
]
)
->
AsyncGenerator
[
str
,
None
]:
"""Convert the generator to a stream of events in SSE format"""
async
for
event
in
generator
:
event_type
=
getattr
(
event
,
'type'
,
'unknown'
)
...
...
vllm/entrypoints/openai/protocol.py
View file @
bff2e5f1
...
...
@@ -18,10 +18,19 @@ from openai.types.chat.chat_completion_audio import (
from
openai.types.chat.chat_completion_message
import
(
Annotation
as
OpenAIAnnotation
)
# yapf: enable
from
openai.types.responses
import
(
ResponseFunctionToolCall
,
ResponseInputItemParam
,
ResponseOutputItem
,
ResponsePrompt
,
ResponseReasoningItem
,
ResponseStatus
)
from
openai.types.responses
import
(
ResponseCodeInterpreterCallCodeDeltaEvent
,
ResponseCodeInterpreterCallCodeDoneEvent
,
ResponseCodeInterpreterCallCompletedEvent
,
ResponseCodeInterpreterCallInProgressEvent
,
ResponseCodeInterpreterCallInterpretingEvent
,
ResponseCompletedEvent
,
ResponseContentPartAddedEvent
,
ResponseContentPartDoneEvent
,
ResponseCreatedEvent
,
ResponseFunctionToolCall
,
ResponseInProgressEvent
,
ResponseInputItemParam
,
ResponseOutputItem
,
ResponseOutputItemAddedEvent
,
ResponseOutputItemDoneEvent
,
ResponsePrompt
,
ResponseReasoningItem
,
ResponseReasoningTextDeltaEvent
,
ResponseReasoningTextDoneEvent
,
ResponseStatus
,
ResponseWebSearchCallCompletedEvent
,
ResponseWebSearchCallInProgressEvent
,
ResponseWebSearchCallSearchingEvent
)
# Backward compatibility for OpenAI client versions
try
:
# For older openai versions (< 1.100.0)
...
...
@@ -251,6 +260,26 @@ ResponseInputOutputItem: TypeAlias = Union[ResponseInputItemParam,
ResponseReasoningItem
,
ResponseFunctionToolCall
]
StreamingResponsesResponse
:
TypeAlias
=
Union
[
ResponseCreatedEvent
,
ResponseInProgressEvent
,
ResponseCompletedEvent
,
ResponseOutputItemAddedEvent
,
ResponseOutputItemDoneEvent
,
ResponseContentPartAddedEvent
,
ResponseContentPartDoneEvent
,
ResponseReasoningTextDeltaEvent
,
ResponseReasoningTextDoneEvent
,
ResponseCodeInterpreterCallInProgressEvent
,
ResponseCodeInterpreterCallCodeDeltaEvent
,
ResponseWebSearchCallInProgressEvent
,
ResponseWebSearchCallSearchingEvent
,
ResponseWebSearchCallCompletedEvent
,
ResponseCodeInterpreterCallCodeDoneEvent
,
ResponseCodeInterpreterCallInterpretingEvent
,
ResponseCodeInterpreterCallCompletedEvent
,
]
class
ResponsesRequest
(
OpenAIBaseModel
):
# Ordered by official OpenAI API documentation
...
...
vllm/entrypoints/openai/serving_responses.py
View file @
bff2e5f1
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment