Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
bff2e5f1
Unverified
Commit
bff2e5f1
authored
Sep 17, 2025
by
Andrew Xia
Committed by
GitHub
Sep 17, 2025
Browse files
[gpt-oss][2] fix types for streaming (#24556)
Signed-off-by:
Andrew Xia
<
axia@meta.com
>
parent
3c068c63
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
104 additions
and
96 deletions
+104
-96
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+5
-4
vllm/entrypoints/openai/protocol.py
vllm/entrypoints/openai/protocol.py
+33
-4
vllm/entrypoints/openai/serving_responses.py
vllm/entrypoints/openai/serving_responses.py
+66
-88
No files found.
vllm/entrypoints/openai/api_server.py
View file @
bff2e5f1
...
@@ -27,7 +27,6 @@ from fastapi import APIRouter, Depends, FastAPI, Form, HTTPException, Request
...
@@ -27,7 +27,6 @@ from fastapi import APIRouter, Depends, FastAPI, Form, HTTPException, Request
from
fastapi.exceptions
import
RequestValidationError
from
fastapi.exceptions
import
RequestValidationError
from
fastapi.middleware.cors
import
CORSMiddleware
from
fastapi.middleware.cors
import
CORSMiddleware
from
fastapi.responses
import
JSONResponse
,
Response
,
StreamingResponse
from
fastapi.responses
import
JSONResponse
,
Response
,
StreamingResponse
from
openai
import
BaseModel
from
prometheus_client
import
make_asgi_app
from
prometheus_client
import
make_asgi_app
from
prometheus_fastapi_instrumentator
import
Instrumentator
from
prometheus_fastapi_instrumentator
import
Instrumentator
from
starlette.concurrency
import
iterate_in_threadpool
from
starlette.concurrency
import
iterate_in_threadpool
...
@@ -67,7 +66,9 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
...
@@ -67,7 +66,9 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
RerankRequest
,
RerankResponse
,
RerankRequest
,
RerankResponse
,
ResponsesRequest
,
ResponsesRequest
,
ResponsesResponse
,
ScoreRequest
,
ResponsesResponse
,
ScoreRequest
,
ScoreResponse
,
TokenizeRequest
,
ScoreResponse
,
StreamingResponsesResponse
,
TokenizeRequest
,
TokenizeResponse
,
TokenizeResponse
,
TranscriptionRequest
,
TranscriptionRequest
,
TranscriptionResponse
,
TranscriptionResponse
,
...
@@ -481,8 +482,8 @@ async def show_version():
...
@@ -481,8 +482,8 @@ async def show_version():
async
def
_convert_stream_to_sse_events
(
async
def
_convert_stream_to_sse_events
(
generator
:
AsyncGenerator
[
BaseModel
,
generator
:
AsyncGenerator
[
StreamingResponsesResponse
,
None
]
None
]
)
->
AsyncGenerator
[
str
,
None
]:
)
->
AsyncGenerator
[
str
,
None
]:
"""Convert the generator to a stream of events in SSE format"""
"""Convert the generator to a stream of events in SSE format"""
async
for
event
in
generator
:
async
for
event
in
generator
:
event_type
=
getattr
(
event
,
'type'
,
'unknown'
)
event_type
=
getattr
(
event
,
'type'
,
'unknown'
)
...
...
vllm/entrypoints/openai/protocol.py
View file @
bff2e5f1
...
@@ -18,10 +18,19 @@ from openai.types.chat.chat_completion_audio import (
...
@@ -18,10 +18,19 @@ from openai.types.chat.chat_completion_audio import (
from
openai.types.chat.chat_completion_message
import
(
from
openai.types.chat.chat_completion_message
import
(
Annotation
as
OpenAIAnnotation
)
Annotation
as
OpenAIAnnotation
)
# yapf: enable
# yapf: enable
from
openai.types.responses
import
(
ResponseFunctionToolCall
,
from
openai.types.responses
import
(
ResponseInputItemParam
,
ResponseOutputItem
,
ResponseCodeInterpreterCallCodeDeltaEvent
,
ResponsePrompt
,
ResponseReasoningItem
,
ResponseCodeInterpreterCallCodeDoneEvent
,
ResponseStatus
)
ResponseCodeInterpreterCallCompletedEvent
,
ResponseCodeInterpreterCallInProgressEvent
,
ResponseCodeInterpreterCallInterpretingEvent
,
ResponseCompletedEvent
,
ResponseContentPartAddedEvent
,
ResponseContentPartDoneEvent
,
ResponseCreatedEvent
,
ResponseFunctionToolCall
,
ResponseInProgressEvent
,
ResponseInputItemParam
,
ResponseOutputItem
,
ResponseOutputItemAddedEvent
,
ResponseOutputItemDoneEvent
,
ResponsePrompt
,
ResponseReasoningItem
,
ResponseReasoningTextDeltaEvent
,
ResponseReasoningTextDoneEvent
,
ResponseStatus
,
ResponseWebSearchCallCompletedEvent
,
ResponseWebSearchCallInProgressEvent
,
ResponseWebSearchCallSearchingEvent
)
# Backward compatibility for OpenAI client versions
# Backward compatibility for OpenAI client versions
try
:
# For older openai versions (< 1.100.0)
try
:
# For older openai versions (< 1.100.0)
...
@@ -251,6 +260,26 @@ ResponseInputOutputItem: TypeAlias = Union[ResponseInputItemParam,
...
@@ -251,6 +260,26 @@ ResponseInputOutputItem: TypeAlias = Union[ResponseInputItemParam,
ResponseReasoningItem
,
ResponseReasoningItem
,
ResponseFunctionToolCall
]
ResponseFunctionToolCall
]
StreamingResponsesResponse
:
TypeAlias
=
Union
[
ResponseCreatedEvent
,
ResponseInProgressEvent
,
ResponseCompletedEvent
,
ResponseOutputItemAddedEvent
,
ResponseOutputItemDoneEvent
,
ResponseContentPartAddedEvent
,
ResponseContentPartDoneEvent
,
ResponseReasoningTextDeltaEvent
,
ResponseReasoningTextDoneEvent
,
ResponseCodeInterpreterCallInProgressEvent
,
ResponseCodeInterpreterCallCodeDeltaEvent
,
ResponseWebSearchCallInProgressEvent
,
ResponseWebSearchCallSearchingEvent
,
ResponseWebSearchCallCompletedEvent
,
ResponseCodeInterpreterCallCodeDoneEvent
,
ResponseCodeInterpreterCallInterpretingEvent
,
ResponseCodeInterpreterCallCompletedEvent
,
]
class
ResponsesRequest
(
OpenAIBaseModel
):
class
ResponsesRequest
(
OpenAIBaseModel
):
# Ordered by official OpenAI API documentation
# Ordered by official OpenAI API documentation
...
...
vllm/entrypoints/openai/serving_responses.py
View file @
bff2e5f1
...
@@ -10,24 +10,28 @@ from collections.abc import AsyncGenerator, AsyncIterator, Sequence
...
@@ -10,24 +10,28 @@ from collections.abc import AsyncGenerator, AsyncIterator, Sequence
from
contextlib
import
AsyncExitStack
from
contextlib
import
AsyncExitStack
from
copy
import
copy
from
copy
import
copy
from
http
import
HTTPStatus
from
http
import
HTTPStatus
from
typing
import
Callable
,
Final
,
Optional
,
TypeVar
,
Union
from
typing
import
Callable
,
Final
,
Optional
,
Union
import
jinja2
import
jinja2
import
openai.types.responses
as
openai_responses_types
from
fastapi
import
Request
from
fastapi
import
Request
from
openai
import
BaseModel
# yapf conflicts with isort for this block
# yapf conflicts with isort for this block
# yapf: disable
# yapf: disable
from
openai.types.responses
import
(
ResponseCreatedEvent
,
from
openai.types.responses
import
(
ResponseFunctionToolCall
,
ResponseCodeInterpreterCallCodeDeltaEvent
,
ResponseInProgressEvent
,
ResponseCodeInterpreterCallCodeDoneEvent
,
ResponseOutputItem
,
ResponseCodeInterpreterCallCompletedEvent
,
ResponseOutputItemDoneEvent
,
ResponseCodeInterpreterCallInProgressEvent
,
ResponseOutputMessage
,
ResponseOutputText
,
ResponseCodeInterpreterCallInterpretingEvent
,
ResponseReasoningItem
,
ResponseCodeInterpreterToolCallParam
,
ResponseCompletedEvent
,
ResponseReasoningTextDeltaEvent
,
ResponseContentPartAddedEvent
,
ResponseContentPartDoneEvent
,
ResponseReasoningTextDoneEvent
,
ResponseCreatedEvent
,
ResponseFunctionToolCall
,
ResponseFunctionWebSearch
,
ResponseStatus
,
response_text_delta_event
)
ResponseInProgressEvent
,
ResponseOutputItem
,
ResponseOutputItemAddedEvent
,
ResponseOutputItemDoneEvent
,
ResponseOutputMessage
,
ResponseOutputText
,
ResponseReasoningItem
,
ResponseReasoningTextDeltaEvent
,
ResponseReasoningTextDoneEvent
,
ResponseStatus
,
ResponseTextDeltaEvent
,
ResponseTextDoneEvent
,
ResponseWebSearchCallCompletedEvent
,
ResponseWebSearchCallInProgressEvent
,
ResponseWebSearchCallSearchingEvent
,
response_function_web_search
,
response_text_delta_event
)
from
openai.types.responses.response_output_text
import
(
Logprob
,
from
openai.types.responses.response_output_text
import
(
Logprob
,
LogprobTopLogprob
)
LogprobTopLogprob
)
# yapf: enable
# yapf: enable
...
@@ -55,7 +59,8 @@ from vllm.entrypoints.openai.protocol import (DeltaMessage, ErrorResponse,
...
@@ -55,7 +59,8 @@ from vllm.entrypoints.openai.protocol import (DeltaMessage, ErrorResponse,
OutputTokensDetails
,
OutputTokensDetails
,
RequestResponseMetadata
,
RequestResponseMetadata
,
ResponsesRequest
,
ResponsesRequest
,
ResponsesResponse
,
ResponseUsage
)
ResponsesResponse
,
ResponseUsage
,
StreamingResponsesResponse
)
# yapf: enable
# yapf: enable
from
vllm.entrypoints.openai.serving_engine
import
OpenAIServing
from
vllm.entrypoints.openai.serving_engine
import
OpenAIServing
from
vllm.entrypoints.openai.serving_models
import
OpenAIServingModels
from
vllm.entrypoints.openai.serving_models
import
OpenAIServingModels
...
@@ -175,7 +180,7 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -175,7 +180,7 @@ class OpenAIServingResponses(OpenAIServing):
# HACK(wuhang): This is a hack. We should use a better store.
# HACK(wuhang): This is a hack. We should use a better store.
# FIXME: If enable_store=True, this may cause a memory leak since we
# FIXME: If enable_store=True, this may cause a memory leak since we
# never remove events from the store.
# never remove events from the store.
self
.
event_store
:
dict
[
str
,
tuple
[
deque
[
BaseModel
],
self
.
event_store
:
dict
[
str
,
tuple
[
deque
[
StreamingResponsesResponse
],
asyncio
.
Event
]]
=
{}
asyncio
.
Event
]]
=
{}
self
.
background_tasks
:
dict
[
str
,
asyncio
.
Task
]
=
{}
self
.
background_tasks
:
dict
[
str
,
asyncio
.
Task
]
=
{}
...
@@ -186,8 +191,8 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -186,8 +191,8 @@ class OpenAIServingResponses(OpenAIServing):
self
,
self
,
request
:
ResponsesRequest
,
request
:
ResponsesRequest
,
raw_request
:
Optional
[
Request
]
=
None
,
raw_request
:
Optional
[
Request
]
=
None
,
)
->
Union
[
AsyncGenerator
[
BaseModel
,
None
],
ResponsesResponse
,
)
->
Union
[
AsyncGenerator
[
Streaming
ResponsesResponse
,
None
],
ErrorResponse
]:
ResponsesResponse
,
ErrorResponse
]:
error_check_ret
=
await
self
.
_check_model
(
request
)
error_check_ret
=
await
self
.
_check_model
(
request
)
if
error_check_ret
is
not
None
:
if
error_check_ret
is
not
None
:
logger
.
error
(
"Error with model %s"
,
error_check_ret
)
logger
.
error
(
"Error with model %s"
,
error_check_ret
)
...
@@ -814,7 +819,7 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -814,7 +819,7 @@ class OpenAIServingResponses(OpenAIServing):
*
args
,
*
args
,
**
kwargs
,
**
kwargs
,
):
):
event_deque
:
deque
[
BaseModel
]
=
deque
()
event_deque
:
deque
[
StreamingResponsesResponse
]
=
deque
()
new_event_signal
=
asyncio
.
Event
()
new_event_signal
=
asyncio
.
Event
()
self
.
event_store
[
request
.
request_id
]
=
(
event_deque
,
new_event_signal
)
self
.
event_store
[
request
.
request_id
]
=
(
event_deque
,
new_event_signal
)
response
=
None
response
=
None
...
@@ -867,7 +872,7 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -867,7 +872,7 @@ class OpenAIServingResponses(OpenAIServing):
self
,
self
,
response_id
:
str
,
response_id
:
str
,
starting_after
:
Optional
[
int
]
=
None
,
starting_after
:
Optional
[
int
]
=
None
,
)
->
AsyncGenerator
[
BaseModel
,
None
]:
)
->
AsyncGenerator
[
StreamingResponsesResponse
,
None
]:
if
response_id
not
in
self
.
event_store
:
if
response_id
not
in
self
.
event_store
:
raise
ValueError
(
f
"Unknown response_id:
{
response_id
}
"
)
raise
ValueError
(
f
"Unknown response_id:
{
response_id
}
"
)
...
@@ -893,8 +898,8 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -893,8 +898,8 @@ class OpenAIServingResponses(OpenAIServing):
response_id
:
str
,
response_id
:
str
,
starting_after
:
Optional
[
int
],
starting_after
:
Optional
[
int
],
stream
:
Optional
[
bool
],
stream
:
Optional
[
bool
],
)
->
Union
[
ErrorResponse
,
ResponsesResponse
,
AsyncGenerator
[
BaseModel
,
)
->
Union
[
ErrorResponse
,
ResponsesResponse
,
AsyncGenerator
[
None
]]:
StreamingResponsesResponse
,
None
]]:
if
not
response_id
.
startswith
(
"resp_"
):
if
not
response_id
.
startswith
(
"resp_"
):
return
self
.
_make_invalid_id_error
(
response_id
)
return
self
.
_make_invalid_id_error
(
response_id
)
...
@@ -977,9 +982,9 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -977,9 +982,9 @@ class OpenAIServingResponses(OpenAIServing):
tokenizer
:
AnyTokenizer
,
tokenizer
:
AnyTokenizer
,
request_metadata
:
RequestResponseMetadata
,
request_metadata
:
RequestResponseMetadata
,
created_time
:
int
,
created_time
:
int
,
_increment_sequence_number_and_return
:
Callable
[
[
BaseModel
],
_increment_sequence_number_and_return
:
Callable
[
BaseModel
],
[
StreamingResponsesResponse
],
StreamingResponsesResponse
],
)
->
AsyncGenerator
[
BaseModel
,
None
]:
)
->
AsyncGenerator
[
StreamingResponsesResponse
,
None
]:
current_content_index
=
0
current_content_index
=
0
current_output_index
=
0
current_output_index
=
0
current_item_id
=
""
current_item_id
=
""
...
@@ -1017,13 +1022,11 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1017,13 +1022,11 @@ class OpenAIServingResponses(OpenAIServing):
current_item_id
=
str
(
uuid
.
uuid4
())
current_item_id
=
str
(
uuid
.
uuid4
())
if
delta_message
.
reasoning_content
:
if
delta_message
.
reasoning_content
:
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseOutputItemAddedEvent
(
ResponseOutputItemAddedEvent
(
type
=
"response.output_item.added"
,
type
=
"response.output_item.added"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
output_index
=
current_output_index
,
output_index
=
current_output_index
,
item
=
openai_responses_types
.
item
=
ResponseReasoningItem
(
ResponseReasoningItem
(
type
=
"reasoning"
,
type
=
"reasoning"
,
id
=
current_item_id
,
id
=
current_item_id
,
summary
=
[],
summary
=
[],
...
@@ -1032,13 +1035,11 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1032,13 +1035,11 @@ class OpenAIServingResponses(OpenAIServing):
))
))
else
:
else
:
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseOutputItemAddedEvent
(
ResponseOutputItemAddedEvent
(
type
=
"response.output_item.added"
,
type
=
"response.output_item.added"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
output_index
=
current_output_index
,
output_index
=
current_output_index
,
item
=
openai_responses_types
.
item
=
ResponseOutputMessage
(
ResponseOutputMessage
(
id
=
current_item_id
,
id
=
current_item_id
,
type
=
"message"
,
type
=
"message"
,
role
=
"assistant"
,
role
=
"assistant"
,
...
@@ -1047,13 +1048,13 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1047,13 +1048,13 @@ class OpenAIServingResponses(OpenAIServing):
),
),
))
))
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseContentPartAddedEvent
(
ResponseContentPartAddedEvent
(
type
=
"response.content_part.added"
,
type
=
"response.content_part.added"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
output_index
=
current_output_index
,
output_index
=
current_output_index
,
item_id
=
current_item_id
,
item_id
=
current_item_id
,
content_index
=
current_content_index
,
content_index
=
current_content_index
,
part
=
openai_responses_types
.
ResponseOutputText
(
part
=
ResponseOutputText
(
type
=
"output_text"
,
type
=
"output_text"
,
text
=
""
,
text
=
""
,
annotations
=
[],
annotations
=
[],
...
@@ -1104,11 +1105,11 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1104,11 +1105,11 @@ class OpenAIServingResponses(OpenAIServing):
item
=
reasoning_item
,
item
=
reasoning_item
,
))
))
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseOutputItemAddedEvent
(
ResponseOutputItemAddedEvent
(
type
=
"response.output_item.added"
,
type
=
"response.output_item.added"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
output_index
=
current_output_index
,
output_index
=
current_output_index
,
item
=
openai_responses_types
.
ResponseOutputMessage
(
item
=
ResponseOutputMessage
(
id
=
current_item_id
,
id
=
current_item_id
,
type
=
"message"
,
type
=
"message"
,
role
=
"assistant"
,
role
=
"assistant"
,
...
@@ -1119,13 +1120,13 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1119,13 +1120,13 @@ class OpenAIServingResponses(OpenAIServing):
current_output_index
+=
1
current_output_index
+=
1
current_item_id
=
str
(
uuid
.
uuid4
())
current_item_id
=
str
(
uuid
.
uuid4
())
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseContentPartAddedEvent
(
ResponseContentPartAddedEvent
(
type
=
"response.content_part.added"
,
type
=
"response.content_part.added"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
output_index
=
current_output_index
,
output_index
=
current_output_index
,
item_id
=
current_item_id
,
item_id
=
current_item_id
,
content_index
=
current_content_index
,
content_index
=
current_content_index
,
part
=
openai_responses_types
.
ResponseOutputText
(
part
=
ResponseOutputText
(
type
=
"output_text"
,
type
=
"output_text"
,
text
=
""
,
text
=
""
,
annotations
=
[],
annotations
=
[],
...
@@ -1148,7 +1149,7 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1148,7 +1149,7 @@ class OpenAIServingResponses(OpenAIServing):
))
))
elif
delta_message
.
content
is
not
None
:
elif
delta_message
.
content
is
not
None
:
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseTextDeltaEvent
(
ResponseTextDeltaEvent
(
type
=
"response.output_text.delta"
,
type
=
"response.output_text.delta"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
content_index
=
current_content_index
,
content_index
=
current_content_index
,
...
@@ -1204,7 +1205,7 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1204,7 +1205,7 @@ class OpenAIServingResponses(OpenAIServing):
for
pm
in
previous_delta_messages
for
pm
in
previous_delta_messages
if
pm
.
content
is
not
None
)
if
pm
.
content
is
not
None
)
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseTextDoneEvent
(
ResponseTextDoneEvent
(
type
=
"response.output_text.done"
,
type
=
"response.output_text.done"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
output_index
=
current_output_index
,
output_index
=
current_output_index
,
...
@@ -1220,7 +1221,7 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1220,7 +1221,7 @@ class OpenAIServingResponses(OpenAIServing):
annotations
=
[],
annotations
=
[],
)
)
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseContentPartDoneEvent
(
ResponseContentPartDoneEvent
(
type
=
"response.content_part.done"
,
type
=
"response.content_part.done"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
item_id
=
current_item_id
,
item_id
=
current_item_id
,
...
@@ -1257,9 +1258,9 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1257,9 +1258,9 @@ class OpenAIServingResponses(OpenAIServing):
tokenizer
:
AnyTokenizer
,
tokenizer
:
AnyTokenizer
,
request_metadata
:
RequestResponseMetadata
,
request_metadata
:
RequestResponseMetadata
,
created_time
:
int
,
created_time
:
int
,
_increment_sequence_number_and_return
:
Callable
[
[
BaseModel
],
_increment_sequence_number_and_return
:
Callable
[
BaseModel
],
[
StreamingResponsesResponse
],
StreamingResponsesResponse
],
)
->
AsyncGenerator
[
BaseModel
,
None
]:
)
->
AsyncGenerator
[
StreamingResponsesResponse
,
None
]:
current_content_index
=
-
1
current_content_index
=
-
1
current_output_index
=
0
current_output_index
=
0
current_item_id
:
str
=
""
current_item_id
:
str
=
""
...
@@ -1314,7 +1315,7 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1314,7 +1315,7 @@ class OpenAIServingResponses(OpenAIServing):
annotations
=
[],
annotations
=
[],
)
)
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseTextDoneEvent
(
ResponseTextDoneEvent
(
type
=
"response.output_text.done"
,
type
=
"response.output_text.done"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
output_index
=
current_output_index
,
output_index
=
current_output_index
,
...
@@ -1324,7 +1325,6 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1324,7 +1325,6 @@ class OpenAIServingResponses(OpenAIServing):
item_id
=
current_item_id
,
item_id
=
current_item_id
,
))
))
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseContentPartDoneEvent
(
ResponseContentPartDoneEvent
(
type
=
"response.content_part.done"
,
type
=
"response.content_part.done"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
...
@@ -1334,7 +1334,7 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1334,7 +1334,7 @@ class OpenAIServingResponses(OpenAIServing):
part
=
text_content
,
part
=
text_content
,
))
))
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseOutputItemDoneEvent
(
ResponseOutputItemDoneEvent
(
type
=
"response.output_item.done"
,
type
=
"response.output_item.done"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
output_index
=
current_output_index
,
output_index
=
current_output_index
,
...
@@ -1355,13 +1355,11 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1355,13 +1355,11 @@ class OpenAIServingResponses(OpenAIServing):
sent_output_item_added
=
True
sent_output_item_added
=
True
current_item_id
=
f
"msg_
{
random_uuid
()
}
"
current_item_id
=
f
"msg_
{
random_uuid
()
}
"
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseOutputItemAddedEvent
(
ResponseOutputItemAddedEvent
(
type
=
"response.output_item.added"
,
type
=
"response.output_item.added"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
output_index
=
current_output_index
,
output_index
=
current_output_index
,
item
=
openai_responses_types
.
item
=
ResponseOutputMessage
(
ResponseOutputMessage
(
id
=
current_item_id
,
id
=
current_item_id
,
type
=
"message"
,
type
=
"message"
,
role
=
"assistant"
,
role
=
"assistant"
,
...
@@ -1371,14 +1369,13 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1371,14 +1369,13 @@ class OpenAIServingResponses(OpenAIServing):
))
))
current_content_index
+=
1
current_content_index
+=
1
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseContentPartAddedEvent
(
ResponseContentPartAddedEvent
(
type
=
"response.content_part.added"
,
type
=
"response.content_part.added"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
output_index
=
current_output_index
,
output_index
=
current_output_index
,
item_id
=
current_item_id
,
item_id
=
current_item_id
,
content_index
=
current_content_index
,
content_index
=
current_content_index
,
part
=
openai_responses_types
.
ResponseOutputText
(
part
=
ResponseOutputText
(
type
=
"output_text"
,
type
=
"output_text"
,
text
=
""
,
text
=
""
,
annotations
=
[],
annotations
=
[],
...
@@ -1386,7 +1383,7 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1386,7 +1383,7 @@ class OpenAIServingResponses(OpenAIServing):
),
),
))
))
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseTextDeltaEvent
(
ResponseTextDeltaEvent
(
type
=
"response.output_text.delta"
,
type
=
"response.output_text.delta"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
content_index
=
current_content_index
,
content_index
=
current_content_index
,
...
@@ -1402,13 +1399,11 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1402,13 +1399,11 @@ class OpenAIServingResponses(OpenAIServing):
sent_output_item_added
=
True
sent_output_item_added
=
True
current_item_id
=
f
"msg_
{
random_uuid
()
}
"
current_item_id
=
f
"msg_
{
random_uuid
()
}
"
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseOutputItemAddedEvent
(
ResponseOutputItemAddedEvent
(
type
=
"response.output_item.added"
,
type
=
"response.output_item.added"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
output_index
=
current_output_index
,
output_index
=
current_output_index
,
item
=
openai_responses_types
.
item
=
ResponseReasoningItem
(
ResponseReasoningItem
(
type
=
"reasoning"
,
type
=
"reasoning"
,
id
=
current_item_id
,
id
=
current_item_id
,
summary
=
[],
summary
=
[],
...
@@ -1417,14 +1412,13 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1417,14 +1412,13 @@ class OpenAIServingResponses(OpenAIServing):
))
))
current_content_index
+=
1
current_content_index
+=
1
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseContentPartAddedEvent
(
ResponseContentPartAddedEvent
(
type
=
"response.content_part.added"
,
type
=
"response.content_part.added"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
output_index
=
current_output_index
,
output_index
=
current_output_index
,
item_id
=
current_item_id
,
item_id
=
current_item_id
,
content_index
=
current_content_index
,
content_index
=
current_content_index
,
part
=
openai_responses_types
.
ResponseOutputText
(
part
=
ResponseOutputText
(
type
=
"output_text"
,
type
=
"output_text"
,
text
=
""
,
text
=
""
,
annotations
=
[],
annotations
=
[],
...
@@ -1450,13 +1444,11 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1450,13 +1444,11 @@ class OpenAIServingResponses(OpenAIServing):
sent_output_item_added
=
True
sent_output_item_added
=
True
current_item_id
=
f
"tool_
{
random_uuid
()
}
"
current_item_id
=
f
"tool_
{
random_uuid
()
}
"
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseOutputItemAddedEvent
(
ResponseOutputItemAddedEvent
(
type
=
"response.output_item.added"
,
type
=
"response.output_item.added"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
output_index
=
current_output_index
,
output_index
=
current_output_index
,
item
=
openai_responses_types
.
item
=
ResponseCodeInterpreterToolCallParam
(
ResponseCodeInterpreterToolCallParam
(
type
=
"code_interpreter_call"
,
type
=
"code_interpreter_call"
,
id
=
current_item_id
,
id
=
current_item_id
,
code
=
None
,
code
=
None
,
...
@@ -1466,7 +1458,6 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1466,7 +1458,6 @@ class OpenAIServingResponses(OpenAIServing):
),
),
))
))
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseCodeInterpreterCallInProgressEvent
(
ResponseCodeInterpreterCallInProgressEvent
(
type
=
type
=
"response.code_interpreter_call.in_progress"
,
"response.code_interpreter_call.in_progress"
,
...
@@ -1475,7 +1466,6 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1475,7 +1466,6 @@ class OpenAIServingResponses(OpenAIServing):
item_id
=
current_item_id
,
item_id
=
current_item_id
,
))
))
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseCodeInterpreterCallCodeDeltaEvent
(
ResponseCodeInterpreterCallCodeDeltaEvent
(
type
=
"response.code_interpreter_call_code.delta"
,
type
=
"response.code_interpreter_call_code.delta"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
...
@@ -1495,14 +1485,12 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1495,14 +1485,12 @@ class OpenAIServingResponses(OpenAIServing):
action
=
None
action
=
None
parsed_args
=
json
.
loads
(
previous_item
.
content
[
0
].
text
)
parsed_args
=
json
.
loads
(
previous_item
.
content
[
0
].
text
)
if
function_name
==
"search"
:
if
function_name
==
"search"
:
action
=
(
openai_responses_types
.
action
=
(
response_function_web_search
.
ActionSearch
(
response_function_web_search
.
ActionSearch
(
type
=
"search"
,
type
=
"search"
,
query
=
parsed_args
[
"query"
],
query
=
parsed_args
[
"query"
],
))
))
elif
function_name
==
"open"
:
elif
function_name
==
"open"
:
action
=
(
action
=
(
openai_responses_types
.
response_function_web_search
.
ActionOpenPage
(
response_function_web_search
.
ActionOpenPage
(
type
=
"open_page"
,
type
=
"open_page"
,
# TODO: translate to url
# TODO: translate to url
...
@@ -1510,7 +1498,6 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1510,7 +1498,6 @@ class OpenAIServingResponses(OpenAIServing):
))
))
elif
function_name
==
"find"
:
elif
function_name
==
"find"
:
action
=
(
action
=
(
openai_responses_types
.
response_function_web_search
.
ActionFind
(
response_function_web_search
.
ActionFind
(
type
=
"find"
,
type
=
"find"
,
pattern
=
parsed_args
[
"pattern"
],
pattern
=
parsed_args
[
"pattern"
],
...
@@ -1523,12 +1510,11 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1523,12 +1510,11 @@ class OpenAIServingResponses(OpenAIServing):
current_item_id
=
f
"tool_
{
random_uuid
()
}
"
current_item_id
=
f
"tool_
{
random_uuid
()
}
"
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseOutputItemAddedEvent
(
ResponseOutputItemAddedEvent
(
type
=
"response.output_item.added"
,
type
=
"response.output_item.added"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
output_index
=
current_output_index
,
output_index
=
current_output_index
,
item
=
openai_responses_types
.
item
=
response_function_web_search
.
response_function_web_search
.
ResponseFunctionWebSearch
(
ResponseFunctionWebSearch
(
# TODO: generate a unique id for web search call
# TODO: generate a unique id for web search call
type
=
"web_search_call"
,
type
=
"web_search_call"
,
...
@@ -1538,7 +1524,6 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1538,7 +1524,6 @@ class OpenAIServingResponses(OpenAIServing):
),
),
))
))
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseWebSearchCallInProgressEvent
(
ResponseWebSearchCallInProgressEvent
(
type
=
"response.web_search_call.in_progress"
,
type
=
"response.web_search_call.in_progress"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
...
@@ -1546,7 +1531,6 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1546,7 +1531,6 @@ class OpenAIServingResponses(OpenAIServing):
item_id
=
current_item_id
,
item_id
=
current_item_id
,
))
))
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseWebSearchCallSearchingEvent
(
ResponseWebSearchCallSearchingEvent
(
type
=
"response.web_search_call.searching"
,
type
=
"response.web_search_call.searching"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
...
@@ -1556,7 +1540,6 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1556,7 +1540,6 @@ class OpenAIServingResponses(OpenAIServing):
# enqueue
# enqueue
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseWebSearchCallCompletedEvent
(
ResponseWebSearchCallCompletedEvent
(
type
=
"response.web_search_call.completed"
,
type
=
"response.web_search_call.completed"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
...
@@ -1564,12 +1547,11 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1564,12 +1547,11 @@ class OpenAIServingResponses(OpenAIServing):
item_id
=
current_item_id
,
item_id
=
current_item_id
,
))
))
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseOutputItemDoneEvent
(
ResponseOutputItemDoneEvent
(
type
=
"response.output_item.done"
,
type
=
"response.output_item.done"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
output_index
=
current_output_index
,
output_index
=
current_output_index
,
item
=
openai_responses_types
.
item
=
ResponseFunctionWebSearch
(
ResponseFunctionWebSearch
(
type
=
"web_search_call"
,
type
=
"web_search_call"
,
id
=
current_item_id
,
id
=
current_item_id
,
action
=
action
,
action
=
action
,
...
@@ -1582,7 +1564,6 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1582,7 +1564,6 @@ class OpenAIServingResponses(OpenAIServing):
and
previous_item
.
recipient
is
not
None
and
previous_item
.
recipient
is
not
None
and
previous_item
.
recipient
.
startswith
(
"python"
)):
and
previous_item
.
recipient
.
startswith
(
"python"
)):
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseCodeInterpreterCallCodeDoneEvent
(
ResponseCodeInterpreterCallCodeDoneEvent
(
type
=
"response.code_interpreter_call_code.done"
,
type
=
"response.code_interpreter_call_code.done"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
...
@@ -1591,7 +1572,6 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1591,7 +1572,6 @@ class OpenAIServingResponses(OpenAIServing):
code
=
previous_item
.
content
[
0
].
text
,
code
=
previous_item
.
content
[
0
].
text
,
))
))
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseCodeInterpreterCallInterpretingEvent
(
ResponseCodeInterpreterCallInterpretingEvent
(
type
=
"response.code_interpreter_call.interpreting"
,
type
=
"response.code_interpreter_call.interpreting"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
...
@@ -1599,7 +1579,6 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1599,7 +1579,6 @@ class OpenAIServingResponses(OpenAIServing):
item_id
=
current_item_id
,
item_id
=
current_item_id
,
))
))
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseCodeInterpreterCallCompletedEvent
(
ResponseCodeInterpreterCallCompletedEvent
(
type
=
"response.code_interpreter_call.completed"
,
type
=
"response.code_interpreter_call.completed"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
...
@@ -1607,12 +1586,11 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1607,12 +1586,11 @@ class OpenAIServingResponses(OpenAIServing):
item_id
=
current_item_id
,
item_id
=
current_item_id
,
))
))
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseOutputItemDoneEvent
(
ResponseOutputItemDoneEvent
(
type
=
"response.output_item.done"
,
type
=
"response.output_item.done"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
output_index
=
current_output_index
,
output_index
=
current_output_index
,
item
=
openai_responses_types
.
item
=
ResponseCodeInterpreterToolCallParam
(
ResponseCodeInterpreterToolCallParam
(
type
=
"code_interpreter_call"
,
type
=
"code_interpreter_call"
,
id
=
current_item_id
,
id
=
current_item_id
,
code
=
previous_item
.
content
[
0
].
text
,
code
=
previous_item
.
content
[
0
].
text
,
...
@@ -1633,7 +1611,7 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1633,7 +1611,7 @@ class OpenAIServingResponses(OpenAIServing):
tokenizer
:
AnyTokenizer
,
tokenizer
:
AnyTokenizer
,
request_metadata
:
RequestResponseMetadata
,
request_metadata
:
RequestResponseMetadata
,
created_time
:
Optional
[
int
]
=
None
,
created_time
:
Optional
[
int
]
=
None
,
)
->
AsyncGenerator
[
BaseModel
,
None
]:
)
->
AsyncGenerator
[
StreamingResponsesResponse
,
None
]:
# TODO:
# TODO:
# 1. Handle disconnect
# 1. Handle disconnect
...
@@ -1641,9 +1619,9 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1641,9 +1619,9 @@ class OpenAIServingResponses(OpenAIServing):
sequence_number
=
0
sequence_number
=
0
T
=
TypeVar
(
"T"
,
bound
=
BaseModel
)
def
_increment_sequence_number_and_return
(
event
:
StreamingResponsesResponse
def
_increment_sequence_number_and_return
(
event
:
T
)
->
T
:
)
->
StreamingResponsesResponse
:
nonlocal
sequence_number
nonlocal
sequence_number
# Set sequence_number if the event has this attribute
# Set sequence_number if the event has this attribute
if
hasattr
(
event
,
'sequence_number'
):
if
hasattr
(
event
,
'sequence_number'
):
...
@@ -1705,7 +1683,7 @@ class OpenAIServingResponses(OpenAIServing):
...
@@ -1705,7 +1683,7 @@ class OpenAIServingResponses(OpenAIServing):
created_time
=
created_time
,
created_time
=
created_time
,
)
)
yield
_increment_sequence_number_and_return
(
yield
_increment_sequence_number_and_return
(
openai_responses_types
.
ResponseCompletedEvent
(
ResponseCompletedEvent
(
type
=
"response.completed"
,
type
=
"response.completed"
,
sequence_number
=-
1
,
sequence_number
=-
1
,
response
=
final_response
.
model_dump
(),
response
=
final_response
.
model_dump
(),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment