Unverified commit 25aba2b6, authored by Andrew Xia, committed by GitHub
Browse files

[gpt-oss] Add IncompleteDetails to ResponsesResponse (#24561)


Signed-off-by: Andrew Xia <axia@meta.com>
parent 94b03f88
......@@ -74,6 +74,20 @@ async def test_basic_with_reasoning_effort(client: OpenAI, model_name: str):
assert response.status == "completed"
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_max_tokens(client: OpenAI, model_name: str):
    """Check that a response capped by ``max_output_tokens`` is incomplete.

    Sends a request with ``max_output_tokens=30`` and low reasoning effort,
    then verifies that the returned response reports ``status ==
    "incomplete"`` and that ``incomplete_details.reason`` is
    ``"max_output_tokens"``.

    Args:
        client: Client fixture used to call the Responses API.
        model_name: Model identifier supplied by the parametrization.
    """
    response = await client.responses.create(
        model=model_name,
        input="What is the first paragraph of Moby Dick?",
        reasoning={"effort": "low"},
        # Small cap; the test expects generation to stop on this limit.
        max_output_tokens=30,
    )
    assert response is not None
    assert response.status == "incomplete"
    # Check presence first so a missing payload fails as a readable
    # assertion instead of an AttributeError on None.
    assert response.incomplete_details is not None
    assert response.incomplete_details.reason == "max_output_tokens"
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_chat(client: OpenAI, model_name: str):
......
......@@ -112,6 +112,7 @@ class HarmonyContext(ConversationContext):
available_tools: list[str],
):
self._messages = messages
self.finish_reason: Optional[str] = None
self.available_tools = available_tools
self._tool_sessions: dict[str, Union[ClientSession, Tool]] = {}
self.called_tools: set[str] = set()
......@@ -135,7 +136,8 @@ class HarmonyContext(ConversationContext):
if self.parser.current_channel in {"analysis", "commentary"}:
self.num_reasoning_tokens += 1
def append_output(self, output) -> None:
def append_output(self, output: Union[RequestOutput,
list[Message]]) -> None:
if isinstance(output, RequestOutput):
output_token_ids = output.outputs[0].token_ids
self.parser = get_streamable_parser_for_assistant()
......@@ -150,6 +152,8 @@ class HarmonyContext(ConversationContext):
# Move current turn to previous turn for next turn's calculations
self.previous_turn = self.current_turn.copy()
output_msgs = self.parser.messages
# The responses finish reason is set in the last message
self.finish_reason = output.outputs[0].finish_reason
else:
# Tool output.
output_msgs = output
......@@ -385,7 +389,8 @@ class StreamingHarmonyContext(HarmonyContext):
def messages(self) -> list:
return self.parser.messages
def append_output(self, output) -> None:
def append_output(self, output: Union[RequestOutput,
list[Message]]) -> None:
if isinstance(output, RequestOutput):
# append_output is called for each output token in streaming case,
# so we only want to add the prompt tokens once for each message.
......
......@@ -387,7 +387,9 @@ def parse_remaining_state(
id=f"msg_{random_uuid()}",
content=[output_text],
role="assistant",
status="completed",
# if the parser still has messages (i.e. if the generator got cut
# off abruptly), this should be incomplete
status="incomplete",
type="message",
)
return [text_item]
......
......@@ -30,7 +30,7 @@ except ImportError: # For newer openai versions (>= 1.100.0)
from openai.types.responses import (ResponseFormatTextConfig as
ResponseTextConfig)
from openai.types.responses.response import ToolChoice
from openai.types.responses.response import IncompleteDetails, ToolChoice
from openai.types.responses.tool import Tool
from openai.types.shared import Metadata, Reasoning
from pydantic import (BaseModel, ConfigDict, Field, TypeAdapter,
......@@ -1868,7 +1868,7 @@ class ResponsesResponse(OpenAIBaseModel):
id: str = Field(default_factory=lambda: f"resp_{random_uuid()}")
created_at: int = Field(default_factory=lambda: int(time.time()))
# error: Optional[ResponseError] = None
# incomplete_details: Optional[IncompleteDetails] = None
incomplete_details: Optional[IncompleteDetails] = None
instructions: Optional[str] = None
metadata: Optional[Metadata] = None
model: str
......@@ -1904,9 +1904,18 @@ class ResponsesResponse(OpenAIBaseModel):
status: ResponseStatus,
usage: Optional[ResponseUsage] = None,
) -> "ResponsesResponse":
incomplete_details: Optional[IncompleteDetails] = None
if status == 'incomplete':
incomplete_details = IncompleteDetails(reason='max_output_tokens')
# TODO: implement the other reason for incomplete_details,
# which is content_filter
# incomplete_details = IncompleteDetails(reason='content_filter')
return cls(
id=request.request_id,
created_at=created_time,
incomplete_details=incomplete_details,
instructions=request.instructions,
metadata=request.metadata,
model=model_name,
......
......@@ -27,7 +27,7 @@ from openai.types.responses import (ResponseCreatedEvent,
ResponseReasoningItem,
ResponseReasoningTextDeltaEvent,
ResponseReasoningTextDoneEvent,
response_text_delta_event)
ResponseStatus, response_text_delta_event)
from openai.types.responses.response_output_text import (Logprob,
LogprobTopLogprob)
# yapf: enable
......@@ -461,10 +461,22 @@ class OpenAIServingResponses(OpenAIServing):
# TODO: Use a vllm-specific Validation Error
return self.create_error_response(str(e))
# NOTE: Implementation of status is still WIP, but for now
# we guarantee that if the status is not "completed", it is accurate.
# "completed" is implemented as the "catch-all" for now.
status: ResponseStatus = "completed"
if self.use_harmony:
assert isinstance(context, HarmonyContext)
output = self._make_response_output_items_with_harmony(context)
num_tool_output_tokens = context.num_tool_output_tokens
if len(output) > 0:
if context.finish_reason == "length":
status = "incomplete"
elif context.finish_reason == "abort":
status = "cancelled"
else:
status = "incomplete"
else:
assert isinstance(context, SimpleContext)
final_res = context.last_output
......@@ -501,7 +513,7 @@ class OpenAIServingResponses(OpenAIServing):
model_name=model_name,
created_time=created_time,
output=output,
status="completed",
status=status,
usage=usage,
)
......@@ -658,7 +670,7 @@ class OpenAIServingResponses(OpenAIServing):
self,
context: HarmonyContext,
) -> list[ResponseOutputItem]:
output_items = []
output_items: list[ResponseOutputItem] = []
num_init_messages = context.num_init_messages
for msg in context.messages[num_init_messages:]:
output_items.extend(parse_output_message(msg))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment