Unverified commit a685b47c, authored by Andrew Xia and committed by GitHub
Browse files

[responsesAPI] refactor construct_input_messages (#29359)


Signed-off-by: Andrew Xia <axia@fb.com>
Co-authored-by: Andrew Xia <axia@fb.com>
parent 32c40b95
...@@ -94,7 +94,7 @@ from vllm.entrypoints.openai.protocol import ( ...@@ -94,7 +94,7 @@ from vllm.entrypoints.openai.protocol import (
from vllm.entrypoints.openai.serving_engine import OpenAIServing from vllm.entrypoints.openai.serving_engine import OpenAIServing
from vllm.entrypoints.openai.serving_models import OpenAIServingModels from vllm.entrypoints.openai.serving_models import OpenAIServingModels
from vllm.entrypoints.responses_utils import ( from vllm.entrypoints.responses_utils import (
construct_chat_message_with_tool_call, construct_input_messages,
convert_tool_responses_to_completions_format, convert_tool_responses_to_completions_format,
extract_tool_types, extract_tool_types,
) )
...@@ -504,7 +504,12 @@ class OpenAIServingResponses(OpenAIServing): ...@@ -504,7 +504,12 @@ class OpenAIServingResponses(OpenAIServing):
for tool in request.tools for tool in request.tools
] ]
# Construct the input messages. # Construct the input messages.
messages = self._construct_input_messages(request, prev_response) messages = construct_input_messages(
request_instructions=request.instructions,
request_input=request.input,
prev_msg=self.msg_store.get(prev_response.id) if prev_response else None,
prev_response_output=prev_response.output if prev_response else None,
)
_, request_prompts, engine_prompts = await self._preprocess_chat( _, request_prompts, engine_prompts = await self._preprocess_chat(
request, request,
tokenizer, tokenizer,
...@@ -869,47 +874,6 @@ class OpenAIServingResponses(OpenAIServing): ...@@ -869,47 +874,6 @@ class OpenAIServingResponses(OpenAIServing):
output_items.extend(last_items) output_items.extend(last_items)
return output_items return output_items
def _construct_input_messages(
    self,
    request: ResponsesRequest,
    prev_response: ResponsesResponse | None = None,
) -> list[ChatCompletionMessageParam]:
    """Assemble the chat-format message list for a Responses API request.

    The result is ordered as: an optional system message taken from
    ``request.instructions``, then (when ``prev_response`` is given) the
    messages stored for that response in ``self.msg_store`` followed by its
    assistant output, and finally the new input from ``request.input``.

    Args:
        request: The incoming request; ``instructions`` and ``input`` are read.
        prev_response: The response this request continues from, if any.

    Returns:
        The list of chat messages built from the pieces above.

    Raises:
        KeyError: If ``prev_response.id`` has no entry in ``self.msg_store``.
    """
    history: list[ChatCompletionMessageParam] = []

    system_prompt = request.instructions
    if system_prompt:
        history.append({"role": "system", "content": system_prompt})

    # Conversation history comes before the new input.
    if prev_response is not None:
        # Messages that were sent for the previous turn.
        history.extend(self.msg_store[prev_response.id])
        # Output produced by the previous turn.
        for produced in prev_response.output:
            # NOTE: Only output messages are replayed; reasoning output
            # (and any other item type) is skipped.
            if not isinstance(produced, ResponseOutputMessage):
                continue
            history.extend(
                {"role": "assistant", "content": part.text}
                for part in produced.content
            )

    # The Responses API accepts a bare string in place of chat-format input.
    new_input = request.input
    if isinstance(new_input, str):
        history.append({"role": "user", "content": new_input})
        return history

    for entry in new_input:
        history.append(construct_chat_message_with_tool_call(entry))
    return history
def _construct_harmony_system_input_message( def _construct_harmony_system_input_message(
self, request: ResponsesRequest, with_custom_tools: bool, tool_types: set[str] self, request: ResponsesRequest, with_custom_tools: bool, tool_types: set[str]
) -> OpenAIHarmonyMessage: ) -> OpenAIHarmonyMessage:
......
...@@ -9,7 +9,8 @@ from openai.types.chat import ( ...@@ -9,7 +9,8 @@ from openai.types.chat import (
from openai.types.chat.chat_completion_message_tool_call_param import ( from openai.types.chat.chat_completion_message_tool_call_param import (
Function as FunctionCallTool, Function as FunctionCallTool,
) )
from openai.types.responses import ResponseFunctionToolCall from openai.types.responses import ResponseFunctionToolCall, ResponseOutputItem
from openai.types.responses.response_output_message import ResponseOutputMessage
from openai.types.responses.response_reasoning_item import ResponseReasoningItem from openai.types.responses.response_reasoning_item import ResponseReasoningItem
from openai.types.responses.tool import Tool from openai.types.responses.tool import Tool
...@@ -20,6 +21,49 @@ from vllm.entrypoints.openai.protocol import ( ...@@ -20,6 +21,49 @@ from vllm.entrypoints.openai.protocol import (
) )
def construct_input_messages(
    *,
    request_instructions: str | None = None,
    request_input: str | list[ResponseInputOutputItem],
    prev_msg: list[ChatCompletionMessageParam] | None = None,
    prev_response_output: list[ResponseOutputItem] | None = None,
) -> list[ChatCompletionMessageParam]:
    """Build the chat-format message list for a Responses API request.

    Messages are emitted in this order:

    1. A system message holding ``request_instructions`` (if non-empty).
    2. ``prev_msg``, the messages of the previous turn (if given).
    3. One assistant message per content part of every
       ``ResponseOutputMessage`` in ``prev_response_output`` (if given).
    4. The new input: a single user message when ``request_input`` is a
       string, otherwise one message per item, converted with
       ``construct_chat_message_with_tool_call``.

    All arguments are keyword-only.

    Args:
        request_instructions: Optional system prompt; skipped when falsy.
        request_input: The new input, as plain text or a list of items.
        prev_msg: Messages of the previous turn, copied in as-is.
        prev_response_output: Output items of the previous response.

    Returns:
        A new list of chat messages; the argument lists are not modified.
    """
    messages: list[ChatCompletionMessageParam] = []
    if request_instructions:
        messages.append(
            {
                "role": "system",
                "content": request_instructions,
            }
        )

    # Prepend the conversation history.
    if prev_msg is not None:
        # Add the previous messages.
        messages.extend(prev_msg)
    if prev_response_output is not None:
        # Add the previous output.
        for output_item in prev_response_output:
            # NOTE: We skip the reasoning output (and any other item that is
            # not an output message).
            if not isinstance(output_item, ResponseOutputMessage):
                continue
            # NOTE(review): assumes every content part exposes `.text`;
            # confirm that non-text parts (e.g. refusals) cannot occur here.
            messages.extend(
                {
                    "role": "assistant",
                    "content": content.text,
                }
                for content in output_item.content
            )

    # Append the new input.
    # Responses API supports simple text inputs without chat format.
    if isinstance(request_input, str):
        messages.append({"role": "user", "content": request_input})
    else:
        messages.extend(
            construct_chat_message_with_tool_call(item) for item in request_input
        )
    return messages
def construct_chat_message_with_tool_call( def construct_chat_message_with_tool_call(
item: ResponseInputOutputItem, item: ResponseInputOutputItem,
) -> ChatCompletionMessageParam: ) -> ChatCompletionMessageParam:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment