Unverified commit 404fc4bf, authored by daniel-salib and committed by GitHub
Browse files

[Frontend] refactor harmony utils output message parsing (#29820)


Signed-off-by: Daniel Salib <danielsalib@meta.com>
parent 82a64b3d
...@@ -328,32 +328,16 @@ def render_for_completion(messages: list[Message]) -> list[int]: ...@@ -328,32 +328,16 @@ def render_for_completion(messages: list[Message]) -> list[int]:
return token_ids return token_ids
def parse_output_message(message: Message) -> list[ResponseOutputItem]: def _parse_browser_tool_call(message: Message, recipient: str) -> ResponseOutputItem:
""" """Parse browser tool calls (search, open, find) into web search items."""
Parse a Harmony message into a list of output response items.
"""
if message.author.role != "assistant":
# This is a message from a tool to the assistant (e.g., search result).
# Don't include it in the final output for now. This aligns with
# OpenAI's behavior on models like o4-mini.
return []
output_items: list[ResponseOutputItem] = []
recipient = message.recipient
if recipient is not None and recipient.startswith("browser."):
if len(message.content) != 1: if len(message.content) != 1:
raise ValueError("Invalid number of contents in browser message") raise ValueError("Invalid number of contents in browser message")
content = message.content[0] content = message.content[0]
# We do not need to check the VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY
# env variable since if it is not set, we are certain the json is valid # Parse JSON args (with retry detection)
# The use of Actions for web search will be removed entirely in
# the future, so this is only necessary temporarily
try: try:
browser_call = json.loads(content.text) browser_call = json.loads(content.text)
except json.JSONDecodeError: except json.JSONDecodeError:
# If the content is not valid JSON, then it was
# caught and retried by vLLM, which means we
# need to make note of that so the user is aware
json_retry_output_message = ( json_retry_output_message = (
f"Invalid JSON args, caught and retried: {content.text}" f"Invalid JSON args, caught and retried: {content.text}"
) )
...@@ -362,7 +346,8 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]: ...@@ -362,7 +346,8 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:
"url": json_retry_output_message, "url": json_retry_output_message,
"pattern": json_retry_output_message, "pattern": json_retry_output_message,
} }
# TODO: translate to url properly!
# Create appropriate action based on recipient
if recipient == "browser.search": if recipient == "browser.search":
action = ActionSearch( action = ActionSearch(
query=f"cursor:{browser_call.get('query', '')}", type="search" query=f"cursor:{browser_call.get('query', '')}", type="search"
...@@ -373,36 +358,25 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]: ...@@ -373,36 +358,25 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:
) )
elif recipient == "browser.find": elif recipient == "browser.find":
action = ActionFind( action = ActionFind(
pattern=browser_call["pattern"], pattern=browser_call.get("pattern", ""),
url=f"cursor:{browser_call.get('url', '')}", url=f"cursor:{browser_call.get('url', '')}",
type="find", type="find",
) )
else: else:
raise ValueError(f"Unknown browser action: {recipient}") raise ValueError(f"Unknown browser action: {recipient}")
web_search_item = ResponseFunctionWebSearch(
return ResponseFunctionWebSearch(
id=f"ws_{random_uuid()}", id=f"ws_{random_uuid()}",
action=action, action=action,
status="completed", status="completed",
type="web_search_call", type="web_search_call",
) )
output_items.append(web_search_item)
elif message.channel == "analysis":
for content in message.content: def _parse_function_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
reasoning_item = ResponseReasoningItem( """Parse function calls into function tool call items."""
id=f"rs_{random_uuid()}",
summary=[],
type="reasoning",
content=[
ResponseReasoningTextContent(
text=content.text, type="reasoning_text"
)
],
status=None,
)
output_items.append(reasoning_item)
elif message.channel == "commentary":
if recipient is not None and recipient.startswith("functions."):
function_name = recipient.split(".")[-1] function_name = recipient.split(".")[-1]
output_items = []
for content in message.content: for content in message.content:
random_id = random_uuid() random_id = random_uuid()
response_item = ResponseFunctionToolCall( response_item = ResponseFunctionToolCall(
...@@ -413,27 +387,28 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]: ...@@ -413,27 +387,28 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:
id=f"fc_{random_id}", id=f"fc_{random_id}",
) )
output_items.append(response_item) output_items.append(response_item)
elif recipient is not None and ( return output_items
recipient.startswith("python")
or recipient.startswith("browser")
or recipient.startswith("container") def _parse_reasoning_content(message: Message) -> list[ResponseOutputItem]:
): """Parse reasoning/analysis content into reasoning items."""
output_items = []
for content in message.content: for content in message.content:
reasoning_item = ResponseReasoningItem( reasoning_item = ResponseReasoningItem(
id=f"rs_{random_uuid()}", id=f"rs_{random_uuid()}",
summary=[], summary=[],
type="reasoning", type="reasoning",
content=[ content=[
ResponseReasoningTextContent( ResponseReasoningTextContent(text=content.text, type="reasoning_text")
text=content.text, type="reasoning_text"
)
], ],
status=None, status=None,
) )
output_items.append(reasoning_item) output_items.append(reasoning_item)
else: return output_items
raise ValueError(f"Unknown recipient: {recipient}")
elif message.channel == "final":
def _parse_final_message(message: Message) -> ResponseOutputItem:
"""Parse final channel messages into output message items."""
contents = [] contents = []
for content in message.content: for content in message.content:
output_text = ResponseOutputText( output_text = ResponseOutputText(
...@@ -443,16 +418,59 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]: ...@@ -443,16 +418,59 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:
logprobs=None, # TODO logprobs=None, # TODO
) )
contents.append(output_text) contents.append(output_text)
text_item = ResponseOutputMessage( return ResponseOutputMessage(
id=f"msg_{random_uuid()}", id=f"msg_{random_uuid()}",
content=contents, content=contents,
role=message.author.role, role=message.author.role,
status="completed", status="completed",
type="message", type="message",
) )
output_items.append(text_item)
def parse_output_message(message: Message) -> list[ResponseOutputItem]:
    """Convert one Harmony message into response output items.

    Dispatch order (first match wins):

    1. Non-assistant author: nothing is emitted.
    2. Recipient starting with ``"browser."``: one web search item, whatever
       the channel is.
    3. ``analysis`` channel: reasoning items.
    4. ``commentary`` channel: function tool calls for ``"functions."``
       recipients, reasoning items for recipients starting with ``python``,
       ``browser`` or ``container``.
    5. ``final`` channel: a single output message item.

    Raises:
        ValueError: if a commentary message has a missing or unrecognized
            recipient, or if the channel is none of the above.
    """
    # Tool -> assistant messages (e.g. search results) are left out of the
    # final output for now; this aligns with OpenAI's behavior on models
    # like o4-mini.
    if message.author.role != "assistant":
        return []

    recipient = message.recipient

    # Browser tool calls are recognized by recipient alone, before any
    # channel check.
    if recipient is not None and recipient.startswith("browser."):
        return [_parse_browser_tool_call(message, recipient)]

    channel = message.channel

    # Analysis channel carries reasoning / chain-of-thought.
    if channel == "analysis":
        return list(_parse_reasoning_content(message))

    if channel == "commentary":
        if recipient is not None:
            if recipient.startswith("functions."):
                return list(_parse_function_call(message, recipient))
            # Built-in tools on the commentary channel are treated as
            # reasoning for now.
            if recipient.startswith(("python", "browser", "container")):
                return list(_parse_reasoning_content(message))
        raise ValueError(f"Unknown recipient: {recipient}")

    if channel == "final":
        return [_parse_final_message(message)]

    raise ValueError(f"Unknown channel: {message.channel}")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment