# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import datetime
from collections.abc import Iterable
from typing import Literal, Optional

from openai.types.responses.tool import Tool
from openai_harmony import (Conversation, DeveloperContent,
                            HarmonyEncodingName, Message, ReasoningEffort,
                            Role, StreamableParser, SystemContent, TextContent,
                            ToolDescription, load_harmony_encoding)

# Maps the user-facing reasoning effort strings to the Harmony enum members.
REASONING_EFFORT = {
    "high": ReasoningEffort.HIGH,
    "medium": ReasoningEffort.MEDIUM,
    "low": ReasoningEffort.LOW,
}

# Module-level cache for the encoding; populated on first get_encoding() call.
_harmony_encoding = None


def get_encoding():
    """Return the Harmony GPT-OSS encoding, loading it on first use.

    The loaded encoding is stored in the module-level ``_harmony_encoding``
    variable so that later calls reuse the same object.
    """
    global _harmony_encoding
    if _harmony_encoding is not None:
        return _harmony_encoding
    _harmony_encoding = load_harmony_encoding(
        HarmonyEncodingName.HARMONY_GPT_OSS)
    return _harmony_encoding


def get_system_message(
    model_identity: Optional[str] = None,
    reasoning_effort: Optional[Literal["high", "medium", "low"]] = None,
    start_date: Optional[str] = None,
    browser_description: Optional[str] = None,
    python_description: Optional[str] = None,
) -> Message:
    """Build the system-role message.

    Args:
        model_identity: If given, set as the model identity.
        reasoning_effort: If given, looked up in ``REASONING_EFFORT`` and set
            as the reasoning effort.
        start_date: Conversation start date. When omitted, today's date
            formatted as ``%Y-%m-%d`` is used.
        browser_description: If given, registered via ``with_tools``.
        python_description: If given, registered via ``with_tools`` (after
            the browser description).

    Returns:
        A ``Message`` with role ``Role.SYSTEM``.

    Raises:
        KeyError: If ``reasoning_effort`` is not a key of
            ``REASONING_EFFORT``.
    """
    content = SystemContent.new()

    if model_identity is not None:
        content = content.with_model_identity(model_identity)

    if reasoning_effort is not None:
        effort = REASONING_EFFORT[reasoning_effort]
        content = content.with_reasoning_effort(effort)

    # NOTE(woosuk): Falling back to the current date brings non-determinism
    # in vLLM. Be careful.
    conversation_date = (start_date if start_date is not None else
                         datetime.datetime.now().strftime("%Y-%m-%d"))
    content = content.with_conversation_start_date(conversation_date)

    # Browser first, then python, so the registration order is unchanged.
    for tool_description in (browser_description, python_description):
        if tool_description is not None:
            content = content.with_tools(tool_description)

    return Message.from_role_and_content(Role.SYSTEM, content)


def get_developer_message(instructions: Optional[str] = None,
                          tools: Optional[list[Tool]] = None) -> Message:
    """Build the developer-role message.

    Args:
        instructions: If given, set as the developer instructions.
        tools: Optional tools. Entries of type ``"function"`` are converted
            to ``ToolDescription`` objects and attached as function tools.
            Entries of type ``"web_search_preview"`` or
            ``"code_interpreter"`` are skipped here.

    Returns:
        A ``Message`` with role ``Role.DEVELOPER``.

    Raises:
        ValueError: If a tool has any other type.
    """
    content = DeveloperContent.new()

    if instructions is not None:
        content = content.with_instructions(instructions)

    if tools is not None:
        # Validate every tool before building any description.
        function_tools = []
        for tool in tools:
            if tool.type == "function":
                function_tools.append(tool)
            elif tool.type not in ("web_search_preview", "code_interpreter"):
                # The two names above are built-in tools that are added to
                # the system message; anything else is rejected.
                raise ValueError(f"tool type {tool.type} not supported")

        if function_tools:
            descriptions = [
                ToolDescription.new(
                    name=fn_tool.name,
                    description=fn_tool.description,
                    parameters=fn_tool.parameters,
                ) for fn_tool in function_tools
            ]
            content = content.with_function_tools(descriptions)

    return Message.from_role_and_content(Role.DEVELOPER, content)


def get_user_message(content: str) -> Message:
    """Wrap ``content`` in a ``Message`` with role ``Role.USER``."""
    user_msg = Message.from_role_and_content(Role.USER, content)
    return user_msg


def parse_chat_input(chat_msg) -> Message:
    """Convert a chat message mapping into a Harmony ``Message``.

    ``chat_msg`` is indexed with ``"role"`` and ``"content"``. The content is
    either a plain string or an iterable of parts, each indexed with
    ``"text"``.
    """
    role = chat_msg["role"]
    body = chat_msg["content"]
    if isinstance(body, str):
        parts = [TextContent(text=body)]
    else:
        # TODO: Support refusal.
        parts = [TextContent(text=part["text"]) for part in body]
    return Message.from_role_and_contents(role, parts)


def render_for_completion(messages: list[Message]) -> list[int]:
    """Render ``messages`` into token ids for completion.

    The conversation is rendered with ``Role.ASSISTANT`` passed as the role
    argument to ``render_conversation_for_completion``.
    """
    encoding = get_encoding()
    return encoding.render_conversation_for_completion(
        Conversation.from_messages(messages), Role.ASSISTANT)


def get_stop_tokens_for_assistant_actions() -> list[int]:
    """Return the encoding's stop tokens for assistant actions."""
    encoding = get_encoding()
    return encoding.stop_tokens_for_assistant_actions()


def get_streamable_parser_for_assistant() -> StreamableParser:
    """Create a new ``StreamableParser`` for ``Role.ASSISTANT``."""
    encoding = get_encoding()
    return StreamableParser(encoding, role=Role.ASSISTANT)


def parse_output_into_messages(token_ids: Iterable[int]) -> StreamableParser:
    """Feed ``token_ids`` one by one into a new assistant parser.

    Returns:
        The parser after every token has been processed.
    """
    assistant_parser = get_streamable_parser_for_assistant()
    for tok in token_ids:
        assistant_parser.process(tok)
    return assistant_parser


def parse_chat_output(
        token_ids: list[int]) -> tuple[Optional[str], Optional[str], bool]:
    """Split generated tokens into reasoning and final content.

    Returns:
        A ``(reasoning_content, final_content, is_tool_call)`` tuple.
        ``is_tool_call`` is true only when two messages were completed and
        the second one has a recipient.

    Raises:
        ValueError: If more than two messages were parsed.
    """
    parser = parse_output_into_messages(token_ids)
    messages = parser.messages
    count = len(messages)

    if count == 0:
        # The generation has stopped during reasoning.
        return parser.current_content, None, False

    if count == 1:
        # The generation has stopped during final message.
        reasoning_text = messages[0].content[0].text
        return reasoning_text, parser.current_content, False

    if count != 2:
        raise ValueError("Expected 2 output messages (reasoning and final), "
                         f"but got {count}.")

    reasoning_msg, final_msg = messages
    reasoning_text = reasoning_msg.content[0].text
    final_text = final_msg.content[0].text
    return reasoning_text, final_text, final_msg.recipient is not None