# SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import datetime from typing import Literal, Optional from openai.types.responses.tool import Tool from openai_harmony import (Conversation, DeveloperContent, HarmonyEncodingName, Message, ReasoningEffort, Role, StreamableParser, SystemContent, TextContent, ToolDescription, load_harmony_encoding) REASONING_EFFORT = { "high": ReasoningEffort.HIGH, "medium": ReasoningEffort.MEDIUM, "low": ReasoningEffort.LOW, } _harmony_encoding = None def get_encoding(): global _harmony_encoding if _harmony_encoding is None: _harmony_encoding = load_harmony_encoding( HarmonyEncodingName.HARMONY_GPT_OSS) return _harmony_encoding def get_system_message( model_identity: Optional[str] = None, reasoning_effort: Optional[Literal["high", "medium", "low"]] = None, start_date: Optional[str] = None, browser_description: Optional[str] = None, python_description: Optional[str] = None, ) -> Message: sys_msg_content = SystemContent.new() if model_identity is not None: sys_msg_content = sys_msg_content.with_model_identity(model_identity) if reasoning_effort is not None: sys_msg_content = sys_msg_content.with_reasoning_effort( REASONING_EFFORT[reasoning_effort]) if start_date is None: # NOTE(woosuk): This brings non-determinism in vLLM. Be careful. start_date = datetime.datetime.now().strftime("%Y-%m-%d") sys_msg_content = sys_msg_content.with_conversation_start_date(start_date) if browser_description is not None: sys_msg_content = sys_msg_content.with_tools(browser_description) if python_description is not None: sys_msg_content = sys_msg_content.with_tools(python_description) sys_msg = Message.from_role_and_content(Role.SYSTEM, sys_msg_content) return sys_msg def get_developer_message(instructions: Optional[str] = None, tools: Optional[list[Tool]] = None) -> Message: dev_msg_content = DeveloperContent.new() if instructions is not None: dev_msg_content = dev_msg_content.with_instructions(instructions) if tools is not None: function_tools = [] for tool in tools: if tool.type in ("web_search_preview", "code_interpreter"): # These are built-in tools that are added to the system message. pass elif tool.type == "function": function_tools.append(tool) else: raise ValueError(f"tool type {tool.type} not supported") if function_tools: function_tool_descriptions = [ ToolDescription.new( name=tool.name, description=tool.description, parameters=tool.parameters, ) for tool in function_tools ] dev_msg_content = dev_msg_content.with_function_tools( function_tool_descriptions) dev_msg = Message.from_role_and_content(Role.DEVELOPER, dev_msg_content) return dev_msg def get_user_message(content: str) -> Message: return Message.from_role_and_content(Role.USER, content) def parse_chat_input(chat_msg) -> Message: role = chat_msg["role"] content = chat_msg["content"] if isinstance(content, str): contents = [TextContent(text=content)] else: # TODO: Support refusal. contents = [TextContent(text=c["text"]) for c in content] msg = Message.from_role_and_contents(role, contents) return msg def render_for_completion(messages: list[Message]) -> list[int]: conversation = Conversation.from_messages(messages) token_ids = get_encoding().render_conversation_for_completion( conversation, Role.ASSISTANT) return token_ids def get_stop_tokens_for_assistant_actions() -> list[int]: return get_encoding().stop_tokens_for_assistant_actions() def get_streamable_parser_for_assistant() -> StreamableParser: return StreamableParser(get_encoding(), role=Role.ASSISTANT)