responses_parser.py 4.96 KB
Newer Older
1
2
3
4
5
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import logging
from collections.abc import Callable

6
from openai.types.responses.response_function_tool_call import ResponseFunctionToolCall
7
8
9
10
11
12
13
14
15
16
from openai.types.responses.response_output_message import ResponseOutputMessage
from openai.types.responses.response_output_text import ResponseOutputText
from openai.types.responses.response_reasoning_item import (
    Content,
    ResponseReasoningItem,
)

from vllm.entrypoints.openai.protocol import ResponseInputOutputItem, ResponsesRequest
from vllm.outputs import CompletionOutput
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
17
from vllm.tokenizers.protocol import TokenizerLike
18
from vllm.tool_parsers.abstract_tool_parser import ToolParser
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from vllm.transformers_utils.tokenizer import AnyTokenizer
from vllm.utils import random_uuid

logger = logging.getLogger(__name__)


class ResponsesParser:
    """Turn completion outputs into Responses API output items.

    Each call to :meth:`process` splits the text of one completion output
    into reasoning text, tool calls and plain assistant content, and appends
    the corresponding items to ``response_messages``.

    Note that ``response_messages`` is stored by reference (not copied), so
    items appended by :meth:`process` are visible through the list object the
    caller passed in. ``num_init_messages`` records how many items that list
    held at construction time.
    """

    def __init__(
        self,
        *,
        tokenizer: AnyTokenizer,
        reasoning_parser_cls: Callable[[AnyTokenizer], ReasoningParser],
        response_messages: list[ResponseInputOutputItem],
        request: ResponsesRequest,
        tool_parser_cls: Callable[[TokenizerLike], ToolParser] | None,
    ):
        """Create the parser and instantiate its reasoning/tool parsers.

        Args:
            tokenizer: Passed to ``reasoning_parser_cls`` and, when given,
                to ``tool_parser_cls``.
            reasoning_parser_cls: Callable that builds the reasoning parser
                from the tokenizer.
            response_messages: List that parsed items are appended to.
            request: Request forwarded to the reasoning and tool parsers.
            tool_parser_cls: Callable that builds the tool parser from the
                tokenizer, or ``None`` to skip tool-call extraction.
        """
        # TODO: initial messages may not be properly typed
        self.response_messages: list[ResponseInputOutputItem] = response_messages
        self.num_init_messages = len(response_messages)
        self.tokenizer = tokenizer
        self.request = request

        self.reasoning_parser_instance = reasoning_parser_cls(tokenizer)
        self.tool_parser_instance = (
            tool_parser_cls(tokenizer) if tool_parser_cls is not None else None
        )

    def process(self, output: CompletionOutput) -> "ResponsesParser":
        """Parse ``output.text`` and append the resulting items.

        Items are appended in this order: one reasoning item (if any
        reasoning text was extracted), one assistant message (if any
        non-empty content remains), then one item per extracted tool call.

        Args:
            output: Completion output whose ``text`` is parsed.

        Returns:
            This parser, so calls can be chained.
        """
        reasoning_content, content = self.reasoning_parser_instance.extract_reasoning(
            output.text, request=self.request
        )
        if reasoning_content:
            self.response_messages.append(
                ResponseReasoningItem(
                    type="reasoning",
                    id=f"rs_{random_uuid()}",
                    summary=[],
                    content=[
                        Content(
                            type="reasoning_text",
                            text=reasoning_content,
                        )
                    ],
                )
            )

        function_calls, content = self._extract_function_calls(content)

        if content:
            self.response_messages.append(
                ResponseOutputMessage(
                    type="message",
                    id=f"msg_{random_uuid()}",
                    status="completed",
                    role="assistant",
                    content=[
                        ResponseOutputText(
                            annotations=[],  # TODO
                            type="output_text",
                            text=content,
                            logprobs=None,  # TODO
                        )
                    ],
                )
            )

        # Extending with an empty list is a no-op, so no emptiness check.
        self.response_messages.extend(function_calls)

        return self

    def _extract_function_calls(
        self, content: str | None
    ) -> tuple[list[ResponseFunctionToolCall], str | None]:
        """Run the tool parser (if any) over ``content``.

        Returns:
            A pair ``(function_calls, remaining_content)``. When no tool
            parser is configured, or the parser reports that no tools were
            called, the list is empty and ``content`` is returned unchanged.
            Otherwise the remaining content is whatever the tool parser
            reports, normalised to ``None`` if it is empty or whitespace.
        """
        if self.tool_parser_instance is None:
            return [], content

        tool_call_info = self.tool_parser_instance.extract_tool_calls(
            content if content is not None else "",
            request=self.request,  # type: ignore
        )
        if tool_call_info is None or not tool_call_info.tools_called:
            return [], content

        # extract_tool_calls() returns a list of tool calls.
        function_calls = [
            ResponseFunctionToolCall(
                id=f"fc_{random_uuid()}",
                call_id=f"call_{random_uuid()}",
                type="function_call",
                status="completed",
                name=tool_call.function.name,
                arguments=tool_call.function.arguments,
            )
            for tool_call in tool_call_info.tool_calls
        ]

        remaining = tool_call_info.content
        if not remaining or not remaining.strip():
            # Whitespace-only leftovers must not become an empty message.
            remaining = None
        return function_calls, remaining


def get_responses_parser_for_simple_context(
    *,
    tokenizer: AnyTokenizer,
    reasoning_parser_cls: Callable[[AnyTokenizer], ReasoningParser],
    response_messages: list[ResponseInputOutputItem],
    request: ResponsesRequest,
    tool_parser_cls: Callable[[TokenizerLike], ToolParser] | None,
) -> ResponsesParser:
    """Factory function to create a ResponsesParser.

    Every argument is forwarded unchanged, by keyword, to the
    ``ResponsesParser`` constructor.

    Args:
        tokenizer: Tokenizer handed to the parser.
        reasoning_parser_cls: Callable that builds the reasoning parser.
        response_messages: List the parser appends its output items to.
        request: Request the parser forwards to its sub-parsers.
        tool_parser_cls: Callable that builds the tool parser, or ``None``.

    Returns:
        ResponsesParser instance configured with the provided parsers.
    """
    return ResponsesParser(
        tokenizer=tokenizer,
        reasoning_parser_cls=reasoning_parser_cls,
        response_messages=response_messages,
        request=request,
        tool_parser_cls=tool_parser_cls,
    )