# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import json
from collections.abc import Sequence
6
from typing import Any
7

8
import regex as re
9
10
from transformers import PreTrainedTokenizerBase

11
from vllm.entrypoints.chat_utils import make_tool_call_id
12
from vllm.entrypoints.openai.chat_completion.protocol import (
13
    ChatCompletionRequest,
14
15
)
from vllm.entrypoints.openai.engine.protocol import (
16
17
18
19
20
    DeltaMessage,
    ExtractedToolCallInformation,
    FunctionCall,
    ToolCall,
)
21
22
from vllm.logger import init_logger
from vllm.tool_parsers.abstract_tool_parser import (
23
    Tool,
24
25
    ToolParser,
)
26
27
28
29
30
31
32
33
34

logger = init_logger(__name__)


class Phi4MiniJsonToolParser(ToolParser):
    """
    Tool call parser for phi-4-mini models intended for use with the
    examples/tool_chat_template_llama.jinja template.

    Used when --enable-auto-tool-choice --tool-call-parser phi4_mini_json
    are all set

    Tool calls are recognised as the literal marker ``functools`` immediately
    followed by a JSON array of objects. Each object must carry a ``name``
    and may carry its arguments under either ``arguments`` or ``parameters``.
    """

    def __init__(
        self,
        tokenizer: PreTrainedTokenizerBase,
        tools: list[Tool] | None = None,
    ) -> None:
        """
        Initialize the parser and its streaming bookkeeping.

        Args:
            tokenizer: Forwarded unchanged to ``ToolParser.__init__``.
            tools: Optional tool definitions, forwarded unchanged to
                ``ToolParser.__init__``.
        """
        super().__init__(tokenizer, tools)

        # initialize properties used for state when parsing tool calls in
        # streaming mode
        self.prev_tool_call_arr: list[dict[str, Any]] = []
        self.current_tool_id: int = -1
        self.current_tool_name_sent: bool = False
        # map what has been streamed for each tool so far to a list
        self.streamed_args_for_tool: list[str] = []
        # literal marker that introduces the JSON array of tool calls
        self.bot_token: str = "functools"

    def _find_function_call_array(self, model_output: str) -> list[Any] | None:
        """
        Locate and decode the JSON array that follows the tool-call marker.

        Every occurrence of ``<bot_token>[`` is tried in order and the first
        one that decodes as JSON wins. Decoding with ``raw_decode`` (instead
        of cutting the text at the first ``]``) keeps nested arrays and
        ``]`` characters inside string values intact.

        Returns:
            The decoded list, or ``None`` when the marker is absent or no
            occurrence is followed by valid JSON.
        """
        decoder = json.JSONDecoder()
        marker = re.escape(self.bot_token) + r"\["
        last_error: json.JSONDecodeError | None = None

        for match in re.finditer(marker, model_output):
            try:
                # match.end() - 1 is the index of the "[" itself, which is
                # where the JSON document starts.
                parsed, _ = decoder.raw_decode(model_output, match.end() - 1)
            except json.JSONDecodeError as e:
                last_error = e
                continue
            logger.debug("Successfully extracted %d function calls", len(parsed))
            return parsed

        if last_error is None:
            logger.debug("No function calls found")
        else:
            logger.error(
                "Failed to parse function calls from model output. Error: %s",
                str(last_error),
            )
        return None

    @staticmethod
    def _build_tool_call(raw_function_call: dict[str, Any]) -> ToolCall:
        """
        Convert one decoded JSON object into a ``ToolCall``.

        Raises:
            KeyError: If the object has no ``name``.
            TypeError: If ``raw_function_call`` is not a mapping.
        """
        if "arguments" in raw_function_call:
            arguments = raw_function_call["arguments"]
        else:
            # A call that names a function but supplies no arguments is
            # treated as a zero-argument call.
            arguments = raw_function_call.get("parameters", {})

        return ToolCall(
            id=make_tool_call_id(),
            type="function",
            function=FunctionCall(
                name=raw_function_call["name"],
                # function call args are JSON but as a string
                arguments=json.dumps(arguments, ensure_ascii=False),
            ),
        )

    def extract_tool_calls(
        self, model_output: str, request: ChatCompletionRequest
    ) -> ExtractedToolCallInformation:
        """
        Extract the tool calls from a complete model response.

        Args:
            model_output: Full text generated by the model.
            request: The originating request; not used by this parser.

        Returns:
            ``tools_called=True`` with the parsed calls and ``content=None``
            when at least one tool call is decoded. Otherwise
            ``tools_called=False`` with ``content`` set to the unmodified
            ``model_output`` so no generated text is lost.
        """
        logger.debug("Model output: %s", model_output)

        function_call_arr = self._find_function_call_array(model_output)
        # Covers "marker not found", "JSON could not be decoded" and an
        # empty ``functools[]`` array: none of them yields a tool call, so
        # the text is handed back as ordinary content.
        if not function_call_arr:
            return ExtractedToolCallInformation(
                tools_called=False, tool_calls=[], content=model_output
            )

        try:
            tool_calls: list[ToolCall] = [
                self._build_tool_call(raw_function_call)
                for raw_function_call in function_call_arr
            ]
        except Exception:
            # Malformed entries (non-object items, missing "name", values
            # rejected by the protocol models) must not fail the request.
            logger.exception("Failed to build tool calls from model output")
            return ExtractedToolCallInformation(
                tools_called=False, tool_calls=[], content=model_output
            )

        # Text surrounding the tool call array is not returned as content.
        return ExtractedToolCallInformation(
            tools_called=True, tool_calls=tool_calls, content=None
        )

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> DeltaMessage | None:
        """
        Streaming extraction is not implemented for this parser.

        All arguments are ignored and ``None`` is always returned.
        """
        return None