"csrc/vscode:/vscode.git/clone" did not exist on "d1e82408759067eca0ae55e548f6243a9e0aa12d"
phi4mini_tool_parser.py 3.92 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
4
5

import json
from collections.abc import Sequence
6
from typing import Any
7

8
import regex as re
9
10
from transformers import PreTrainedTokenizerBase

11
from vllm.entrypoints.chat_utils import make_tool_call_id
12
13
14
15
16
17
18
from vllm.entrypoints.openai.protocol import (
    ChatCompletionRequest,
    DeltaMessage,
    ExtractedToolCallInformation,
    FunctionCall,
    ToolCall,
)
19
20
from vllm.logger import init_logger
from vllm.tool_parsers.abstract_tool_parser import (
21
22
    ToolParser,
)
23
24
25
26
27
28
29
30
31

logger = init_logger(__name__)


class Phi4MiniJsonToolParser(ToolParser):
    """
    Tool call parser for phi-4-mini models intended for use with the
    examples/tool_chat_template_llama.jinja template.

    Used when --enable-auto-tool-choice --tool-call-parser phi4_mini_json
    are all set

    The parser looks for the marker ``functools`` immediately followed by a
    JSON array, e.g. ``functools[{"name": "f", "arguments": {...}}]``. Each
    array element must carry a ``name`` and either ``arguments`` or
    ``parameters``.
    """

    # NOTE(review): the docstring names the llama chat template; confirm that
    # this is the template actually intended for phi-4-mini.

    def __init__(self, tokenizer: PreTrainedTokenizerBase) -> None:
        """Set up the parser and its per-request streaming state."""
        super().__init__(tokenizer)

        # initialize properties used for state when parsing tool calls in
        # streaming mode
        self.prev_tool_call_arr: list[dict[str, Any]] = []
        self.current_tool_id: int = -1
        self.current_tool_name_sent: bool = False
        # map what has been streamed for each tool so far to a list
        self.streamed_args_for_tool: list[str] = []
        # marker that introduces the JSON array of tool calls
        self.bot_token: str = "functools"

    def extract_tool_calls(
        self, model_output: str, request: ChatCompletionRequest
    ) -> ExtractedToolCallInformation:
        """
        Extract the tool calls from a complete model response.

        Args:
            model_output: The full text generated by the model.
            request: The originating chat completion request (not used by
                this parser).

        Returns:
            ``tools_called=True`` with the parsed tool calls and
            ``content=None`` when a non-empty, well-formed tool call array
            follows the ``functools`` marker. Otherwise ``tools_called=False``
            with the unmodified ``model_output`` as content, so the model's
            text is never lost on a parse failure.
        """
        logger.debug("Model output: %s", model_output)

        no_tool_calls = ExtractedToolCallInformation(
            tools_called=False, tool_calls=[], content=model_output
        )

        marker_pos = model_output.find(self.bot_token + "[")
        if marker_pos < 0:
            logger.debug("No function calls found")
            return no_tool_calls

        # Decode the JSON array starting at the "[" right after the marker.
        # raw_decode stops at the matching "]" and ignores trailing text, so
        # arguments that themselves contain arrays are parsed correctly
        # (a non-greedy "first ]" match would truncate them).
        array_start = marker_pos + len(self.bot_token)
        try:
            function_call_arr, _ = json.JSONDecoder().raw_decode(
                model_output, array_start
            )
        except json.JSONDecodeError as e:
            logger.error(
                "Failed to parse function calls from model output. Error: %s",
                str(e),
            )
            return no_tool_calls

        if not function_call_arr:
            # An empty array carries no calls; report the text as content
            # rather than claiming tools were called with nothing to run.
            logger.debug("No function calls found")
            return no_tool_calls

        logger.debug(
            "Successfully extracted %d function calls", len(function_call_arr)
        )

        try:
            tool_calls: list[ToolCall] = [
                ToolCall(
                    id=make_tool_call_id(),
                    type="function",
                    function=FunctionCall(
                        name=raw_function_call["name"],
                        # function call args are JSON but as a string
                        arguments=json.dumps(
                            raw_function_call["arguments"]
                            if "arguments" in raw_function_call
                            else raw_function_call["parameters"],
                            ensure_ascii=False,
                        ),
                    ),
                )
                for raw_function_call in function_call_arr
            ]
        except Exception:
            # Malformed entries (non-dict items, missing "name" or
            # "arguments"/"parameters", or model validation errors): fall
            # back to plain content instead of failing the request.
            logger.exception("Failed to build tool calls from model output")
            return no_tool_calls

        # Any text surrounding the tool call array is intentionally dropped.
        return ExtractedToolCallInformation(
            tools_called=True, tool_calls=tool_calls, content=None
        )

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> DeltaMessage | None:
        """
        Streaming extraction is not implemented for this parser.

        Returns:
            Always ``None``; none of the arguments are inspected.
        """
        return None