# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import json
from collections.abc import Sequence
6
from typing import Any
7

8
import regex as re
9
10
from transformers import PreTrainedTokenizerBase

11
from vllm.entrypoints.chat_utils import make_tool_call_id
12
from vllm.entrypoints.openai.chat_completion.protocol import (
13
    ChatCompletionRequest,
14
15
)
from vllm.entrypoints.openai.engine.protocol import (
16
17
18
19
20
    DeltaMessage,
    ExtractedToolCallInformation,
    FunctionCall,
    ToolCall,
)
21
22
from vllm.logger import init_logger
from vllm.tool_parsers.abstract_tool_parser import (
23
24
    ToolParser,
)
25
26
27
28
29
30
31
32
33

logger = init_logger(__name__)


class Phi4MiniJsonToolParser(ToolParser):
    """
    Tool call parser for phi-4-mini models intended for use with the
    examples/tool_chat_template_llama.jinja template.

    Used when --enable-auto-tool-choice --tool-call-parser phi4_mini_json
    are all set

    Tool calls are recognised as the marker ``functools`` immediately
    followed by a JSON array of objects. Each object must provide a
    ``name`` and either an ``arguments`` or a ``parameters`` entry.
    """

    def __init__(self, tokenizer: PreTrainedTokenizerBase) -> None:
        super().__init__(tokenizer)

        # initialize properties used for state when parsing tool calls in
        # streaming mode
        self.prev_tool_call_arr: list[dict[str, Any]] = []
        self.current_tool_id: int = -1
        self.current_tool_name_sent: bool = False
        # what has been streamed for each tool so far, kept as a list
        self.streamed_args_for_tool: list[str] = []
        # marker that introduces the JSON array of tool calls
        self.bot_token: str = "functools"

    def extract_tool_calls(
        self, model_output: str, request: ChatCompletionRequest
    ) -> ExtractedToolCallInformation:
        """
        Extract the tool calls from a complete model response.

        Args:
            model_output: The full text generated by the model.
            request: The originating chat completion request (not used by
                this parser).

        Returns:
            An ``ExtractedToolCallInformation`` with ``tools_called=True``
            and the parsed tool calls when at least one well-formed call
            follows the ``functools`` marker. In every other case (marker
            missing, invalid JSON, empty array, malformed entries) the
            result has ``tools_called=False`` and carries the unmodified
            ``model_output`` as ``content``.
        """
        logger.debug("Model output: %s", model_output)

        no_tool_calls = ExtractedToolCallInformation(
            tools_called=False, tool_calls=[], content=model_output
        )

        marker_pos = model_output.find(self.bot_token + "[")
        if marker_pos == -1:
            logger.debug("No function calls found")
            return no_tool_calls

        # Decode the JSON array that starts right after the marker. Using
        # raw_decode (instead of a non-greedy regex up to the first "]")
        # keeps nested arrays and "]" characters inside strings intact and
        # ignores any text that trails the array.
        array_start = marker_pos + len(self.bot_token)
        try:
            function_call_arr, _ = json.JSONDecoder().raw_decode(
                model_output, array_start
            )
        except json.JSONDecodeError as e:
            logger.error(
                "Failed to parse function calls from model output. Error: %s",
                str(e),
            )
            # Do not report an empty tool call list with the text dropped;
            # hand the raw output back as plain content instead.
            return no_tool_calls

        logger.debug(
            "Successfully extracted %d function calls", len(function_call_arr)
        )
        if not function_call_arr:
            # "functools[]" carries no calls; treat it as ordinary content.
            return no_tool_calls

        try:
            tool_calls: list[ToolCall] = [
                ToolCall(
                    id=make_tool_call_id(),
                    type="function",
                    function=FunctionCall(
                        name=raw_function_call["name"],
                        # function call args are JSON but as a string
                        arguments=json.dumps(
                            raw_function_call["arguments"]
                            if "arguments" in raw_function_call
                            else raw_function_call["parameters"],
                            ensure_ascii=False,
                        ),
                    ),
                )
                for raw_function_call in function_call_arr
            ]
        except Exception:
            # Best-effort boundary: entries that are not objects or lack
            # the expected keys fall back to plain content, but are logged
            # so the failure is visible.
            logger.exception("Failed to build tool calls from model output")
            return no_tool_calls

        # Text surrounding the tool call array is not returned as content.
        return ExtractedToolCallInformation(
            tools_called=True, tool_calls=tool_calls, content=None
        )

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> DeltaMessage | None:
        """
        Streaming tool call extraction is not implemented for this parser.

        All arguments are ignored and ``None`` is always returned.
        """
        return None