test_longcat_tool_parser.py 3.61 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from unittest.mock import MagicMock

import pytest

from tests.tool_parsers.common_tests import (
    ToolParserTestConfig,
    ToolParserTests,
)
from vllm.tokenizers import TokenizerLike


class TestLongCatToolParser(ToolParserTests):
    """Run the common ``ToolParserTests`` cases against the ``longcat`` parser."""

    @pytest.fixture
    def tokenizer(self, default_tokenizer: TokenizerLike) -> TokenizerLike:
        """Return ``default_tokenizer`` with the LongCat tool-call tags in its vocab.

        The tokenizer object is patched in place: its ``get_vocab`` attribute is
        replaced by a mock that always returns the extended vocabulary.
        """
        vocab = default_tokenizer.get_vocab()
        vocab["<longcat_tool_call>"] = 32000
        vocab["</longcat_tool_call>"] = 32001
        # Stub get_vocab so later callers see the two extra tag tokens.
        default_tokenizer.get_vocab = MagicMock(return_value=vocab)
        return default_tokenizer

    @pytest.fixture
    def test_config(self) -> ToolParserTestConfig:
        """Build the LongCat-specific model outputs and expectations."""
        # Reused payload: one complete get_weather call wrapped in LongCat tags.
        weather_call = (
            '<longcat_tool_call>{"name": "get_weather", '
            '"arguments": {"city": "Tokyo"}}</longcat_tool_call>'
        )
        time_call = (
            '<longcat_tool_call>{"name": "get_time", '
            '"arguments": {"timezone": "Asia/Tokyo"}}</longcat_tool_call>'
        )

        # Multi-line JSON body covering every JSON value kind.
        typed_args_call = """<longcat_tool_call>{
  "name": "test_function",
  "arguments": {
    "string_field": "hello",
    "int_field": 42,
    "float_field": 3.14,
    "bool_field": true,
    "null_field": null,
    "array_field": ["a", "b", "c"],
    "object_field": {"nested": "value"},
    "empty_array": [],
    "empty_object": {}
  }
}</longcat_tool_call>"""

        # JSON body whose string values contain escapes and non-ASCII text.
        escaped_call = """<longcat_tool_call>{
  "name": "test_function",
  "arguments": {
    "quoted": "He said \\"hello\\"",
    "path": "C:\\\\Users\\\\file.txt",
    "newline": "line1\\nline2",
    "unicode": "emoji: 🎉"
  }
}</longcat_tool_call>"""

        # Inputs that are truncated, carry non-dict arguments, or are not JSON.
        malformed_samples = [
            '<longcat_tool_call>{"name": "func", "arguments": {',
            (
                '<longcat_tool_call>{"name": "func", '
                '"arguments": "not a dict"}</longcat_tool_call>'
            ),
            "Some text with <longcat_tool_call>invalid json",
        ]

        return ToolParserTestConfig(
            parser_name="longcat",
            # Test data
            no_tool_calls_output="This is a regular response without any tool calls.",
            single_tool_call_output=weather_call,
            parallel_tool_calls_output=weather_call + "\n" + time_call,
            various_data_types_output=typed_args_call,
            empty_arguments_output=(
                '<longcat_tool_call>{"name": "refresh", "arguments": {}}'
                "</longcat_tool_call>"
            ),
            surrounding_text_output=(
                "Let me check the weather for you.\n"
                + weather_call
                + "\nHere is the result."
            ),
            escaped_strings_output=escaped_call,
            malformed_input_outputs=malformed_samples,
            # Expected results
            single_tool_call_expected_name="get_weather",
            single_tool_call_expected_args={"city": "Tokyo"},
            single_tool_call_expected_content=None,
            parallel_tool_calls_count=2,
            parallel_tool_calls_names=["get_weather", "get_time"],
            # xfail markers
            xfail_streaming={
                "test_malformed_input": "Streaming has complex buffering behavior",
            },
            xfail_nonstreaming={},
            # Configuration
            allow_empty_or_json_empty_args=True,
        )