//! Wrapper Token Tests
//!
//! Tests for JSON parser with custom wrapper tokens
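//!
//! `TokenConfig` (as exercised below) carries parallel `start_tokens`/`end_tokens`
//! pairs that delimit each wrapped JSON tool call, plus a `separator` used when
//! multiple calls are joined; these semantics are inferred from the tests in this file.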

use sglang_router_rs::tool_parser::{JsonParser, TokenConfig, ToolParser};

#[tokio::test]
async fn test_json_with_xml_style_wrapper() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["<tool>".to_string()],
        end_tokens: vec!["</tool>".to_string()],
        separator: ", ".to_string(),
    });

    let input =
        r#"Some text before <tool>{"name": "test", "arguments": {"x": 1}}</tool> and after"#;

    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "test");

    let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
    assert_eq!(args["x"], 1);
}

#[tokio::test]
async fn test_json_with_multiple_wrapper_pairs() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["<tool>".to_string(), "<<TOOL>>".to_string()],
        end_tokens: vec!["</tool>".to_string(), "<</TOOL>>".to_string()],
        separator: ", ".to_string(),
    });

    let input1 = r#"<tool>{"name": "tool1", "arguments": {}}</tool>"#;
    let (_normal_text, tools1) = parser.parse_complete(input1).await.unwrap();
    assert_eq!(tools1.len(), 1);
    assert_eq!(tools1[0].function.name, "tool1");

    let input2 = r#"<<TOOL>>{"name": "tool2", "arguments": {}}<</TOOL>>"#;
    let (_normal_text, tools2) = parser.parse_complete(input2).await.unwrap();
    assert_eq!(tools2.len(), 1);
    assert_eq!(tools2[0].function.name, "tool2");
}

#[tokio::test]
async fn test_json_with_only_start_token() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec![">>>FUNCTION:".to_string()],
        end_tokens: vec!["".to_string()], // Empty end token
        separator: ", ".to_string(),
    });

    let input = r#"Some preamble >>>FUNCTION:{"name": "execute", "arguments": {"cmd": "ls"}}"#;

    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "execute");
}

#[tokio::test]
async fn test_json_with_custom_separator() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["[FUNC]".to_string()],
        end_tokens: vec!["[/FUNC]".to_string()],
        separator: " | ".to_string(), // Custom separator
    });

    // Only a single call is exercised here; the custom separator is configured but
    // not yet used to join multiple calls (see the hedged sketch after this test)
    let input = r#"[FUNC]{"name": "test", "arguments": {}}[/FUNC]"#;

    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "test");
}
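
// A minimal follow-up sketch, assuming two wrapped calls joined by the configured
// " | " separator is how multiple tool calls would appear in one response. The test
// name and input are illustrative; like test_json_multiple_tools_with_wrapper further
// down, it only asserts that at least the first call is recovered.
#[tokio::test]
async fn test_json_custom_separator_multiple_calls_sketch() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["[FUNC]".to_string()],
        end_tokens: vec!["[/FUNC]".to_string()],
        separator: " | ".to_string(),
    });

    // Two wrapped calls joined by the configured separator
    let input = r#"[FUNC]{"name": "first", "arguments": {}}[/FUNC] | [FUNC]{"name": "second", "arguments": {}}[/FUNC]"#;

    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert!(!tools.is_empty(), "Should parse at least one tool");
    assert_eq!(tools[0].function.name, "first");
}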

#[tokio::test]
async fn test_json_with_nested_wrapper_tokens_in_content() {
    // Known limitation: when wrapper tokens appear inside JSON strings, the simple
    // regex-based extraction may fail; handling this would require a more
    // sophisticated parser that understands JSON string escaping. A companion
    // sketch with only partial token text in the content follows this test.

    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["<call>".to_string()],
        end_tokens: vec!["</call>".to_string()],
        separator: ", ".to_string(),
    });

    let input =
        r#"<call>{"name": "echo", "arguments": {"text": "Use <call> and </call> tags"}}</call>"#;

    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();

    // This is a known limitation - the parser may fail when end tokens appear in content
    // For now, we accept this behavior
    if tools.is_empty() {
        // Parser failed due to nested tokens - this is expected
        assert_eq!(
            tools.len(),
            0,
            "Known limitation: nested wrapper tokens in content"
        );
    } else {
        // If it does parse, verify it's correct
        assert_eq!(tools[0].function.name, "echo");
        let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
        assert_eq!(args["text"], "Use <call> and </call> tags");
    }
}
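
// A companion sketch for the limitation above, assuming content that contains angle
// brackets but never the exact end token is unaffected; the test name and input are
// illustrative.
#[tokio::test]
async fn test_json_partial_token_text_in_content_sketch() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["<call>".to_string()],
        end_tokens: vec!["</call>".to_string()],
        separator: ", ".to_string(),
    });

    // "<brackets>" never matches the end token "</call>", so extraction should succeed
    let input = r#"<call>{"name": "echo", "arguments": {"text": "angle <brackets> are fine"}}</call>"#;

    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "echo");
}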

#[tokio::test]
async fn test_json_extraction_without_wrapper_tokens() {
    // Default parser without wrapper tokens should extract JSON from text
    let parser = JsonParser::new();

    let input = r#"
    Here is some text before the JSON.
    {"name": "search", "arguments": {"query": "test"}}
    And here is some text after.
    "#;

    let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(
        normal_text,
        "\n    Here is some text before the JSON.\n    \n    And here is some text after.\n    "
    );
    assert_eq!(tools[0].function.name, "search");
}

#[tokio::test]
async fn test_json_with_multiline_wrapper_content() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["```json\n".to_string()],
        end_tokens: vec!["\n```".to_string()],
        separator: ", ".to_string(),
    });

    let input = r#"Here's the function call:
```json
{
    "name": "format_code",
    "arguments": {
        "language": "rust",
        "code": "fn main() {}"
    }
}
```
Done!"#;

    let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(normal_text, "");
    assert_eq!(tools[0].function.name, "format_code");
}

#[tokio::test]
async fn test_json_with_special_chars_in_tokens() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["{{FUNC[[".to_string()],
        end_tokens: vec!["]]FUNC}}".to_string()],
        separator: ", ".to_string(),
    });

    let input = r#"{{FUNC[[{"name": "test", "arguments": {"special": "[]{}"}}]]FUNC}}"#;

    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "test");

    let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
    assert_eq!(args["special"], "[]{}");
}

#[tokio::test]
async fn test_json_multiple_tools_with_wrapper() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["<fn>".to_string()],
        end_tokens: vec!["</fn>".to_string()],
        separator: ", ".to_string(),
    });

    // Multiple wrapped JSON objects
    let input = r#"
    <fn>{"name": "tool1", "arguments": {}}</fn>
    Some text between.
    <fn>{"name": "tool2", "arguments": {"x": 1}}</fn>
    "#;

    // The current implementation may treat these as separate calls; assert that at
    // least the first one is parsed.
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert!(!tools.is_empty(), "Should parse at least one tool");
    assert_eq!(tools[0].function.name, "tool1");
}

#[tokio::test]
async fn test_json_wrapper_with_array() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["<tools>".to_string()],
        end_tokens: vec!["</tools>".to_string()],
        separator: ", ".to_string(),
    });

    let input = r#"<tools>[
        {"name": "func1", "arguments": {}},
        {"name": "func2", "arguments": {"param": "value"}}
    ]</tools>"#;

    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 2);
    assert_eq!(tools[0].function.name, "func1");
    assert_eq!(tools[1].function.name, "func2");
}

#[tokio::test]
async fn test_json_incomplete_wrapper_tokens() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["<tool>".to_string()],
        end_tokens: vec!["</tool>".to_string()],
        separator: ", ".to_string(),
    });

    // Missing end token
    let input = r#"<tool>{"name": "test", "arguments": {}}"#;
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 0, "Should not parse without closing token");

    // Missing start token
    let input = r#"{"name": "test", "arguments": {}}</tool>"#;
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 0, "Should not parse without opening token");
}

#[tokio::test]
async fn test_json_empty_wrapper_tokens() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec![],
        end_tokens: vec![],
        separator: ", ".to_string(),
    });

    let input = r#"{"name": "test", "arguments": {"key": "value"}}"#;

    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "test");
}