//! Wrapper Token Tests
//!
//! Tests for JSON parser with custom wrapper tokens

use sglang_router_rs::tool_parser::{JsonParser, TokenConfig, ToolParser};

/// A single XML-style `<tool>…</tool>` wrapped call embedded in surrounding
/// prose should be extracted, with the wrapper text excluded from the tool.
#[tokio::test]
async fn test_json_with_xml_style_wrapper() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["<tool>".to_string()],
        end_tokens: vec!["</tool>".to_string()],
        separator: ", ".to_string(),
    });

    let input =
        r#"Some text before <tool>{"name": "test", "arguments": {"x": 1}}</tool> and after"#;

    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "test");

    // Arguments are serialized as a JSON string; parse them back to verify content.
    let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
    assert_eq!(args["x"], 1);
}

#[tokio::test]
async fn test_json_with_multiple_wrapper_pairs() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["<tool>".to_string(), "<<TOOL>>".to_string()],
        end_tokens: vec!["</tool>".to_string(), "<</TOOL>>".to_string()],
        separator: ", ".to_string(),
    });

    let input1 = r#"<tool>{"name": "tool1", "arguments": {}}</tool>"#;
35
36
37
    let (_normal_text, tools1) = parser.parse_complete(input1).await.unwrap();
    assert_eq!(tools1.len(), 1);
    assert_eq!(tools1[0].function.name, "tool1");
38
39

    let input2 = r#"<<TOOL>>{"name": "tool2", "arguments": {}}<</TOOL>>"#;
40
41
42
    let (_normal_text, tools2) = parser.parse_complete(input2).await.unwrap();
    assert_eq!(tools2.len(), 1);
    assert_eq!(tools2[0].function.name, "tool2");
43
44
45
46
47
48
49
50
51
52
53
54
}

#[tokio::test]
async fn test_json_with_only_start_token() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec![">>>FUNCTION:".to_string()],
        end_tokens: vec!["".to_string()], // Empty end token
        separator: ", ".to_string(),
    });

    let input = r#"Some preamble >>>FUNCTION:{"name": "execute", "arguments": {"cmd": "ls"}}"#;

55
56
57
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "execute");
58
59
60
61
62
63
64
65
66
67
68
69
70
}

#[tokio::test]
async fn test_json_with_custom_separator() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["[FUNC]".to_string()],
        end_tokens: vec!["[/FUNC]".to_string()],
        separator: " | ".to_string(), // Custom separator
    });

    // Though we're not testing multiple tools here, the separator is configured
    let input = r#"[FUNC]{"name": "test", "arguments": {}}[/FUNC]"#;

71
72
73
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "test");
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
}

/// Wrapper tokens appearing *inside* a JSON string value may defeat the
/// simple token-based extraction. Either outcome (no tools, or a correct
/// parse) is accepted; anything else is a failure.
#[tokio::test]
async fn test_json_with_nested_wrapper_tokens_in_content() {
    // Known limitation: When wrapper tokens appear inside JSON strings,
    // the simple regex-based extraction may fail. This would require
    // a more sophisticated parser that understands JSON string escaping.

    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["<call>".to_string()],
        end_tokens: vec!["</call>".to_string()],
        separator: ", ".to_string(),
    });

    let input =
        r#"<call>{"name": "echo", "arguments": {"text": "Use <call> and </call> tags"}}</call>"#;

    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();

    // This is a known limitation - the parser may fail when end tokens appear
    // in content. For now, we accept an empty result; but if the parser does
    // produce a tool, it must be the correct one.
    if !tools.is_empty() {
        assert_eq!(tools[0].function.name, "echo");
        let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
        assert_eq!(args["text"], "Use <call> and </call> tags");
    }
}

#[tokio::test]
async fn test_json_extraction_without_wrapper_tokens() {
    // Default parser without wrapper tokens should extract JSON from text
    let parser = JsonParser::new();

    let input = r#"
    Here is some text before the JSON.
    {"name": "search", "arguments": {"query": "test"}}
    And here is some text after.
    "#;

121
122
123
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "search");
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
}

#[tokio::test]
async fn test_json_with_multiline_wrapper_content() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["```json\n".to_string()],
        end_tokens: vec!["\n```".to_string()],
        separator: ", ".to_string(),
    });

    let input = r#"Here's the function call:
```json
{
    "name": "format_code",
    "arguments": {
        "language": "rust",
        "code": "fn main() {}"
    }
}
```
Done!"#;

146
147
148
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "format_code");
149
150
151
152
153
154
155
156
157
158
159
160
}

/// Wrapper tokens containing regex metacharacters ({, }, [, ]) must be
/// treated literally, and brace/bracket characters inside argument values
/// must not confuse extraction.
#[tokio::test]
async fn test_json_with_special_chars_in_tokens() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["{{FUNC[[".to_string()],
        end_tokens: vec!["]]FUNC}}".to_string()],
        separator: ", ".to_string(),
    });

    let input = r#"{{FUNC[[{"name": "test", "arguments": {"special": "[]{}"}}]]FUNC}}"#;

    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "test");

    // Verify the special characters survived round-tripping through the parser.
    let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
    assert_eq!(args["special"], "[]{}");
}

#[tokio::test]
async fn test_json_multiple_tools_with_wrapper() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["<fn>".to_string()],
        end_tokens: vec!["</fn>".to_string()],
        separator: ", ".to_string(),
    });

    // Multiple wrapped JSON objects
    let input = r#"
    <fn>{"name": "tool1", "arguments": {}}</fn>
    Some text between.
    <fn>{"name": "tool2", "arguments": {"x": 1}}</fn>
    "#;

    // Current implementation might handle this as separate calls
    // Let's test that at least the first one is parsed
186
187
188
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert!(!tools.is_empty(), "Should parse at least one tool");
    assert_eq!(tools[0].function.name, "tool1");
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
}

#[tokio::test]
async fn test_json_wrapper_with_array() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["<tools>".to_string()],
        end_tokens: vec!["</tools>".to_string()],
        separator: ", ".to_string(),
    });

    let input = r#"<tools>[
        {"name": "func1", "arguments": {}},
        {"name": "func2", "arguments": {"param": "value"}}
    ]</tools>"#;

204
205
206
207
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 2);
    assert_eq!(tools[0].function.name, "func1");
    assert_eq!(tools[1].function.name, "func2");
208
209
210
211
212
213
214
215
216
217
218
219
}

#[tokio::test]
async fn test_json_incomplete_wrapper_tokens() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["<tool>".to_string()],
        end_tokens: vec!["</tool>".to_string()],
        separator: ", ".to_string(),
    });

    // Missing end token
    let input = r#"<tool>{"name": "test", "arguments": {}}"#;
220
221
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 0, "Should not parse without closing token");
222
223
224

    // Missing start token
    let input = r#"{"name": "test", "arguments": {}}</tool>"#;
225
226
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 0, "Should not parse without opening token");
227
228
229
230
231
232
233
234
235
236
237
238
}

#[tokio::test]
async fn test_json_empty_wrapper_tokens() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec![],
        end_tokens: vec![],
        separator: ", ".to_string(),
    });

    let input = r#"{"name": "test", "arguments": {"key": "value"}}"#;

239
240
241
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "test");
242
}