//! Mixed Format and Additional Edge Case Tests
//!
//! Tests for edge cases across parsers and mixed format scenarios

use serde_json::json;
use sglang_router_rs::tool_parser::{
7
    JsonParser, LlamaParser, MistralParser, PythonicParser, QwenParser, ToolParser,
8
9
};

10
11
12
mod common;
use common::create_test_tools;

13
14
15
16
17
18
19
20
21
#[tokio::test]
async fn test_mixed_formats_in_text() {
    let json_parser = JsonParser::new();
    let input = r#"
    Some text with [TOOL_CALLS] marker that shouldn't trigger.
    Also has <tool_call> tags and [function()] syntax.
    But here's the actual JSON: {"name": "test", "arguments": {}}
    "#;

22
23
24
    let (_normal_text, tools) = json_parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "test");
25
26
27
28
29
30
31
32

    // Mistral parser should ignore JSON and other formats
    let mistral_parser = MistralParser::new();
    let input = r#"
    {"name": "fake"} [function()] <tool_call>
    [TOOL_CALLS] [{"name": "real", "arguments": {}}]
    "#;

33
34
35
    let (_normal_text, tools) = mistral_parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "real");
36
37
38
39
40
41
42
}

/// Format markers embedded in string argument values are data and must come
/// back unchanged in the parsed arguments.
#[tokio::test]
async fn test_format_markers_in_string_content() {
    // Pythonic call whose string argument mentions Mistral and Qwen markers.
    let pythonic = PythonicParser::new();
    let pythonic_src = r#"[echo(text="Use [TOOL_CALLS] and <tool_call> in text")]"#;
    let (_text, pythonic_calls) = pythonic.parse_complete(pythonic_src).await.unwrap();
    assert_eq!(pythonic_calls.len(), 1);
    let echo_args =
        serde_json::from_str::<serde_json::Value>(&pythonic_calls[0].function.arguments).unwrap();
    assert_eq!(echo_args["text"], "Use [TOOL_CALLS] and <tool_call> in text");

    // Qwen call whose string argument contains pythonic-looking syntax.
    let qwen = QwenParser::new();
    let qwen_src = r#"<tool_call>
{"name": "log", "arguments": {"msg": "Found [function()] pattern"}}
</tool_call>"#;
    let (_text, qwen_calls) = qwen.parse_complete(qwen_src).await.unwrap();
    assert_eq!(qwen_calls.len(), 1);
    let log_args =
        serde_json::from_str::<serde_json::Value>(&qwen_calls[0].function.arguments).unwrap();
    assert_eq!(log_args["msg"], "Found [function()] pattern");
}

/// Five levels of nested objects plus nested arrays in the arguments must
/// still parse, and the innermost `data` value must still be an array.
#[tokio::test]
async fn test_deeply_nested_json_structures() {
    let nested_call = r#"{
        "name": "deep_process",
        "arguments": {
            "level1": {
                "level2": {
                    "level3": {
                        "level4": {
                            "level5": {
                                "data": [1, 2, [3, [4, 5]]]
                            }
                        }
                    }
                }
            }
        }
    }"#;

    let parser = JsonParser::new();
    let (_text, calls) = parser.parse_complete(nested_call).await.unwrap();
    assert_eq!(calls.len(), 1);

    let call = &calls[0];
    assert_eq!(call.function.name, "deep_process");

    // Walk down to the innermost value in the re-parsed arguments.
    let parsed_args = serde_json::from_str::<serde_json::Value>(&call.function.arguments).unwrap();
    let innermost = &parsed_args["level1"]["level2"]["level3"]["level4"]["level5"]["data"];
    assert!(innermost.is_array());
}

#[tokio::test]
async fn test_multiple_sequential_calls_different_formats() {
    // Simulate a scenario where different parts of text have different formats
    // (though each parser will only recognize its own format)

    let llama_parser = LlamaParser::new();

    // Llama parser currently only returns the first tool found
    let input = r#"First call: <|python_tag|>{"name": "call1", "arguments": {}}"#;

98
99
100
    let (_normal_text, tools) = llama_parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "call1");
101
102

    let input2 = r#"{"name": "call2", "arguments": {"x": 1}}"#;
103
104
105
    let (_normal_text2, tools2) = llama_parser.parse_complete(input2).await.unwrap();
    assert_eq!(tools2.len(), 1);
    assert_eq!(tools2[0].function.name, "call2");
106
107
108
109
110
111
112
113
114
115
}

#[tokio::test]
async fn test_empty_and_whitespace_variations() {
    let json_parser = JsonParser::new();

    // Various whitespace scenarios
    let cases = vec![
        r#"  {"name":"compact","arguments":{}}  "#,
        r#"
Stefan He's avatar
Stefan He committed
116

117
        {"name": "spaced", "arguments": {}}
Stefan He's avatar
Stefan He committed
118

119
120
121
122
123
        "#,
        r#"	{"name": "tabbed", "arguments": {}}	"#, // tabs
    ];

    for input in cases {
124
125
        let (_normal_text, tools) = json_parser.parse_complete(input).await.unwrap();
        assert_eq!(tools.len(), 1, "Should parse regardless of whitespace");
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
    }
}

/// Arguments holding unusual but valid JSON values must parse into one tool
/// call. The values are exponent floats, an integer literal too wide for
/// 64 bits, control-character escapes, escaped backslashes and quotes, and
/// `\u` escapes.
#[tokio::test]
async fn test_special_json_values() {
    let special_call = r#"{
        "name": "test_special",
        "arguments": {
            "float_e": 1.23e10,
            "float_neg_e": 1.23e-10,
            "hex_like": "0x1234",
            "very_long_num": 99999999999999999999,
            "special_strings": ["", " ", "\u0000", "\u001f"],
            "escaped": "\\n\\r\\t\\\"\\\\",
            "unicode": "\u4e2d\u6587"
        }
    }"#;

    let parser = JsonParser::new();
    let (_text, calls) = parser.parse_complete(special_call).await.unwrap();
    assert_eq!(calls.len(), 1);

    let call = &calls[0];
    assert_eq!(call.function.name, "test_special");

    // Only the shape of two fields is checked, not their exact contents.
    let parsed_args = serde_json::from_str::<serde_json::Value>(&call.function.arguments).unwrap();
    assert!(parsed_args["special_strings"].is_array());
    assert!(parsed_args["escaped"].is_string());
}

#[tokio::test]
async fn test_parser_recovery_after_invalid_input() {
157
158
    let mut parser = JsonParser::new();
    let tools = create_test_tools();
159
160

    // Send invalid JSON first
161
    let _ = parser.parse_incremental(r#"{"broken": "#, &tools).await;
162

163
164
165
166
    // Create a new parser instance for clean state
    let mut parser2 = JsonParser::new();
    let result = parser2
        .parse_incremental(r#"{"name": "valid", "arguments": {}}"#, &tools)
167
168
169
        .await
        .unwrap();

170
171
172
    if !result.calls.is_empty() {
        if let Some(name) = &result.calls[0].name {
            assert_eq!(name, "valid");
173
174
175
176
177
178
179
180
181
182
        }
    }
}

#[tokio::test]
async fn test_boundary_cases_for_extraction() {
    let json_parser = JsonParser::new();

    // JSON at the very beginning
    let input = r#"{"name": "start", "arguments": {}} and then text"#;
183
184
185
    let (_normal_text, tools) = json_parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "start");
186
187
188

    // JSON at the very end
    let input = r#"Some text first {"name": "end", "arguments": {}}"#;
189
190
191
    let (_normal_text, tools) = json_parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "end");
192
193
194
195

    // Multiple JSON objects in text (should find first valid one)
    let input =
        r#"Text {"name": "first", "arguments": {}} more {"name": "second", "arguments": {}}"#;
196
197
198
    let (_normal_text, tools) = json_parser.parse_complete(input).await.unwrap();
    assert!(!tools.is_empty());
    assert_eq!(tools[0].function.name, "first");
199
200
201
202
203
204
205
206
}

/// Pythonic-format edge cases: identifiers containing underscores and digits,
/// and an empty string argument.
#[tokio::test]
async fn test_pythonic_edge_cases() {
    let pythonic = PythonicParser::new();

    // Function name with underscores and numbers.
    let named_src = r#"[func_name_2(param_1="value")]"#;
    let (_text, named_calls) = pythonic.parse_complete(named_src).await.unwrap();
    assert_eq!(named_calls.len(), 1);
    assert_eq!(named_calls[0].function.name, "func_name_2");

    // Empty string argument must survive as an empty JSON string.
    let empty_src = r#"[process(text="")]"#;
    let (_text, empty_calls) = pythonic.parse_complete(empty_src).await.unwrap();
    assert_eq!(empty_calls.len(), 1);
    let parsed_args =
        serde_json::from_str::<serde_json::Value>(&empty_calls[0].function.arguments).unwrap();
    assert_eq!(parsed_args["text"], "");
}

/// A Mistral `[TOOL_CALLS]` payload that is pretty-printed across many lines
/// must parse the same way as a compact one.
#[tokio::test]
async fn test_mistral_with_pretty_json() {
    // Pretty-printed JSON in Mistral format.
    let pretty_payload = r#"[TOOL_CALLS] [
        {
            "name": "formatted",
            "arguments": {
                "nested": {
                    "key": "value"
                },
                "array": [
                    1,
                    2,
                    3
                ]
            }
        }
    ]"#;

    let mistral = MistralParser::new();
    let (_text, calls) = mistral.parse_complete(pretty_payload).await.unwrap();
    assert_eq!(calls.len(), 1);

    let call = &calls[0];
    assert_eq!(call.function.name, "formatted");

    // Both the nested object and the array must keep their contents.
    let parsed_args = serde_json::from_str::<serde_json::Value>(&call.function.arguments).unwrap();
    assert_eq!(parsed_args["nested"]["key"], "value");
    assert_eq!(parsed_args["array"], json!([1, 2, 3]));
}

/// XML-looking CDATA text inside a JSON string argument must pass through the
/// Qwen parser unchanged.
#[tokio::test]
async fn test_qwen_with_cdata_like_content() {
    // Note: QwenParser expects exactly "<tool_call>\n" with the newline.
    let wrapped = r#"<tool_call>
{"name": "process", "arguments": {"xml": "<![CDATA[some data]]>"}}
</tool_call>"#;

    let qwen = QwenParser::new();
    let (_text, calls) = qwen.parse_complete(wrapped).await.unwrap();
    assert_eq!(calls.len(), 1);

    let call = &calls[0];
    assert_eq!(call.function.name, "process");

    let parsed_args = serde_json::from_str::<serde_json::Value>(&call.function.arguments).unwrap();
    assert_eq!(parsed_args["xml"], "<![CDATA[some data]]>");
}

#[tokio::test]
async fn test_extremely_long_function_names() {
    let parser = PythonicParser::new();

    let long_name = "very_long_function_name_that_might_appear_in_generated_code_somewhere";
    let input = format!(r#"[{}(param="value")]"#, long_name);

273
274
275
    let (_normal_text, tools) = parser.parse_complete(&input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, long_name);
276
277
278
279
280
281
282
283
284
}

/// Pins how duplicate keys inside `arguments` are resolved: the later value
/// replaces the earlier one.
#[tokio::test]
async fn test_json_with_duplicate_keys() {
    // RFC 8259 does not define which value wins when an object repeats a
    // name; this test fixes the last-wins behavior that many JSON
    // implementations follow.
    let duplicated = r#"{"name": "test", "arguments": {"key": "first", "key": "second"}}"#;

    let json_parser = JsonParser::new();
    let (_text, calls) = json_parser.parse_complete(duplicated).await.unwrap();
    assert_eq!(calls.len(), 1);

    let parsed_args =
        serde_json::from_str::<serde_json::Value>(&calls[0].function.arguments).unwrap();
    // The second occurrence of "key" must be the one that survives.
    assert_eq!(parsed_args["key"], "second");
}