//! Pythonic Parser Integration Tests
//!
//! Tests for the Pythonic parser which handles Python function call syntax

use serde_json::json;
use sglang_router_rs::tool_parser::{PythonicParser, ToolParser};

8
9
10
mod common;
use common::create_test_tools;

11
12
13
14
15
#[tokio::test]
async fn test_pythonic_single_function() {
    let parser = PythonicParser::new();
    let input = r#"[get_weather(city="London", units="celsius")]"#;

16
17
18
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "get_weather");
19

20
    let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
21
22
23
24
25
26
27
28
29
30
    assert_eq!(args["city"], "London");
    assert_eq!(args["units"], "celsius");
}

/// Two comma-separated calls inside one bracket pair are returned as two
/// tools, in source order.
#[tokio::test]
async fn test_pythonic_multiple_functions() {
    let parser = PythonicParser::new();
    let input =
        r#"[search_web(query="Rust programming", max_results=5), get_time(timezone="UTC")]"#;

    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 2);
    assert_eq!(tools[0].function.name, "search_web");
    assert_eq!(tools[1].function.name, "get_time");

    let args0: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
    assert_eq!(args0["query"], "Rust programming");
    assert_eq!(args0["max_results"], 5);
}

/// Python's `True` / `False` / `None` literals map to JSON `true` / `false` /
/// `null` in the emitted arguments.
#[tokio::test]
async fn test_pythonic_with_python_literals() {
    let parser = PythonicParser::new();
    let input = r#"[configure(enabled=True, disabled=False, optional=None)]"#;

    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);

    let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
    assert_eq!(args["enabled"], true);
    assert_eq!(args["disabled"], false);
    assert_eq!(args["optional"], json!(null));
}

/// List and dict argument values (including a nested dict holding a Python
/// boolean) are converted to the equivalent JSON arrays and objects.
#[tokio::test]
async fn test_pythonic_with_lists_and_dicts() {
    let parser = PythonicParser::new();
    let input =
        r#"[process_data(items=[1, 2, 3], config={"key": "value", "nested": {"deep": True}})]"#;

    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);

    let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
    assert_eq!(args["items"], json!([1, 2, 3]));
    assert_eq!(args["config"]["key"], "value");
    assert_eq!(args["config"]["nested"]["deep"], true);
}

/// A call wrapped in `<|python_start|>` / `<|python_end|>` markers is still
/// parsed as a normal tool call.
#[tokio::test]
async fn test_pythonic_with_special_tokens() {
    let parser = PythonicParser::new();

    // Llama 4 sometimes outputs these tokens
    let input = r#"<|python_start|>[calculate(x=10, y=20)]<|python_end|>"#;

    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "calculate");

    let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
    assert_eq!(args["x"], 10);
    assert_eq!(args["y"], 20);
}

/// Parentheses inside a string argument must not be mistaken for the end of
/// the call's argument list.
#[tokio::test]
async fn test_pythonic_with_nested_parentheses() {
    let parser = PythonicParser::new();
    let input = r#"[math_eval(expression="(2 + 3) * (4 - 1)", round_to=2)]"#;

    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);

    let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
    assert_eq!(args["expression"], "(2 + 3) * (4 - 1)");
    assert_eq!(args["round_to"], 2);
}

/// Backslash-escaped double quotes inside a string argument are unescaped in
/// the resulting JSON value.
#[tokio::test]
async fn test_pythonic_with_escaped_quotes() {
    let parser = PythonicParser::new();
    // Raw string: the backslashes below reach the parser literally.
    let input = r#"[echo(text="She said \"Hello\" to him")]"#;

    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);

    let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
    assert_eq!(args["text"], "She said \"Hello\" to him");
}

/// A call with no arguments produces an empty JSON object rather than an
/// empty string or `null`.
#[tokio::test]
async fn test_pythonic_empty_arguments() {
    let parser = PythonicParser::new();
    let input = r#"[ping()]"#;

    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "ping");

    let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
    assert_eq!(args, json!({}));
}

#[tokio::test]
async fn test_pythonic_format_detection() {
    let parser = PythonicParser::new();

128
129
130
131
    assert!(!parser.has_tool_markers("[function_name(")); // Incomplete
    assert!(parser.has_tool_markers("[get_weather(city=\"NYC\")]"));
    assert!(!parser.has_tool_markers("Just plain text"));
    assert!(!parser.has_tool_markers("{\"name\": \"test\"}")); // JSON
132
133
134
135
136
137
138
139
}

#[tokio::test]
async fn test_pythonic_invalid_syntax() {
    let parser = PythonicParser::new();

    // Missing closing bracket
    let input = r#"[function(arg=value"#;
140
141
    if let Ok((_normal_text, tools)) = parser.parse_complete(input).await {
        assert_eq!(tools.len(), 0);
142
143
144
145
146
147
148
    }
    // Error is also acceptable for invalid syntax

    // Invalid Python syntax - empty parameter name
    // Note: The parser currently accepts this invalid syntax and returns a result
    // This is a known limitation of the current implementation
    let input = r#"[function(=value)]"#;
149
    if let Ok((_normal_text, tools)) = parser.parse_complete(input).await {
150
151
        // The parser incorrectly accepts this, returning 1 result
        // We'll accept this behavior for now but note it's not ideal
152
        assert!(tools.len() <= 1, "Should parse at most one function");
153
154
155
156
157
158
159
160
161
162
163
    }
    // Error would be the correct behavior
}

/// Multi-line model output with prose before and after a three-call block.
///
/// Checks that all three calls are extracted and that the returned normal
/// text is the surrounding prose with the bracketed block removed.
#[tokio::test]
async fn test_pythonic_real_world_llama4() {
    let parser = PythonicParser::new();

    // Actual output from Llama 4 model
    let input = r#"I'll help you with multiple tasks. Let me search for information and perform calculations.

[web_search(query="latest Rust features", max_results=3, safe_search=True),
 calculate(expression="42 * 3.14159", precision=2),
 get_weather(city="San Francisco", units="fahrenheit", include_forecast=False)]

These functions will provide the information you need."#;

    let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 3);
    // The blank line before the block and the one after it end up adjacent,
    // hence four consecutive newlines.
    assert_eq!(normal_text, "I'll help you with multiple tasks. Let me search for information and perform calculations.\n\n\n\nThese functions will provide the information you need.");
    assert_eq!(tools[0].function.name, "web_search");
    assert_eq!(tools[1].function.name, "calculate");
    assert_eq!(tools[2].function.name, "get_weather");

    let args0: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
    assert_eq!(args0["query"], "latest Rust features");
    assert_eq!(args0["safe_search"], true);
}

/// Nested lists, and square brackets that appear inside string elements, do
/// not confuse detection of the outer bracket pair.
#[tokio::test]
async fn test_pythonic_nested_brackets_in_lists() {
    let parser = PythonicParser::new();

    let input = r#"[process_matrix(data=[[1, 2], [3, 4]], labels=["row[0]", "row[1]"])]"#;

    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "process_matrix");

    let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
    assert_eq!(args["data"], json!([[1, 2], [3, 4]]));
    assert_eq!(args["labels"], json!(["row[0]", "row[1]"]));
}

/// Brackets inside dict values, both as nested lists and as characters in
/// strings, are preserved through parsing.
#[tokio::test]
async fn test_pythonic_nested_brackets_in_dicts() {
    let parser = PythonicParser::new();

    let input =
        r#"[analyze(config={"patterns": ["[a-z]+", "[0-9]+"], "nested": {"list": [1, [2, 3]]}})]"#;

    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "analyze");

    let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
    assert_eq!(args["config"]["patterns"], json!(["[a-z]+", "[0-9]+"]));
    assert_eq!(args["config"]["nested"]["list"], json!([1, [2, 3]]));
}

/// Single-quoted and double-quoted strings are both accepted, including a
/// double-quoted string that contains an apostrophe and escaped quotes.
#[tokio::test]
async fn test_pythonic_mixed_quotes() {
    let parser = PythonicParser::new();

    let input = r#"[format_text(single='Hello', double="World", mixed="It's \"quoted\"")]"#;

    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "format_text");

    let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
    assert_eq!(args["single"], "Hello");
    assert_eq!(args["double"], "World");
    assert_eq!(args["mixed"], "It's \"quoted\"");
}

#[tokio::test]
async fn test_pythonic_complex_nesting() {
    let parser = PythonicParser::new();

    let input = r#"[transform(
        matrix=[[1, [2, 3]], [4, [5, [6, 7]]]],
        operations=[{"type": "scale", "factor": [2, 3]}, {"type": "rotate", "angle": 90}],
        metadata={"tags": ["nested[0]", "nested[1]"], "config": {"depth": [1, 2, 3]}}
    )]"#;

239
240
241
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "transform");
242

243
    let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
244
245
246
247
248
    assert!(args["matrix"].is_array());
    assert!(args["operations"].is_array());
    assert_eq!(args["operations"][0]["type"], "scale");
    assert_eq!(args["metadata"]["config"]["depth"], json!([1, 2, 3]));
}
249
250
251

#[tokio::test]
async fn test_parse_streaming_no_brackets() {
252
253
254
    let mut parser = PythonicParser::new();

    let tools = create_test_tools();
255
256

    let text = "This is just normal text without any tool calls.";
257
    let result = parser.parse_incremental(text, &tools).await.unwrap();
258

259
260
    // Expected - no tool calls found
    assert!(result.calls.is_empty());
261
262
263
264
}

#[tokio::test]
async fn test_parse_streaming_complete_tool_call() {
265
266
267
    let mut parser = PythonicParser::new();

    let tools = create_test_tools();
268
269

    let text = "Here's a tool call: [get_weather(location='New York', unit='celsius')]";
270
271
272
273
274
275
276
    let result = parser.parse_incremental(text, &tools).await.unwrap();

    assert!(!result.calls.is_empty(), "Should parse complete tool call");
    assert_eq!(result.calls[0].name.as_ref().unwrap(), "get_weather");
    let args: serde_json::Value = serde_json::from_str(&result.calls[0].parameters).unwrap();
    assert_eq!(args["location"], "New York");
    assert_eq!(args["unit"], "celsius");
277
278
279
280
}

#[tokio::test]
async fn test_parse_streaming_text_before_tool_call() {
281
282
283
    let mut parser = PythonicParser::new();

    let tools = create_test_tools();
284
285

    let text = "This is some text before [get_weather(location='London')]";
286
    let result = parser.parse_incremental(text, &tools).await.unwrap();
287

288
289
290
291
    assert!(!result.calls.is_empty(), "Should parse tool call");
    assert_eq!(result.calls[0].name.as_ref().unwrap(), "get_weather");
    let args: serde_json::Value = serde_json::from_str(&result.calls[0].parameters).unwrap();
    assert_eq!(args["location"], "London");
292
293
294
295
}

#[tokio::test]
async fn test_parse_streaming_partial_tool_call() {
296
297
298
    let mut parser = PythonicParser::new();

    let tools = create_test_tools();
299
300
301

    // First chunk with opening bracket but no closing bracket
    let text1 = "Let me check the weather: [get_weather(location=";
302
    let result1 = parser.parse_incremental(text1, &tools).await.unwrap();
303

304
305
306
307
308
    // First chunk should be incomplete
    assert!(
        result1.calls.is_empty(),
        "First chunk should not return tool call"
    );
309
310
311

    // Second chunk completing the tool call
    let text2 = "'Paris')]";
312
313
314
315
316
317
318
319
320
    let result2 = parser.parse_incremental(text2, &tools).await.unwrap();

    assert!(
        !result2.calls.is_empty(),
        "Second chunk should complete tool call"
    );
    assert_eq!(result2.calls[0].name.as_ref().unwrap(), "get_weather");
    let args: serde_json::Value = serde_json::from_str(&result2.calls[0].parameters).unwrap();
    assert_eq!(args["location"], "Paris");
321
322
323
324
}

#[tokio::test]
async fn test_parse_streaming_bracket_without_text_before() {
325
326
327
    let mut parser = PythonicParser::new();

    let tools = create_test_tools();
328
329

    let text = "[search(query='python programming')]";
330
    let result = parser.parse_incremental(text, &tools).await.unwrap();
331

332
333
334
335
    assert!(!result.calls.is_empty(), "Should parse tool call");
    assert_eq!(result.calls[0].name.as_ref().unwrap(), "search");
    let args: serde_json::Value = serde_json::from_str(&result.calls[0].parameters).unwrap();
    assert_eq!(args["query"], "python programming");
336
337
338
339
}

#[tokio::test]
async fn test_parse_streaming_text_after_tool_call() {
340
341
342
    let mut parser = PythonicParser::new();

    let tools = create_test_tools();
343
344
345

    // First chunk with complete tool call and some text after
    let text = "[get_weather(location='Tokyo')] Here's the forecast:";
346
    let result = parser.parse_incremental(text, &tools).await.unwrap();
347

348
349
350
    assert!(!result.calls.is_empty(), "Should parse tool call");
    assert_eq!(result.calls[0].name.as_ref().unwrap(), "get_weather");
    // Text after tool call is handled by parser internally
351
352
353
354
}

#[tokio::test]
async fn test_parse_streaming_multiple_tool_calls() {
355
356
357
    let mut parser = PythonicParser::new();

    let tools = create_test_tools();
358
359
360
361

    let text = "[get_weather(location='Berlin'), search(query='restaurants')]";

    // Current implementation may handle this as a single parse
362
    let result = parser.parse_incremental(text, &tools).await.unwrap();
363
364

    // The parser should handle multiple tools in one bracket pair
365
366
367
368
    // This test is flexible about the implementation behavior
    if !result.calls.is_empty() {
        // Parser found at least one tool
        assert!(result.calls[0].name.is_some());
369
    }
370
    // Also acceptable if parser returns empty waiting for more context
371
372
373
374
}

#[tokio::test]
async fn test_parse_streaming_opening_bracket_only() {
375
376
377
    let mut parser = PythonicParser::new();

    let tools = create_test_tools();
378
379

    let text = "Let's try this: [";
380
    let result = parser.parse_incremental(text, &tools).await.unwrap();
381

382
383
384
385
386
    // Should be incomplete - no complete tool call
    assert!(
        result.calls.is_empty(),
        "Should not return tool call for partial bracket"
    );
387
388
389
390
}

#[tokio::test]
async fn test_parse_streaming_nested_brackets() {
391
392
393
    let mut parser = PythonicParser::new();

    let tools = create_test_tools();
394
395

    let text = "[get_weather(location='New York', unit='celsius', data=[1, 2, 3])]";
396
397
398
399
400
401
402
403
404
405
406
    let result = parser.parse_incremental(text, &tools).await.unwrap();

    assert!(
        !result.calls.is_empty(),
        "Should parse tool call with nested brackets"
    );
    assert_eq!(result.calls[0].name.as_ref().unwrap(), "get_weather");
    let args: serde_json::Value = serde_json::from_str(&result.calls[0].parameters).unwrap();
    assert_eq!(args["location"], "New York");
    assert_eq!(args["unit"], "celsius");
    assert_eq!(args["data"], json!([1, 2, 3]));
407
408
409
410
}

#[tokio::test]
async fn test_parse_streaming_nested_brackets_dict() {
411
412
    let mut parser = PythonicParser::new();
    let tools = create_test_tools();
413
414

    let text = r#"[search(query='test', config={'options': [1, 2], 'nested': {'key': 'value'}})]"#;
415
416
417
418
419
420
421
422
423
424
425
    let result = parser.parse_incremental(text, &tools).await.unwrap();

    assert!(
        !result.calls.is_empty(),
        "Should parse tool call with nested dict"
    );
    assert_eq!(result.calls[0].name.as_ref().unwrap(), "search");
    let args: serde_json::Value = serde_json::from_str(&result.calls[0].parameters).unwrap();
    assert_eq!(args["query"], "test");
    assert_eq!(args["config"]["options"], json!([1, 2]));
    assert_eq!(args["config"]["nested"]["key"], "value");
426
427
428
429
}

#[tokio::test]
async fn test_parse_streaming_multiple_tools_with_nested_brackets() {
430
431
432
    let mut parser = PythonicParser::new();

    let tools = create_test_tools();
433
434
435

    let text =
        "[get_weather(location='Paris', data=[10, 20]), search(query='test', filters=['a', 'b'])]";
436
    let result = parser.parse_incremental(text, &tools).await.unwrap();
437

438
439
440
441
    // Should parse tools successfully
    if !result.calls.is_empty() {
        // At least gets the first tool
        assert!(result.calls[0].name.is_some());
442
443
444
445
446
    }
}

#[tokio::test]
async fn test_parse_streaming_partial_nested_brackets() {
447
448
449
    let mut parser = PythonicParser::new();

    let tools = create_test_tools();
450
451
452

    // First chunk with nested brackets but incomplete
    let text1 = "Here's a call: [get_weather(location='Tokyo', data=[1, 2";
453
    let result1 = parser.parse_incremental(text1, &tools).await.unwrap();
454

455
456
    // First chunk should be incomplete
    assert!(result1.calls.is_empty(), "First chunk should not complete");
457
458
459

    // Second chunk completing the nested brackets
    let text2 = ", 3])]";
460
461
462
463
464
465
466
467
468
469
    let result2 = parser.parse_incremental(text2, &tools).await.unwrap();

    assert!(
        !result2.calls.is_empty(),
        "Second chunk should complete tool call"
    );
    assert_eq!(result2.calls[0].name.as_ref().unwrap(), "get_weather");
    let args: serde_json::Value = serde_json::from_str(&result2.calls[0].parameters).unwrap();
    assert_eq!(args["location"], "Tokyo");
    assert_eq!(args["data"], json!([1, 2, 3]));
470
471
472
473
}

#[tokio::test]
async fn test_parse_streaming_with_python_start_and_end_token() {
474
475
476
    let mut parser = PythonicParser::new();

    let tools = create_test_tools();
477
478
479
480
481
482
483
484
485
486
487
488

    let chunks = vec![
        "Here's a call: ",
        "<|python_",
        "start|>[get_weather(location=",
        "'Tokyo', data=[1, 2",
        ", 3])]<|python_end|>",
    ];

    let mut got_tool = false;

    for chunk in chunks {
489
490
491
492
493
494
495
496
497
498
        let result = parser.parse_incremental(chunk, &tools).await.unwrap();
        if !result.calls.is_empty() {
            if let Some(name) = &result.calls[0].name {
                assert_eq!(name, "get_weather");
                let args: serde_json::Value =
                    serde_json::from_str(&result.calls[0].parameters).unwrap();
                assert_eq!(args["location"], "Tokyo");
                assert_eq!(args["data"], json!([1, 2, 3]));
                got_tool = true;
            }
499
500
501
502
503
504
505
506
507
508
509
        }
    }

    assert!(got_tool, "Should have parsed the tool call");
}

/// Non-streaming: a call wrapped in `<|python_start|>` / `<|python_end|>` and
/// surrounded by prose on both sides is extracted by `parse_complete`.
#[tokio::test]
async fn test_detect_and_parse_with_python_start_and_end_token() {
    let parser = PythonicParser::new();

    let text = "User wants to get the weather in Mars. <|python_start|>[get_weather(location='Mars', unit='celsius')]<|python_end|> In this way we will get the weather in Mars.";
    let (_normal_text, tools) = parser.parse_complete(text).await.unwrap();

    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "get_weather");

    let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
    assert_eq!(args["location"], "Mars");
    assert_eq!(args["unit"], "celsius");
}