tool_parser_fallback.rs 10.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
//! Tests for tool parser fallback behavior
//!
//! When tool call parsing fails, the original text should be preserved as normal text
//! rather than being lost. This ensures graceful degradation.

use sglang_router_rs::tool_parser::{
    DeepSeekParser, JsonParser, LlamaParser, MistralParser, QwenParser, ToolParser,
};

/// `JsonParser` fallback: input that cannot be parsed as a tool call must come back
/// verbatim as normal text, with no tool calls reported.
#[tokio::test]
async fn test_json_parser_invalid_json_returns_as_normal_text() {
    let parser = JsonParser::new();

    // Every case expects the same outcome, so the inputs are driven from one table.
    let cases = [
        // Object-shaped text whose `arguments` value is not valid JSON.
        r#"{"name": "test", "arguments": invalid json here}"#,
        // Prose with no JSON structure at all.
        "This is just plain text that should not be parsed as a tool call",
        // Prose containing braces that do not enclose valid JSON.
        "The user said: {something} but it's not valid JSON",
    ];

    for input in cases {
        let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
        assert!(tools.is_empty(), "unexpected tool calls for {:?}", input);
        assert_eq!(normal_text, input);
    }
}

/// `QwenParser` fallback: when no valid tool call can be extracted, the original text
/// must be returned unchanged as normal text.
#[tokio::test]
async fn test_qwen_parser_invalid_format_returns_as_normal_text() {
    let parser = QwenParser::new();

    let cases = [
        // Opening tag that is never closed.
        r#"<tool_call>
{"name": "test", "arguments": {}}
This text is missing the closing tag"#,
        // Well-formed tags wrapping a payload that is not valid JSON.
        r#"<tool_call>
{"name": "test", "arguments": invalid}
</tool_call>"#,
        // No tool markers anywhere in the text.
        "This is a regular response without any tool calls.",
    ];

    for input in cases {
        let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
        assert!(tools.is_empty(), "unexpected tool calls for {:?}", input);
        // The whole input is expected back, markers included.
        assert_eq!(normal_text, input);
    }
}

/// `LlamaParser` fallback: text that yields no tool call (with or without the
/// `<|python_tag|>` marker) must be returned unchanged as normal text.
#[tokio::test]
async fn test_llama_parser_invalid_format_returns_as_normal_text() {
    let parser = LlamaParser::new();

    let cases = [
        // Marker followed by an object whose `arguments` value is not valid JSON.
        r#"<|python_tag|>{"name": "test", "arguments": invalid}"#,
        // Neither a marker nor any JSON.
        "Just explaining something without any function calls.",
        // Leading prose, then the marker followed by non-JSON content.
        r#"Here's my response <|python_tag|>not even close to JSON"#,
    ];

    for input in cases {
        let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
        assert!(tools.is_empty(), "unexpected tool calls for {:?}", input);
        assert_eq!(normal_text, input);
    }
}

/// `MistralParser` fallback: a broken or absent `[TOOL_CALLS]` section must leave the
/// original text intact as normal text.
#[tokio::test]
async fn test_mistral_parser_invalid_format_returns_as_normal_text() {
    let parser = MistralParser::new();

    let cases = [
        // Tool-call array that is never closed.
        r#"[TOOL_CALLS] [{"name": "test", "arguments": {}"#,
        // Closed array whose element is not valid JSON.
        r#"[TOOL_CALLS] [{"name": invalid json}]"#,
        // Ordinary prose without the marker.
        "No tool calls here, just regular text.",
    ];

    for input in cases {
        let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
        assert!(tools.is_empty(), "unexpected tool calls for {:?}", input);
        assert_eq!(normal_text, input);
    }
}

/// `DeepSeekParser` fallback: input that produces no tool call must be returned
/// unchanged as normal text, whether or not the emoji marker is present.
#[tokio::test]
async fn test_deepseek_parser_invalid_format_returns_as_normal_text() {
    let parser = DeepSeekParser::new();

    let cases = [
        // Emoji marker followed by an array containing invalid JSON.
        r#"🤔[{"name": "test", "arguments": malformed}]"#,
        // Emoji marker followed by prose rather than a JSON array.
        "🤔 Just thinking about this problem...",
        // No emoji marker at all.
        "Regular response without any special markers.",
    ];

    for input in cases {
        let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
        assert!(tools.is_empty(), "unexpected tool calls for {:?}", input);
        assert_eq!(normal_text, input);
    }
}

/// A response holding one well-formed and one malformed Qwen tool call must yield only
/// the well-formed call, while the surrounding prose stays in the normal text.
#[tokio::test]
async fn test_mixed_valid_and_invalid_content() {
    // First `<tool_call>` block is valid JSON; the second has a malformed `arguments` value.
    let input = r#"Let me help you with that.
<tool_call>
{"name": "valid_tool", "arguments": {"x": 1}}
</tool_call>
And here's another one:
<tool_call>
{"name": "invalid_tool", "arguments": malformed}
</tool_call>
That's all!"#;

    let (remaining_text, calls) = QwenParser::new().parse_complete(input).await.unwrap();

    // Only the valid call is extracted.
    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].function.name, "valid_tool");

    // Prose before the first call and after the last one is kept as normal text.
    for fragment in ["Let me help you", "That's all!"] {
        assert!(remaining_text.contains(fragment));
    }
}

/// Incomplete or cut-off tool markers must not produce tool calls; each parser is
/// expected to return its input unchanged as normal text.
#[tokio::test]
async fn test_partial_tool_markers() {
    // Qwen: opening tag followed by prose and no closing tag.
    {
        let text = "<tool_call>\nThis looks like it might be a tool call but it's not";
        let (normal_text, tools) = QwenParser::new().parse_complete(text).await.unwrap();
        assert!(tools.is_empty());
        assert_eq!(normal_text, text);
    }

    // Mistral: marker present but no JSON array after it.
    {
        let text = "[TOOL_CALLS] But then nothing follows...";
        let (normal_text, tools) = MistralParser::new().parse_complete(text).await.unwrap();
        assert!(tools.is_empty());
        assert_eq!(normal_text, text);
    }

    // Llama: python tag in the middle of prose with no JSON behind it.
    {
        let text = "Starting a response <|python_tag|> but no JSON";
        let (normal_text, tools) = LlamaParser::new().parse_complete(text).await.unwrap();
        assert!(tools.is_empty());
        assert_eq!(normal_text, text);
    }
}

/// Tool-call syntax embedded in otherwise ordinary prose is still extracted: both
/// parsers below are expected to report exactly one call named `example`.
#[tokio::test]
async fn test_escaped_json_like_content() {
    // JsonParser: a valid JSON object sits in the middle of a sentence.
    let json_input = r#"The user typed: {"name": "example"} but this is just quoted text"#;
    let (json_text, json_calls) = JsonParser::new().parse_complete(json_input).await.unwrap();
    assert_eq!(json_calls.len(), 1);
    assert_eq!(json_calls[0].function.name, "example");
    // The object is cut out of the sentence, leaving the two spaces that surrounded it.
    assert_eq!(json_text, "The user typed:  but this is just quoted text");

    // QwenParser: the prose quotes a syntactically valid tool call, so it parses as one.
    let qwen_input = r#"The syntax is: <tool_call>
{"name": "example"}
</tool_call> - that's how you format it"#;
    let (_, qwen_calls) = QwenParser::new().parse_complete(qwen_input).await.unwrap();
    assert_eq!(qwen_calls.len(), 1);
    assert_eq!(qwen_calls[0].function.name, "example");
}

/// `QwenParser` fallback with non-ASCII and bracket-heavy payloads: when no tool call is
/// extracted, the input must come back unchanged as normal text.
///
/// The checks compare against the full input rather than only testing for non-empty
/// text, so truncated or mangled fallback text (e.g. a bad Unicode slice) fails the test.
#[tokio::test]
async fn test_unicode_and_special_chars_in_failed_parsing() {
    let parser = QwenParser::new();

    // Unicode in malformed tool calls: valid tags, but the `arguments` value is not JSON.
    let input = r#"<tool_call>
{"name": "测试", "arguments": 🚀 invalid}
</tool_call>"#;
    let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 0);
    // Same shape as the ASCII "malformed JSON inside valid tags" case: the original text,
    // including the multi-byte characters, must be preserved byte-for-byte.
    assert_eq!(normal_text, input);

    // Special characters that might confuse parsers: backslash escapes inside a string
    // and closing brackets inside a quoted value.
    let input = r#"Response: <tool_call>{"name": "test\n\t", "arguments": {"]}"}</tool_call>"#;
    let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
    // Whether this yields a tool call depends on the parser's tag and JSON handling, so the
    // fallback contract is only enforced when nothing was extracted.
    if tools.is_empty() {
        assert_eq!(normal_text, input);
    }
}

/// A very long, unterminated JSON-like payload must be returned unchanged as normal
/// text by `JsonParser`, with no tool calls extracted.
#[tokio::test]
async fn test_very_long_invalid_input() {
    // 1000 `"fieldN": "valueN", ` pairs that look like the start of a large arguments object.
    let fields: String = (0..1000)
        .map(|i| format!("\"field{}\": \"value{}\", ", i, i))
        .collect();

    // The tail holds a bare word as a value and never closes either brace, so the whole
    // payload is invalid JSON.
    let input = [
        "{\"name\": \"test\", \"arguments\": {",
        fields.as_str(),
        "\"final\": incomplete",
    ]
    .concat();

    let (normal_text, tools) = JsonParser::new().parse_complete(&input).await.unwrap();
    assert!(tools.is_empty());
    assert_eq!(normal_text, input);
}

/// Near-miss tool calls for `JsonParser`: inputs that are one small mistake away from a
/// valid call. Pins the normal text returned for each.
#[tokio::test]
async fn test_almost_valid_tool_calls() {
    let parser = JsonParser::new();

    // Unterminated string value: nothing is extracted and the input is returned verbatim.
    let unterminated = r#"{"name": "test", "arguments": {"key": "value}}"#;
    let (text, calls) = parser.parse_complete(unterminated).await.unwrap();
    assert!(calls.is_empty());
    assert_eq!(text, unterminated);

    // Trailing comma after the arguments object.
    let trailing_comma = r#"{"name": "test", "arguments": {},}"#;
    let (text, calls) = parser.parse_complete(trailing_comma).await.unwrap();
    // A lenient JSON parser might accept this, so the text is only pinned when no call
    // was produced. The expected text is the input minus the empty `{}`.
    // NOTE(review): presumably the inner `{}` is consumed as a JSON value; confirm against the parser.
    if calls.is_empty() {
        assert_eq!(text, r#"{"name": "test", "arguments": ,}"#);
    }

    // Single-quoted keys and values: standard JSON requires double quotes, so no call.
    let single_quoted = r#"{'name': 'test', 'arguments': {}}"#;
    let (text, calls) = parser.parse_complete(single_quoted).await.unwrap();
    assert!(calls.is_empty());
    // As above, the expected text is the input with the empty `{}` removed.
    assert_eq!(text, r#"{'name': 'test', 'arguments': }"#);
}