//! JSON format parser for model tool calls.
use async_trait::async_trait;
use regex::Regex;
use serde_json::Value;

use crate::tool_parser::{
    errors::{ToolParserError, ToolParserResult},
    partial_json::PartialJson,
    state::ParseState,
    traits::ToolParser,
    types::{FunctionCall, StreamResult, TokenConfig, ToolCall},
};

/// JSON format parser for tool calls
///
/// Handles various JSON formats for function calling:
/// - Single tool call: {"name": "fn", "arguments": {...}}
/// - Multiple tool calls: [{"name": "fn1", "arguments": {...}}, ...]
/// - With parameters instead of arguments: {"name": "fn", "parameters": {...}}
///
/// Supports configurable token markers for different models
pub struct JsonParser {
22
23
    /// Token configuration for parsing
    token_config: TokenConfig,
24
25
26
27
28
29
30
31
32
    /// Parser for handling incomplete JSON during streaming
    partial_json: PartialJson,
    /// Regex patterns for extracting content between tokens
    extractors: Vec<Regex>,
}

impl JsonParser {
    /// Create a new JSON parser with default configuration
    pub fn new() -> Self {
33
34
35
36
37
        Self::with_config(TokenConfig {
            start_tokens: vec![],
            end_tokens: vec![],
            separator: ", ".to_string(),
        })
38
39
40
    }

    /// Create a parser with custom token configuration
41
    pub fn with_config(config: TokenConfig) -> Self {
42
        // Build extraction patterns for each token pair
43
44
        let extractors: Vec<Regex> = config
            .iter_pairs()
45
46
47
48
49
50
51
52
53
54
55
56
57
            .filter_map(|(start, end)| {
                if !start.is_empty() && !end.is_empty() {
                    // Use (?s) flag to enable DOTALL mode so . matches newlines
                    let pattern =
                        format!(r"(?s){}(.*?){}", regex::escape(start), regex::escape(end));
                    Regex::new(&pattern).ok()
                } else {
                    None
                }
            })
            .collect();

        Self {
58
            token_config: config,
59
60
61
62
63
64
65
            partial_json: PartialJson::default(),
            extractors,
        }
    }

    /// Extract JSON content from text, handling wrapper tokens if configured
    fn extract_json_content<'a>(&self, text: &'a str) -> &'a str {
66
        let mut content = text;
67

68
        // Try each extractor pattern (for tokens with both start and end)
69
70
71
        for extractor in &self.extractors {
            if let Some(captures) = extractor.captures(content) {
                if let Some(matched) = captures.get(1) {
72
                    return matched.as_str().trim();
73
74
75
76
77
                }
            }
        }

        // Handle special case where there's a start token but no end token
78
        for (start, end) in self.token_config.iter_pairs() {
79
            if !start.is_empty() && end.is_empty() {
80
81
82
83
84
                // Find the start token and extract everything after it
                if let Some(pos) = content.find(start) {
                    content = &content[pos + start.len()..];
                    return content.trim();
                }
85
86
87
            }
        }

88
89
90
        content.trim()
    }

91
92
93
94
95
96
97
98
99
100
101
102
103
    /// Try to extract a first valid JSON object or array from text that may contain other content
    /// Returns (json_string, normal_text) where normal_text is text before and after the JSON
    fn extract_json_from_text(&self, text: &str) -> Option<(String, String)> {
        let mut in_string = false;
        let mut escape = false;
        let mut stack: Vec<char> = Vec::with_capacity(8);
        let mut start: Option<usize> = None;

        for (i, ch) in text.char_indices() {
            if escape {
                escape = false;
                continue;
            }
104

105
106
107
108
109
110
111
            match ch {
                '\\' if in_string => escape = true,
                '"' => in_string = !in_string,
                _ if in_string => {}
                '{' | '[' => {
                    if start.is_none() {
                        start = Some(i);
112
                    }
113
                    stack.push(ch);
114
                }
115
116
117
118
119
120
                '}' | ']' => {
                    let Some(open) = stack.pop() else {
                        // Stray closer - reset and continue looking for next valid JSON
                        start = None;
                        continue;
                    };
121

122
123
124
125
126
127
128
                    let valid = (open == '{' && ch == '}') || (open == '[' && ch == ']');
                    if !valid {
                        // Mismatch - reset and continue looking
                        start = None;
                        stack.clear();
                        continue;
                    }
129

130
131
132
133
134
135
136
137
138
139
140
141
142
143
                    if stack.is_empty() {
                        let s = start.unwrap();
                        let e = i + ch.len_utf8();
                        let potential_json = &text[s..e];

                        // Validate that this is actually valid JSON before returning
                        if serde_json::from_str::<Value>(potential_json).is_ok() {
                            let json = potential_json.to_string();
                            let normal = format!("{}{}", &text[..s], &text[e..]);
                            return Some((json, normal));
                        } else {
                            // Not valid JSON, reset and continue looking
                            start = None;
                            continue;
144
145
146
                        }
                    }
                }
147
                _ => {}
148
149
150
            }
        }
        None
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
    }

    /// Parse a single JSON object into a ToolCall.
    ///
    /// Returns `Ok(None)` when the object does not carry a string tool name
    /// under either the "name" or "function" key.
    fn parse_single_object(&self, obj: &Value) -> ToolParserResult<Option<ToolCall>> {
        // A tool call is identified by a string under "name" (or "function")
        let tool_name = match obj
            .get("name")
            .or_else(|| obj.get("function"))
            .and_then(Value::as_str)
        {
            Some(n) => n,
            None => return Ok(None),
        };

        // Arguments may live under "arguments" or "parameters"; default to {}
        let default_args = Value::Object(serde_json::Map::new());
        let args_value = obj
            .get("arguments")
            .or_else(|| obj.get("parameters"))
            .unwrap_or(&default_args);

        // Serialize the arguments back into a JSON string
        let arguments = serde_json::to_string(args_value)
            .map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;

        // Use the provided id if present, otherwise synthesize a unique one
        let id = match obj.get("id").and_then(Value::as_str) {
            Some(existing) => existing.to_string(),
            None => format!("call_{}", uuid::Uuid::new_v4()),
        };

        Ok(Some(ToolCall {
            id,
            r#type: "function".to_string(),
            function: FunctionCall {
                name: tool_name.to_string(),
                arguments,
            },
        }))
    }

    /// Parse JSON value(s) into tool calls.
    ///
    /// An array yields one call per tool-shaped element; an object yields at
    /// most one call; any other JSON type yields an empty list.
    fn parse_json_value(&self, value: &Value) -> ToolParserResult<Vec<ToolCall>> {
        match value {
            Value::Array(items) => {
                // Collect every element that parses as a tool call
                let mut calls = Vec::with_capacity(items.len());
                for item in items {
                    if let Some(call) = self.parse_single_object(item)? {
                        calls.push(call);
                    }
                }
                Ok(calls)
            }
            Value::Object(_) => {
                // A lone object produces zero or one tool call
                Ok(self.parse_single_object(value)?.into_iter().collect())
            }
            // Not a valid tool call format
            _ => Ok(Vec::new()),
        }
    }

    /// Check if text contains potential tool call markers
    fn has_tool_markers(&self, text: &str) -> bool {
        // If no start tokens configured, check for JSON structure
224
        if self.token_config.start_tokens.is_empty() {
225
226
227
228
229
            // For JSON, we just need to see the start of an object or array
            return text.contains('{') || text.contains('[');
        }

        // Check for any start token
230
231
        let has_start_token = self
            .token_config
232
233
            .start_tokens
            .iter()
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
            .any(|token| text.contains(token));

        // Also check if we have what looks like JSON even without start token
        // This handles cases where we've already processed the start token
        // and are working on subsequent tools
        has_start_token || (text.trim_start().starts_with('{') && text.contains(r#""name""#))
    }

    /// Check if text might contain a partial start token (for streaming)
    fn has_partial_start_token(&self, text: &str) -> bool {
        if self.token_config.start_tokens.is_empty() {
            return false;
        }

        // Check if the end of the buffer could be the start of any start token
        for start_token in &self.token_config.start_tokens {
            for i in 1..start_token.len() {
                let token_prefix = &start_token[..i];
                if text.ends_with(token_prefix) {
                    return true;
                }
            }
        }
        false
258
259
260
261
262
263
264
265
266
267
268
    }
}

impl Default for JsonParser {
    // Delegates to `new()`: no wrapper tokens, ", " separator.
    fn default() -> Self {
        Self::new()
    }
}

#[async_trait]
impl ToolParser for JsonParser {
269
    async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
270
271
272
273
274
275
        // Check if we have multiple start tokens (e.g., multiple <|python_tag|> markers)
        if !self.token_config.start_tokens.is_empty() {
            let start_token = &self.token_config.start_tokens[0];
            if !start_token.is_empty() && text.matches(start_token).count() > 1 {
                // We have multiple occurrences of the start token
                let mut all_tools = Vec::new();
276
                let mut all_normal_text = String::new();
277
278
279
                let mut remaining = text;

                while let Some(start_pos) = remaining.find(start_token.as_str()) {
280
281
282
                    // Add text before this start token to normal text
                    all_normal_text.push_str(&remaining[..start_pos]);

283
284
285
286
287
288
289
290
291
292
293
294
295
                    // Extract content after this start token
                    let after_token = &remaining[start_pos + start_token.len()..];

                    // Find where this JSON ends (look for the next start token or end of string)
                    let end_pos = if let Some(next_start) = after_token.find(start_token.as_str()) {
                        next_start
                    } else {
                        after_token.len()
                    };

                    let json_content = &after_token[..end_pos];

                    // Try to extract and parse JSON from this segment
296
297
298
                    if let Some((extracted, segment_normal_text)) =
                        self.extract_json_from_text(json_content)
                    {
299
300
301
302
303
                        if let Ok(value) = serde_json::from_str::<Value>(&extracted) {
                            if let Ok(tools) = self.parse_json_value(&value) {
                                all_tools.extend(tools);
                            }
                        }
304
305
306
307
308
                        // Add the normal text from this segment
                        all_normal_text.push_str(&segment_normal_text);
                    } else {
                        // If no JSON found, add the entire content as normal text
                        all_normal_text.push_str(json_content);
309
310
311
312
313
314
315
316
317
                    }

                    // Move to the next segment
                    remaining = &remaining[start_pos + start_token.len() + end_pos..];
                    if remaining.is_empty() {
                        break;
                    }
                }

318
319
320
321
                // Add any remaining text
                all_normal_text.push_str(remaining);

                return Ok((all_normal_text, all_tools));
322
323
324
            }
        }

325
326
327
        // Extract JSON content from wrapper tokens if present
        let json_content = self.extract_json_content(text);

328
        // Try to parse as JSON first
329
        match serde_json::from_str::<Value>(json_content) {
330
331
332
333
            Ok(value) => {
                let tools = self.parse_json_value(&value)?;
                Ok((String::new(), tools))
            }
334
            Err(_) => {
335
                // If parse failed, check if we have multiple JSON objects separated by the configured separator
336
337
                // Only do this if we can reasonably expect multiple complete JSON objects
                // (i.e., text starts and ends with JSON-like structure)
338
339
                if !self.token_config.separator.is_empty()
                    && json_content.contains(&self.token_config.separator)
340
341
                    && json_content.trim().starts_with('{')
                    && json_content.trim().ends_with('}')
342
343
344
345
346
347
                {
                    let mut all_tools = Vec::new();

                    // Split by separator and try to parse each part
                    let parts: Vec<&str> =
                        json_content.split(&self.token_config.separator).collect();
348
349
                    let mut normal_parts = Vec::new();

350
351
352
                    for part in parts {
                        let trimmed = part.trim();
                        if trimmed.is_empty() {
353
                            normal_parts.push(trimmed.to_string());
354
355
356
357
358
359
360
361
                            continue;
                        }

                        // Try to parse this part as JSON
                        if let Ok(value) = serde_json::from_str::<Value>(trimmed) {
                            if let Ok(tools) = self.parse_json_value(&value) {
                                all_tools.extend(tools);
                            }
362
363
364
365
                            normal_parts.push(trimmed.to_string());
                        } else if let Some((extracted, part_normal_text)) =
                            self.extract_json_from_text(trimmed)
                        {
366
367
368
369
370
371
                            // Try extracting JSON from this part
                            if let Ok(value) = serde_json::from_str::<Value>(&extracted) {
                                if let Ok(tools) = self.parse_json_value(&value) {
                                    all_tools.extend(tools);
                                }
                            }
372
373
374
                            normal_parts.push(part_normal_text);
                        } else {
                            normal_parts.push(trimmed.to_string());
375
376
377
                        }
                    }

378
379
380
381
                    // Rejoin with the original separator to preserve it
                    let all_normal_text = normal_parts.join(&self.token_config.separator);

                    return Ok((all_normal_text, all_tools));
382
383
                }

384
                // If no wrapper tokens configured and parse failed, try to extract JSON from mixed text
385
                if self.token_config.start_tokens.is_empty() {
386
387
388
389
                    if let Some((extracted_json, normal_text)) = self.extract_json_from_text(text) {
                        if let Ok(value) = serde_json::from_str::<Value>(&extracted_json) {
                            let tools = self.parse_json_value(&value)?;
                            return Ok((normal_text, tools));
390
391
392
                        }
                    }
                }
393
394
395

                // No valid JSON found, return original text as normal text
                Ok((text.to_string(), vec![]))
396
397
398
399
400
401
402
403
404
405
406
407
408
            }
        }
    }

    async fn parse_incremental(
        &self,
        chunk: &str,
        state: &mut ParseState,
    ) -> ToolParserResult<StreamResult> {
        state.buffer.push_str(chunk);

        // Check if we have potential tool calls
        if !self.has_tool_markers(&state.buffer) {
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
            if self.has_partial_start_token(&state.buffer) {
                // We might be in the middle of receiving a start token, wait for more data
                return Ok(StreamResult::Incomplete);
            }

            // No tool markers and no partial tokens - return all buffered content as normal text
            let normal_text = std::mem::take(&mut state.buffer);
            return Ok(StreamResult::NormalText(normal_text));
        }

        // Check for text before tool markers and extract it as normal text
        if !self.token_config.start_tokens.is_empty() {
            let start_token = &self.token_config.start_tokens[0];
            if !start_token.is_empty() {
                if let Some(marker_pos) = state.buffer.find(start_token) {
                    if marker_pos > 0 {
                        // We have text before the tool marker - extract it as normal text
                        let normal_text: String = state.buffer.drain(..marker_pos).collect();
                        return Ok(StreamResult::NormalText(normal_text));
                    }
                }
            }
        } else {
            // For JSON without start tokens, look for the start of JSON structure
            // Find whichever comes first: '{' or '['
            let brace_pos = state.buffer.find('{');
            let bracket_pos = state.buffer.find('[');
            let json_pos = brace_pos.iter().chain(bracket_pos.iter()).min().copied();

            if let Some(pos) = json_pos {
                if pos > 0 {
                    // We have text before JSON structure - extract it as normal text
                    let normal_text: String = state.buffer.drain(..pos).collect();
                    return Ok(StreamResult::NormalText(normal_text));
                }
            }
445
446
        }

447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
        // Extract JSON content first to check for separators
        let extracted_json = self.extract_json_content(&state.buffer);

        // Handle multiple JSON objects with separators
        // Check if we have a separator and potentially multiple JSON objects
        let separator = &self.token_config.separator;
        if !separator.is_empty() && extracted_json.contains(separator.as_str()) {
            // Try to find a complete JSON object before the separator
            if let Some(separator_pos) = extracted_json.find(separator.as_str()) {
                // Get JSON before separator
                let before_separator = &extracted_json[..separator_pos];

                // Try to parse the JSON before the separator
                match serde_json::from_str::<Value>(before_separator) {
                    Ok(value) => {
                        // Parse tool calls from this JSON
                        let tools = self.parse_json_value(&value)?;
                        if !tools.is_empty() {
                            // We need to figure out how much to remove from the original buffer
                            // Find where the separator is in the original buffer and remove up to and including it
                            if let Some(sep_in_original) = state.buffer.find(separator.as_str()) {
468
469
                                // Remove processed content up to and including separator
                                state.buffer.drain(..=sep_in_original + separator.len() - 1);
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
                            }

                            // Return the first tool as complete
                            if let Some(tool) = tools.into_iter().next() {
                                return Ok(StreamResult::ToolComplete(tool));
                            }
                        }
                    }
                    Err(_) => {
                        // Failed to parse, continue to try other methods
                    }
                }
            }
        }

        // Handle multiple start tokens (e.g., multiple <|python_tag|> markers)
        if !self.token_config.start_tokens.is_empty() {
            let start_token = &self.token_config.start_tokens[0];
            if !start_token.is_empty() {
                // Find all occurrences of start token
                let occurrences: Vec<_> =
                    state.buffer.match_indices(start_token.as_str()).collect();
                if occurrences.len() > 1 {
                    // We have multiple start tokens, try to process the first complete one
                    let first_pos = occurrences[0].0;
                    let second_pos = occurrences[1].0;

                    // Extract content between first and second start token
                    let first_json_section = &state.buffer[first_pos..second_pos];
                    let json_content = self.extract_json_content(first_json_section);

                    // Try to parse this as complete JSON
                    if let Ok(value) = serde_json::from_str::<Value>(json_content) {
                        // Parse tool calls from this JSON
                        let tools = self.parse_json_value(&value)?;
                        if !tools.is_empty() {
                            // Remove the processed section from buffer
                            let remaining = state.buffer[second_pos..].to_string();
                            state.buffer = remaining;

                            // Return the first tool as complete
                            if let Some(tool) = tools.into_iter().next() {
                                return Ok(StreamResult::ToolComplete(tool));
                            }
                        }
                    }
                }
            }
        }

        // Regular single JSON parsing
521
522
523
524
525
526
527
528
        // Extract JSON content
        let json_content = self.extract_json_content(&state.buffer);

        // Try to parse with partial JSON parser
        match self.partial_json.parse_value(json_content) {
            Ok((value, consumed)) => {
                // Check if we have a complete JSON structure
                if consumed == json_content.len() {
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
                    // Check if this is truly complete or just has null from incomplete parsing
                    // We need to ensure the JSON actually ends properly (not cut off mid-key)
                    let trimmed = json_content.trim();
                    let looks_complete = trimmed.ends_with('}') || trimmed.ends_with(']');

                    if looks_complete {
                        // Complete JSON, parse tool calls
                        let tools = self.parse_json_value(&value)?;
                        if !tools.is_empty() {
                            // Clear buffer since we consumed everything
                            state.buffer.clear();

                            // Return the first tool as complete
                            // TODO simplified version, address more complex version
                            if let Some(tool) = tools.into_iter().next() {
                                return Ok(StreamResult::ToolComplete(tool));
                            }
546
547
548
549
550
                        }
                    }
                } else {
                    // Partial JSON, try to extract tool name
                    if let Some(name) = value.get("name").and_then(|v| v.as_str()) {
551
                        // TODO simplified version, address more complex version
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
                        // Just return the tool name once we see it
                        if !state.in_string {
                            state.in_string = true; // Use as a flag for "name sent"
                            return Ok(StreamResult::ToolName {
                                index: 0,
                                name: name.to_string(),
                            });
                        }

                        // Check for complete arguments
                        if let Some(args) =
                            value.get("arguments").or_else(|| value.get("parameters"))
                        {
                            if let Ok(args_str) = serde_json::to_string(args) {
                                // Return arguments as a single update
                                return Ok(StreamResult::ToolArguments {
                                    index: 0,
                                    arguments: args_str,
                                });
                            }
                        }
                    }
                }
            }
            Err(_) => {
                // Failed to parse even as partial JSON
578
                // Continue waiting for more data
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
            }
        }

        Ok(StreamResult::Incomplete)
    }

    fn detect_format(&self, text: &str) -> bool {
        // Check if text contains JSON-like structure
        if self.has_tool_markers(text) {
            // Try to extract and parse
            let json_content = self.extract_json_content(text);

            // Check if it looks like valid JSON for tool calls
            if let Ok(value) = serde_json::from_str::<Value>(json_content) {
                match value {
                    Value::Object(ref obj) => {
                        // Check for tool call structure
                        obj.contains_key("name") || obj.contains_key("function")
                    }
                    Value::Array(ref arr) => {
                        // Check if array contains tool-like objects
                        arr.iter().any(|v| {
601
602
603
604
605
                            if let Some(obj) = v.as_object() {
                                obj.contains_key("name") || obj.contains_key("function")
                            } else {
                                false
                            }
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
                        })
                    }
                    _ => false,
                }
            } else {
                false
            }
        } else {
            false
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn test_parse_single_tool_call() {
        let parser = JsonParser::new();
        let input = r#"{"name": "get_weather", "arguments": {"location": "San Francisco"}}"#;

        let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
        assert_eq!(tool_calls.len(), 1);
        assert_eq!(tool_calls[0].function.name, "get_weather");
        assert_eq!(normal_text, ""); // Pure JSON should have no normal text
    }

    #[tokio::test]
    async fn test_extract_json_with_normal_text() {
        let parser = JsonParser::new();

        // Test extraction of JSON from mixed text
        let input =
            r#"Here is some text before {"name": "test", "arguments": {}} and some text after."#;
        let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();

        assert_eq!(tool_calls.len(), 1);
        assert_eq!(tool_calls[0].function.name, "test");
        assert_eq!(
            normal_text,
            "Here is some text before  and some text after."
        );
    }

    #[tokio::test]
    async fn test_extract_json_array_with_normal_text() {
        let parser = JsonParser::new();

        // Test extraction of JSON array from mixed text
        let input = r#"Prefix text [{"name": "func1", "arguments": {}}, {"name": "func2", "arguments": {}}] suffix text"#;
        let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();

        assert_eq!(tool_calls.len(), 2);
        assert_eq!(tool_calls[0].function.name, "func1");
        assert_eq!(tool_calls[1].function.name, "func2");
        assert_eq!(normal_text, "Prefix text  suffix text");
    }

    #[tokio::test]
    async fn test_parse_multiple_tool_calls() {
        let parser = JsonParser::new();
        let input = r#"[
            {"name": "get_weather", "arguments": {"location": "SF"}},
            {"name": "search", "arguments": {"query": "news"}}
        ]"#;

        let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
        assert_eq!(tool_calls.len(), 2);
        assert_eq!(tool_calls[0].function.name, "get_weather");
        assert_eq!(tool_calls[1].function.name, "search");
        assert_eq!(normal_text, ""); // Pure JSON should have no normal text
    }

    #[tokio::test]
    async fn test_parse_with_parameters_key() {
        let parser = JsonParser::new();
        let input = r#"{"name": "calculate", "parameters": {"x": 10, "y": 20}}"#;

        let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
        assert_eq!(tool_calls.len(), 1);
        assert_eq!(tool_calls[0].function.name, "calculate");
        assert!(tool_calls[0].function.arguments.contains("10"));
        assert_eq!(normal_text, ""); // Pure JSON should have no normal text
    }

    #[tokio::test]
    async fn test_parse_with_wrapper_tokens() {
        let parser = JsonParser::with_config(TokenConfig {
            start_tokens: vec!["<tool>".to_string()],
            end_tokens: vec!["</tool>".to_string()],
            separator: ", ".to_string(),
        });

        let input = r#"<tool>{"name": "test", "arguments": {}}</tool>"#;
        let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
        assert_eq!(tool_calls.len(), 1);
        assert_eq!(tool_calls[0].function.name, "test");
        assert_eq!(normal_text, ""); // Wrapper tokens with no extra text
    }

    #[tokio::test]
    async fn test_parse_with_start_token_invalid_json() {
        let parser = JsonParser::with_config(TokenConfig {
            start_tokens: vec!["<|python_tag|>".to_string()],
            end_tokens: vec!["".to_string()],
            separator: ";".to_string(),
        });

        let input = r#"Hello world <|python_tag|>this is not valid json at all"#;
        let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
        assert_eq!(tool_calls.len(), 0);
        assert_eq!(normal_text, input); // Should return entire original text when JSON parsing fails
    }

    #[tokio::test]
    async fn test_parse_with_normal_text() {
        let parser = JsonParser::new();
        let input = r#"Here is the weather data: {"name": "get_weather", "arguments": {"location": "SF"}} Let me know if you need more info."#;

        let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
        assert_eq!(tool_calls.len(), 1);
        assert_eq!(tool_calls[0].function.name, "get_weather");
        assert_eq!(
            normal_text,
            "Here is the weather data:  Let me know if you need more info."
        ); // Normal text is now extracted when JSON is found in mixed content
    }

    #[test]
    fn test_detect_format() {
        let parser = JsonParser::new();

        assert!(parser.detect_format(r#"{"name": "test", "arguments": {}}"#));
        assert!(parser.detect_format(r#"[{"name": "test"}]"#));
        assert!(!parser.detect_format("plain text"));
        assert!(!parser.detect_format(r#"{"key": "value"}"#));
    }

    #[tokio::test]
    async fn test_streaming_parse() {
        // Just verify that streaming eventually produces a complete tool call
        let parser = JsonParser::new();
        let mut state = ParseState::new();

        // Send complete JSON in one go
        // TODO simplified version, address more complex version
        let full_json = r#"{"name": "get_weather", "arguments": {"location": "SF"}}"#;

        let result = parser
            .parse_incremental(full_json, &mut state)
            .await
            .unwrap();

        // Should get a complete tool immediately with complete JSON
        match result {
            StreamResult::ToolComplete(tool) => {
                assert_eq!(tool.function.name, "get_weather");
                assert!(tool.function.arguments.contains("SF"));
            }
            _ => panic!("Expected ToolComplete for complete JSON input"),
        }
    }
}