use async_trait::async_trait;
use regex::Regex;
use serde_json::Value;

use crate::tool_parser::{
    errors::{ToolParserError, ToolParserResult},
    partial_json::PartialJson,
    state::ParseState,
    traits::ToolParser,
    types::{FunctionCall, StreamResult, TokenConfig, ToolCall},
};

/// JSON format parser for tool calls
///
/// Handles various JSON formats for function calling:
/// - Single tool call: {"name": "fn", "arguments": {...}}
/// - Multiple tool calls: [{"name": "fn1", "arguments": {...}}, ...]
/// - With parameters instead of arguments: {"name": "fn", "parameters": {...}}
///
/// Supports configurable token markers for different models
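///
/// A minimal usage sketch (the surrounding async context and error handling are
/// assumed here; the unit tests at the bottom of this file show the exact calls):
///
/// ```ignore
/// let parser = JsonParser::new();
/// let (normal_text, tools) = parser
///     .parse_complete(r#"{"name": "get_weather", "arguments": {"location": "SF"}}"#)
///     .await?;
/// assert_eq!(tools[0].function.name, "get_weather");
/// assert!(normal_text.is_empty());
/// ```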
pub struct JsonParser {
    /// Token configuration for parsing
    token_config: TokenConfig,
    /// Parser for handling incomplete JSON during streaming
    partial_json: PartialJson,
    /// Regex patterns for extracting content between tokens
    extractors: Vec<Regex>,
}

impl JsonParser {
    /// Create a new JSON parser with default configuration
    pub fn new() -> Self {
        Self::with_config(TokenConfig {
            start_tokens: vec![],
            end_tokens: vec![],
            separator: ", ".to_string(),
        })
    }

    /// Create a parser with custom token configuration
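    ///
    /// A sketch with wrapper tokens (the <tool> markers are only an illustration;
    /// see test_parse_with_wrapper_tokens below for a real case):
    ///
    /// ```ignore
    /// let parser = JsonParser::with_config(TokenConfig {
    ///     start_tokens: vec!["<tool>".to_string()],
    ///     end_tokens: vec!["</tool>".to_string()],
    ///     separator: ", ".to_string(),
    /// });
    /// ```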
    pub fn with_config(config: TokenConfig) -> Self {
        // Build extraction patterns for each token pair
        let extractors: Vec<Regex> = config
            .iter_pairs()
            .filter_map(|(start, end)| {
                if !start.is_empty() && !end.is_empty() {
                    // Use (?s) flag to enable DOTALL mode so . matches newlines
                    let pattern =
                        format!(r"(?s){}(.*?){}", regex::escape(start), regex::escape(end));
                    Regex::new(&pattern).ok()
                } else {
                    None
                }
            })
            .collect();

        Self {
            token_config: config,
            partial_json: PartialJson::default(),
            extractors,
        }
    }

    /// Extract JSON content from text, handling wrapper tokens if configured
    fn extract_json_content<'a>(&self, text: &'a str) -> &'a str {
        let mut content = text;

        // Try each extractor pattern (for tokens with both start and end)
        for extractor in &self.extractors {
            if let Some(captures) = extractor.captures(content) {
                if let Some(matched) = captures.get(1) {
                    return matched.as_str().trim();
                }
            }
        }

        // Handle special case where there's a start token but no end token
        for (start, end) in self.token_config.iter_pairs() {
            if !start.is_empty() && end.is_empty() {
                // Find the start token and extract everything after it
                if let Some(pos) = content.find(start) {
                    content = &content[pos + start.len()..];
                    return content.trim();
                }
            }
        }

        content.trim()
    }

    /// Try to extract the first valid JSON object or array from text that may contain other content.
    /// Returns (json_string, normal_text), where normal_text is the text before and after the JSON.
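    ///
    /// For example, given the text: Here is data: {"name": "f", "arguments": {}} done.
    /// this returns the JSON object as one string and "Here is data:  done."
    /// (the text before and after it, concatenated) as the normal text.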
    fn extract_json_from_text(&self, text: &str) -> Option<(String, String)> {
        let mut in_string = false;
        let mut escape = false;
        let mut stack: Vec<char> = Vec::with_capacity(8);
        let mut start: Option<usize> = None;

        for (i, ch) in text.char_indices() {
            if escape {
                escape = false;
                continue;
            }

            match ch {
                '\\' if in_string => escape = true,
                '"' => in_string = !in_string,
                _ if in_string => {}
                '{' | '[' => {
                    if start.is_none() {
                        start = Some(i);
                    }
                    stack.push(ch);
                }
                '}' | ']' => {
                    let Some(open) = stack.pop() else {
                        // Stray closer - reset and continue looking for next valid JSON
                        start = None;
                        continue;
                    };

                    let valid = (open == '{' && ch == '}') || (open == '[' && ch == ']');
                    if !valid {
                        // Mismatch - reset and continue looking
                        start = None;
                        stack.clear();
                        continue;
                    }

                    if stack.is_empty() {
                        let s = start.unwrap();
                        let e = i + ch.len_utf8();
                        let potential_json = &text[s..e];

                        // Validate that this is actually valid JSON before returning
                        if serde_json::from_str::<Value>(potential_json).is_ok() {
                            let json = potential_json.to_string();
                            let normal = format!("{}{}", &text[..s], &text[e..]);
                            return Some((json, normal));
                        } else {
                            // Not valid JSON, reset and continue looking
                            start = None;
                            continue;
                        }
                    }
                }
                _ => {}
            }
        }
        None
    }

    /// Parse a single JSON object into a ToolCall
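    ///
    /// Accepts either an "arguments" or a "parameters" key, e.g.
    /// {"name": "calculate", "parameters": {"x": 10}} becomes a ToolCall whose
    /// function.arguments is the JSON string {"x":10}; objects without a
    /// "name"/"function" key yield Ok(None).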
    fn parse_single_object(&self, obj: &Value) -> ToolParserResult<Option<ToolCall>> {
        // Check if this looks like a tool call
        let name = obj
            .get("name")
            .or_else(|| obj.get("function"))
            .and_then(|v| v.as_str());

        if let Some(name) = name {
            // Get arguments - support both "arguments" and "parameters" keys
            let empty_obj = Value::Object(serde_json::Map::new());
            let args = obj
                .get("arguments")
                .or_else(|| obj.get("parameters"))
                .unwrap_or(&empty_obj);

            // Convert arguments to JSON string
            let arguments = serde_json::to_string(args)
                .map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;

            // Generate a unique ID if not provided
            let id = obj
                .get("id")
                .and_then(|v| v.as_str())
                .map(String::from)
                .unwrap_or_else(|| format!("call_{}", uuid::Uuid::new_v4()));

            Ok(Some(ToolCall {
                id,
                r#type: "function".to_string(),
                function: FunctionCall {
                    name: name.to_string(),
                    arguments,
                },
            }))
        } else {
            Ok(None)
        }
    }

    /// Parse JSON value(s) into tool calls
    fn parse_json_value(&self, value: &Value) -> ToolParserResult<Vec<ToolCall>> {
        let mut tools = Vec::new();

        match value {
            Value::Array(arr) => {
                // Parse each element in the array
                for item in arr {
                    if let Some(tool) = self.parse_single_object(item)? {
                        tools.push(tool);
                    }
                }
            }
            Value::Object(_) => {
                // Single tool call
                if let Some(tool) = self.parse_single_object(value)? {
                    tools.push(tool);
                }
            }
            _ => {
                // Not a valid tool call format
                return Ok(vec![]);
            }
        }

        Ok(tools)
    }

    /// Check if text contains potential tool call markers
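    ///
    /// With no start tokens configured, any '{' or '[' counts as a marker;
    /// otherwise at least one configured start token must appear in the text.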
    fn has_tool_markers(&self, text: &str) -> bool {
        // If no start tokens configured, check for JSON structure
        if self.token_config.start_tokens.is_empty() {
            // For JSON, we just need to see the start of an object or array
            return text.contains('{') || text.contains('[');
        }

        // Check for any start token
        self.token_config
            .start_tokens
            .iter()
            .any(|token| text.contains(token))
    }
}

impl Default for JsonParser {
    fn default() -> Self {
        Self::new()
    }
}

#[async_trait]
impl ToolParser for JsonParser {
    async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
        // Check if we have multiple start tokens (e.g., multiple <|python_tag|> markers)
        if !self.token_config.start_tokens.is_empty() {
            let start_token = &self.token_config.start_tokens[0];
            if !start_token.is_empty() && text.matches(start_token).count() > 1 {
                // We have multiple occurrences of the start token
                let mut all_tools = Vec::new();
                let mut all_normal_text = String::new();
                let mut remaining = text;

                while let Some(start_pos) = remaining.find(start_token.as_str()) {
                    // Add text before this start token to normal text
                    all_normal_text.push_str(&remaining[..start_pos]);

                    // Extract content after this start token
                    let after_token = &remaining[start_pos + start_token.len()..];

                    // Find where this JSON ends (look for the next start token or end of string)
                    let end_pos = if let Some(next_start) = after_token.find(start_token.as_str()) {
                        next_start
                    } else {
                        after_token.len()
                    };

                    let json_content = &after_token[..end_pos];

                    // Try to extract and parse JSON from this segment
                    if let Some((extracted, segment_normal_text)) =
                        self.extract_json_from_text(json_content)
                    {
                        if let Ok(value) = serde_json::from_str::<Value>(&extracted) {
                            if let Ok(tools) = self.parse_json_value(&value) {
                                all_tools.extend(tools);
                            }
                        }
                        // Add the normal text from this segment
                        all_normal_text.push_str(&segment_normal_text);
                    } else {
                        // If no JSON found, add the entire content as normal text
                        all_normal_text.push_str(json_content);
                    }

                    // Move to the next segment
                    remaining = &remaining[start_pos + start_token.len() + end_pos..];
                    if remaining.is_empty() {
                        break;
                    }
                }

                // Add any remaining text
                all_normal_text.push_str(remaining);

                return Ok((all_normal_text, all_tools));
            }
        }

        // Extract JSON content from wrapper tokens if present
        let json_content = self.extract_json_content(text);

        // Try to parse as JSON first
        match serde_json::from_str::<Value>(json_content) {
            Ok(value) => {
                let tools = self.parse_json_value(&value)?;
                Ok((String::new(), tools))
            }
            Err(_) => {
                // If parse failed, check if we have multiple JSON objects separated by the configured separator
                // Only do this if we can reasonably expect multiple complete JSON objects
                // (i.e., text starts and ends with JSON-like structure)
                if !self.token_config.separator.is_empty()
                    && json_content.contains(&self.token_config.separator)
                    && json_content.trim().starts_with('{')
                    && json_content.trim().ends_with('}')
                {
                    let mut all_tools = Vec::new();

                    // Split by separator and try to parse each part
                    let parts: Vec<&str> =
                        json_content.split(&self.token_config.separator).collect();
                    let mut normal_parts = Vec::new();

                    for part in parts {
                        let trimmed = part.trim();
                        if trimmed.is_empty() {
                            normal_parts.push(trimmed.to_string());
                            continue;
                        }

                        // Try to parse this part as JSON
                        if let Ok(value) = serde_json::from_str::<Value>(trimmed) {
                            if let Ok(tools) = self.parse_json_value(&value) {
                                all_tools.extend(tools);
                            }
                            normal_parts.push(trimmed.to_string());
                        } else if let Some((extracted, part_normal_text)) =
                            self.extract_json_from_text(trimmed)
                        {
                            // Try extracting JSON from this part
                            if let Ok(value) = serde_json::from_str::<Value>(&extracted) {
                                if let Ok(tools) = self.parse_json_value(&value) {
                                    all_tools.extend(tools);
                                }
                            }
                            normal_parts.push(part_normal_text);
                        } else {
                            normal_parts.push(trimmed.to_string());
                        }
                    }

                    // Rejoin with the original separator to preserve it
                    let all_normal_text = normal_parts.join(&self.token_config.separator);

                    return Ok((all_normal_text, all_tools));
                }

                // If no wrapper tokens configured and parse failed, try to extract JSON from mixed text
                if self.token_config.start_tokens.is_empty() {
                    if let Some((extracted_json, normal_text)) = self.extract_json_from_text(text) {
                        if let Ok(value) = serde_json::from_str::<Value>(&extracted_json) {
                            let tools = self.parse_json_value(&value)?;
                            return Ok((normal_text, tools));
                        }
                    }
                }

                // No valid JSON found, return original text as normal text
                Ok((text.to_string(), vec![]))
            }
        }
    }

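    /// Incrementally parse streamed chunks of model output.
    ///
    /// Chunks accumulate in state.buffer. Depending on how much valid JSON has
    /// arrived, this returns StreamResult::Incomplete (keep buffering),
    /// StreamResult::ToolName / StreamResult::ToolArguments (partial progress on
    /// the current call), or StreamResult::ToolComplete once a full tool call has
    /// been consumed from the buffer.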
    async fn parse_incremental(
        &self,
        chunk: &str,
        state: &mut ParseState,
    ) -> ToolParserResult<StreamResult> {
        state.buffer.push_str(chunk);

        // Check if we have potential tool calls
        if !self.has_tool_markers(&state.buffer) {
            // No tool markers, return as incomplete
            return Ok(StreamResult::Incomplete);
        }

        // Extract JSON content first to check for separators
        let extracted_json = self.extract_json_content(&state.buffer);

        // Handle multiple JSON objects with separators
        // Check if we have a separator and potentially multiple JSON objects
        let separator = &self.token_config.separator;
        if !separator.is_empty() && extracted_json.contains(separator.as_str()) {
            // Try to find a complete JSON object before the separator
            if let Some(separator_pos) = extracted_json.find(separator.as_str()) {
                // Get JSON before separator
                let before_separator = &extracted_json[..separator_pos];

                // Try to parse the JSON before the separator
                match serde_json::from_str::<Value>(before_separator) {
                    Ok(value) => {
                        // Parse tool calls from this JSON
                        let tools = self.parse_json_value(&value)?;
                        if !tools.is_empty() {
                            // We need to figure out how much to remove from the original buffer
                            // Find where the separator is in the original buffer and remove up to and including it
                            if let Some(sep_in_original) = state.buffer.find(separator.as_str()) {
                                let remaining =
                                    state.buffer[sep_in_original + separator.len()..].to_string();
                                state.buffer = remaining;
                            }

                            // Return the first tool as complete
                            if let Some(tool) = tools.into_iter().next() {
                                return Ok(StreamResult::ToolComplete(tool));
                            }
                        }
                    }
                    Err(_) => {
                        // Failed to parse, continue to try other methods
                    }
                }
            }
        }

        // Handle multiple start tokens (e.g., multiple <|python_tag|> markers)
        if !self.token_config.start_tokens.is_empty() {
            let start_token = &self.token_config.start_tokens[0];
            if !start_token.is_empty() {
                // Find all occurrences of start token
                let occurrences: Vec<_> =
                    state.buffer.match_indices(start_token.as_str()).collect();
                if occurrences.len() > 1 {
                    // We have multiple start tokens, try to process the first complete one
                    let first_pos = occurrences[0].0;
                    let second_pos = occurrences[1].0;

                    // Extract content between first and second start token
                    let first_json_section = &state.buffer[first_pos..second_pos];
                    let json_content = self.extract_json_content(first_json_section);

                    // Try to parse this as complete JSON
                    if let Ok(value) = serde_json::from_str::<Value>(json_content) {
                        // Parse tool calls from this JSON
                        let tools = self.parse_json_value(&value)?;
                        if !tools.is_empty() {
                            // Remove the processed section from buffer
                            let remaining = state.buffer[second_pos..].to_string();
                            state.buffer = remaining;

                            // Return the first tool as complete
                            if let Some(tool) = tools.into_iter().next() {
                                return Ok(StreamResult::ToolComplete(tool));
                            }
                        }
                    }
                }
            }
        }

        // Regular single JSON parsing
        // Extract JSON content
        let json_content = self.extract_json_content(&state.buffer);

        // Try to parse with partial JSON parser
        match self.partial_json.parse_value(json_content) {
            Ok((value, consumed)) => {
                // Check if we have a complete JSON structure
                if consumed == json_content.len() {
                    // Check if this is truly complete or just has null from incomplete parsing
                    // We need to ensure the JSON actually ends properly (not cut off mid-key)
                    let trimmed = json_content.trim();
                    let looks_complete = trimmed.ends_with('}') || trimmed.ends_with(']');

                    if looks_complete {
                        // Complete JSON, parse tool calls
                        let tools = self.parse_json_value(&value)?;
                        if !tools.is_empty() {
                            // Clear buffer since we consumed everything
                            state.buffer.clear();

                            // Return the first tool as complete
                            // TODO: simplified version; handle the more complex case
                            if let Some(tool) = tools.into_iter().next() {
                                return Ok(StreamResult::ToolComplete(tool));
                            }
                        }
                    }
                } else {
                    // Partial JSON, try to extract tool name
                    if let Some(name) = value.get("name").and_then(|v| v.as_str()) {
                        // TODO: simplified version; handle the more complex case
                        // Just return the tool name once we see it
                        if !state.in_string {
                            state.in_string = true; // Use as a flag for "name sent"
                            return Ok(StreamResult::ToolName {
                                index: 0,
                                name: name.to_string(),
                            });
                        }

                        // Check for complete arguments
                        if let Some(args) =
                            value.get("arguments").or_else(|| value.get("parameters"))
                        {
                            if let Ok(args_str) = serde_json::to_string(args) {
                                // Return arguments as a single update
                                return Ok(StreamResult::ToolArguments {
                                    index: 0,
                                    arguments: args_str,
                                });
                            }
                        }
                    }
                }
            }
            Err(_) => {
                // Failed to parse even as partial JSON
                // Keep buffering
            }
        }

        Ok(StreamResult::Incomplete)
    }

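    /// Heuristic format detection: the text must contain tool markers, the
    /// extracted content must parse as JSON, and that JSON must carry a "name"
    /// or "function" key (directly, or in at least one array element).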
    fn detect_format(&self, text: &str) -> bool {
        // Check if text contains JSON-like structure
        if self.has_tool_markers(text) {
            // Try to extract and parse
            let json_content = self.extract_json_content(text);

            // Check if it looks like valid JSON for tool calls
            if let Ok(value) = serde_json::from_str::<Value>(json_content) {
                match value {
                    Value::Object(ref obj) => {
                        // Check for tool call structure
                        obj.contains_key("name") || obj.contains_key("function")
                    }
                    Value::Array(ref arr) => {
                        // Check if array contains tool-like objects
                        arr.iter().any(|v| {
                            if let Some(obj) = v.as_object() {
                                obj.contains_key("name") || obj.contains_key("function")
                            } else {
                                false
                            }
                        })
                    }
                    _ => false,
                }
            } else {
                false
            }
        } else {
            false
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn test_parse_single_tool_call() {
        let parser = JsonParser::new();
        let input = r#"{"name": "get_weather", "arguments": {"location": "San Francisco"}}"#;

        let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
        assert_eq!(tool_calls.len(), 1);
        assert_eq!(tool_calls[0].function.name, "get_weather");
        assert_eq!(normal_text, ""); // Pure JSON should have no normal text
    }

    #[tokio::test]
    async fn test_extract_json_with_normal_text() {
        let parser = JsonParser::new();

        // Test extraction of JSON from mixed text
        let input =
            r#"Here is some text before {"name": "test", "arguments": {}} and some text after."#;
        let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();

        assert_eq!(tool_calls.len(), 1);
        assert_eq!(tool_calls[0].function.name, "test");
        assert_eq!(
            normal_text,
            "Here is some text before  and some text after."
        );
    }

    #[tokio::test]
    async fn test_extract_json_array_with_normal_text() {
        let parser = JsonParser::new();

        // Test extraction of JSON array from mixed text
        let input = r#"Prefix text [{"name": "func1", "arguments": {}}, {"name": "func2", "arguments": {}}] suffix text"#;
        let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();

        assert_eq!(tool_calls.len(), 2);
        assert_eq!(tool_calls[0].function.name, "func1");
        assert_eq!(tool_calls[1].function.name, "func2");
        assert_eq!(normal_text, "Prefix text  suffix text");
    }

    #[tokio::test]
    async fn test_parse_multiple_tool_calls() {
        let parser = JsonParser::new();
        let input = r#"[
            {"name": "get_weather", "arguments": {"location": "SF"}},
            {"name": "search", "arguments": {"query": "news"}}
        ]"#;

        let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
        assert_eq!(tool_calls.len(), 2);
        assert_eq!(tool_calls[0].function.name, "get_weather");
        assert_eq!(tool_calls[1].function.name, "search");
        assert_eq!(normal_text, ""); // Pure JSON should have no normal text
    }

    #[tokio::test]
    async fn test_parse_with_parameters_key() {
        let parser = JsonParser::new();
        let input = r#"{"name": "calculate", "parameters": {"x": 10, "y": 20}}"#;

        let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
        assert_eq!(tool_calls.len(), 1);
        assert_eq!(tool_calls[0].function.name, "calculate");
        assert!(tool_calls[0].function.arguments.contains("10"));
        assert_eq!(normal_text, ""); // Pure JSON should have no normal text
    }

    #[tokio::test]
    async fn test_parse_with_wrapper_tokens() {
        let parser = JsonParser::with_config(TokenConfig {
            start_tokens: vec!["<tool>".to_string()],
            end_tokens: vec!["</tool>".to_string()],
            separator: ", ".to_string(),
        });

        let input = r#"<tool>{"name": "test", "arguments": {}}</tool>"#;
        let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
        assert_eq!(tool_calls.len(), 1);
        assert_eq!(tool_calls[0].function.name, "test");
        assert_eq!(normal_text, ""); // Wrapper tokens with no extra text
    }

    #[tokio::test]
    async fn test_parse_with_start_token_invalid_json() {
        let parser = JsonParser::with_config(TokenConfig {
            start_tokens: vec!["<|python_tag|>".to_string()],
            end_tokens: vec!["".to_string()],
            separator: ";".to_string(),
        });

        let input = r#"Hello world <|python_tag|>this is not valid json at all"#;
        let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
        assert_eq!(tool_calls.len(), 0);
        assert_eq!(normal_text, input); // Should return entire original text when JSON parsing fails
    }

    #[tokio::test]
    async fn test_parse_with_normal_text() {
        let parser = JsonParser::new();
        let input = r#"Here is the weather data: {"name": "get_weather", "arguments": {"location": "SF"}} Let me know if you need more info."#;

        let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
        assert_eq!(tool_calls.len(), 1);
        assert_eq!(tool_calls[0].function.name, "get_weather");
        assert_eq!(
            normal_text,
            "Here is the weather data:  Let me know if you need more info."
        ); // Normal text is now extracted when JSON is found in mixed content
    }

    #[test]
    fn test_detect_format() {
        let parser = JsonParser::new();

        assert!(parser.detect_format(r#"{"name": "test", "arguments": {}}"#));
        assert!(parser.detect_format(r#"[{"name": "test"}]"#));
        assert!(!parser.detect_format("plain text"));
        assert!(!parser.detect_format(r#"{"key": "value"}"#));
    }

    #[tokio::test]
    async fn test_streaming_parse() {
        // Just verify that streaming eventually produces a complete tool call
        let parser = JsonParser::new();
        let mut state = ParseState::new();

        // Send complete JSON in one go
        // TODO: simplified version; address the more complex case
        let full_json = r#"{"name": "get_weather", "arguments": {"location": "SF"}}"#;

        let result = parser
            .parse_incremental(full_json, &mut state)
            .await
            .unwrap();

        // Should get a complete tool immediately with complete JSON
        match result {
            StreamResult::ToolComplete(tool) => {
                assert_eq!(tool.function.name, "get_weather");
                assert!(tool.function.arguments.contains("SF"));
            }
            _ => panic!("Expected ToolComplete for complete JSON input"),
        }
    }
}