// json_parser.rs
use std::mem;

use async_trait::async_trait;
use serde_json::Value;

use crate::protocols::spec::Tool;

use crate::tool_parser::{
    errors::{ToolParserError, ToolParserResult},
    parsers::helpers,
    partial_json::PartialJson,
    traits::ToolParser,
    types::{FunctionCall, StreamingParseResult, ToolCall, ToolCallItem},
};

/// JSON format parser for tool calls
///
16
/// Handles pure JSON formats for function calling:
17
18
19
20
21
22
/// - Single tool call: {"name": "fn", "arguments": {...}}
/// - Multiple tool calls: [{"name": "fn1", "arguments": {...}}, ...]
/// - With parameters instead of arguments: {"name": "fn", "parameters": {...}}
pub struct JsonParser {
    /// Parser for handling incomplete JSON during streaming
    partial_json: PartialJson,
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40

    /// Buffer for accumulating incomplete patterns across chunks
    buffer: String,

    /// Stores complete tool call info (name and arguments) for each tool being parsed
    prev_tool_call_arr: Vec<Value>,

    /// Index of currently streaming tool call (-1 means no active tool)
    current_tool_id: i32,

    /// Flag for whether current tool's name has been sent to client
    current_tool_name_sent: bool,

    /// Tracks raw JSON string content streamed to client for each tool's arguments
    streamed_args_for_tool: Vec<String>,

    /// Separator between multiple tool calls
    tool_call_separator: &'static str,
41
42
43
}

impl JsonParser {
44
    /// Create a new JSON parser
45
46
47
    pub fn new() -> Self {
        Self {
            partial_json: PartialJson::default(),
48
49
50
51
52
53
            buffer: String::new(),
            prev_tool_call_arr: Vec::new(),
            current_tool_id: -1,
            current_tool_name_sent: false,
            streamed_args_for_tool: Vec::new(),
            tool_call_separator: ",",
54
55
56
        }
    }

57
58
59
60
61
62
63
64
65
66
67
68
69
    /// Try to extract a first valid JSON object or array from text that may contain other content
    /// Returns (json_string, normal_text) where normal_text is text before and after the JSON
    fn extract_json_from_text(&self, text: &str) -> Option<(String, String)> {
        let mut in_string = false;
        let mut escape = false;
        let mut stack: Vec<char> = Vec::with_capacity(8);
        let mut start: Option<usize> = None;

        for (i, ch) in text.char_indices() {
            if escape {
                escape = false;
                continue;
            }
70

71
72
73
74
75
76
77
            match ch {
                '\\' if in_string => escape = true,
                '"' => in_string = !in_string,
                _ if in_string => {}
                '{' | '[' => {
                    if start.is_none() {
                        start = Some(i);
78
                    }
79
                    stack.push(ch);
80
                }
81
82
83
84
85
86
                '}' | ']' => {
                    let Some(open) = stack.pop() else {
                        // Stray closer - reset and continue looking for next valid JSON
                        start = None;
                        continue;
                    };
87

88
89
90
91
92
93
94
                    let valid = (open == '{' && ch == '}') || (open == '[' && ch == ']');
                    if !valid {
                        // Mismatch - reset and continue looking
                        start = None;
                        stack.clear();
                        continue;
                    }
95

96
97
98
99
100
101
102
103
104
105
106
107
108
109
                    if stack.is_empty() {
                        let s = start.unwrap();
                        let e = i + ch.len_utf8();
                        let potential_json = &text[s..e];

                        // Validate that this is actually valid JSON before returning
                        if serde_json::from_str::<Value>(potential_json).is_ok() {
                            let json = potential_json.to_string();
                            let normal = format!("{}{}", &text[..s], &text[e..]);
                            return Some((json, normal));
                        } else {
                            // Not valid JSON, reset and continue looking
                            start = None;
                            continue;
110
111
112
                        }
                    }
                }
113
                _ => {}
114
115
116
            }
        }
        None
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
    }

    /// Parse a single JSON object into a ToolCall
    fn parse_single_object(&self, obj: &Value) -> ToolParserResult<Option<ToolCall>> {
        // Check if this looks like a tool call
        let name = obj
            .get("name")
            .or_else(|| obj.get("function"))
            .and_then(|v| v.as_str());

        if let Some(name) = name {
            // Get arguments - support both "arguments" and "parameters" keys
            let empty_obj = Value::Object(serde_json::Map::new());
            let args = obj
                .get("arguments")
                .or_else(|| obj.get("parameters"))
                .unwrap_or(&empty_obj);

            // Convert arguments to JSON string
            let arguments = serde_json::to_string(args)
                .map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;

            Ok(Some(ToolCall {
                function: FunctionCall {
                    name: name.to_string(),
                    arguments,
                },
            }))
        } else {
            Ok(None)
        }
    }

    /// Parse JSON value(s) into tool calls
    fn parse_json_value(&self, value: &Value) -> ToolParserResult<Vec<ToolCall>> {
        let mut tools = Vec::new();

        match value {
            Value::Array(arr) => {
                // Parse each element in the array
                for item in arr {
                    if let Some(tool) = self.parse_single_object(item)? {
                        tools.push(tool);
                    }
                }
            }
            Value::Object(_) => {
                // Single tool call
                if let Some(tool) = self.parse_single_object(value)? {
                    tools.push(tool);
                }
            }
            _ => {
                // Not a valid tool call format
                return Ok(vec![]);
            }
        }

        Ok(tools)
    }
}

impl Default for JsonParser {
    fn default() -> Self {
        Self::new()
    }
}

#[async_trait]
impl ToolParser for JsonParser {
187
    async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
188
189
190
191
192
193
194
195
196
        // Always use extract_json_from_text to handle both pure JSON and mixed content
        if let Some((extracted_json, normal_text)) = self.extract_json_from_text(text) {
            let parsed = serde_json::from_str::<Value>(&extracted_json)
                .map_err(|e| ToolParserError::ParsingFailed(e.to_string()))
                .and_then(|v| self.parse_json_value(&v));

            match parsed {
                Ok(tools) => return Ok((normal_text, tools)),
                Err(e) => tracing::warn!("parse_complete failed: {:?}", e),
197
198
199
            }
        }

200
201
        // No valid JSON found, return original text as normal text
        Ok((text.to_string(), vec![]))
202
203
204
    }

    async fn parse_incremental(
205
        &mut self,
206
        chunk: &str,
207
208
209
210
211
212
213
        tools: &[Tool],
    ) -> ToolParserResult<StreamingParseResult> {
        // Append new text to buffer
        self.buffer.push_str(chunk);
        let current_text = &self.buffer.clone();

        // Check if current_text has tool_call
214
        let has_tool_start = self.has_tool_markers(current_text)
215
216
217
218
219
220
221
222
223
224
            || (self.current_tool_id >= 0 && current_text.starts_with(self.tool_call_separator));

        if !has_tool_start {
            let normal_text = self.buffer.clone();
            self.buffer.clear();

            return Ok(StreamingParseResult {
                normal_text,
                calls: vec![],
            });
225
226
        }

227
228
        // Build tool indices
        let tool_indices = helpers::get_tool_indices(tools);
229

230
231
232
233
234
235
236
        // Determine start index for JSON parsing
        // JSON can start with [ (array) or { (single object)
        let start_idx = if let Some(bracket_pos) = current_text.find('[') {
            let brace_pos = current_text.find('{');
            match brace_pos {
                Some(bp) if bp < bracket_pos => bp,
                _ => bracket_pos,
237
            }
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
        } else if let Some(brace_pos) = current_text.find('{') {
            brace_pos
        } else if self.current_tool_id >= 0 && current_text.starts_with(self.tool_call_separator) {
            self.tool_call_separator.len()
        } else {
            0
        };

        helpers::handle_json_tool_streaming(
            current_text,
            start_idx,
            &mut self.partial_json,
            &tool_indices,
            &mut self.buffer,
            &mut self.current_tool_id,
            &mut self.current_tool_name_sent,
            &mut self.streamed_args_for_tool,
            &mut self.prev_tool_call_arr,
        )
257
258
    }

259
    fn has_tool_markers(&self, text: &str) -> bool {
260
        let trimmed = text.trim();
261
        trimmed.starts_with('[') || trimmed.starts_with('{')
262
    }
263
264
265
266

    fn get_unstreamed_tool_args(&self) -> Option<Vec<ToolCallItem>> {
        helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
    }
267
}