use async_trait::async_trait; use regex::Regex; use serde_json::Value; use crate::tool_parser::{ errors::{ToolParserError, ToolParserResult}, state::ParseState, traits::ToolParser, types::{FunctionCall, StreamResult, ToolCall}, }; /// GLM-4 MoE format parser for tool calls /// /// Handles the GLM-4 MoE specific format: /// `{name}\n{key}\n{value}\n` /// /// Features: /// - XML-style tags for tool calls /// - Key-value pairs for arguments /// - Support for multiple sequential tool calls pub struct Glm4MoeParser { /// Regex for extracting complete tool calls tool_call_extractor: Regex, /// Regex for extracting function details func_detail_extractor: Regex, /// Regex for extracting argument key-value pairs arg_extractor: Regex, } impl Glm4MoeParser { /// Create a new GLM-4 MoE parser pub fn new() -> Self { // Use (?s) flag for DOTALL mode to handle newlines let tool_call_pattern = r"(?s).*?"; let tool_call_extractor = Regex::new(tool_call_pattern).expect("Valid regex pattern"); let func_detail_pattern = r"(?s)([^\n]*)\n(.*)"; let func_detail_extractor = Regex::new(func_detail_pattern).expect("Valid regex pattern"); let arg_pattern = r"(?s)(.*?)\s*(.*?)"; let arg_extractor = Regex::new(arg_pattern).expect("Valid regex pattern"); Self { tool_call_extractor, func_detail_extractor, arg_extractor, } } /// Check if text contains GLM-4 MoE tool markers fn has_tool_markers(&self, text: &str) -> bool { text.contains("") } /// Parse arguments from key-value pairs fn parse_arguments(&self, args_text: &str) -> ToolParserResult> { let mut arguments = serde_json::Map::new(); for capture in self.arg_extractor.captures_iter(args_text) { let key = capture.get(1).map_or("", |m| m.as_str()).trim(); let value_str = capture.get(2).map_or("", |m| m.as_str()).trim(); // Try to parse the value as JSON first, fallback to string let value = if let Ok(json_val) = serde_json::from_str::(value_str) { json_val } else { // Try parsing as Python literal (similar to Python's ast.literal_eval) if value_str == "true" || value_str == "True" { Value::Bool(true) } else if value_str == "false" || value_str == "False" { Value::Bool(false) } else if value_str == "null" || value_str == "None" { Value::Null } else if let Ok(num) = value_str.parse::() { Value::Number(num.into()) } else if let Ok(num) = value_str.parse::() { if let Some(n) = serde_json::Number::from_f64(num) { Value::Number(n) } else { Value::String(value_str.to_string()) } } else { Value::String(value_str.to_string()) } }; arguments.insert(key.to_string(), value); } Ok(arguments) } /// Parse a single tool call block fn parse_tool_call(&self, block: &str) -> ToolParserResult> { if let Some(captures) = self.func_detail_extractor.captures(block) { // Get function name let func_name = captures.get(1).map_or("", |m| m.as_str()).trim(); // Get arguments text let args_text = captures.get(2).map_or("", |m| m.as_str()); // Parse arguments let arguments = self.parse_arguments(args_text)?; let arguments_str = serde_json::to_string(&arguments) .map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?; // Generate ID let id = format!("glm4_call_{}", uuid::Uuid::new_v4()); Ok(Some(ToolCall { id, r#type: "function".to_string(), function: FunctionCall { name: func_name.to_string(), arguments: arguments_str, }, })) } else { Ok(None) } } } impl Default for Glm4MoeParser { fn default() -> Self { Self::new() } } #[async_trait] impl ToolParser for Glm4MoeParser { async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec)> { // Check if text contains GLM-4 MoE format if !self.has_tool_markers(text) { return Ok((text.to_string(), vec![])); } // Collect matches with positions and parse tools in one pass let matches: Vec<_> = self.tool_call_extractor.find_iter(text).collect(); let mut tools = Vec::new(); for mat in matches.iter() { if let Some(tool) = self.parse_tool_call(mat.as_str())? { tools.push(tool); } } // Extract normal text using first and last match positions let normal_text = if tools.is_empty() { text.to_string() } else { let first_start = matches[0].start(); let last_end = matches.last().unwrap().end(); let before = if first_start > 0 { &text[..first_start] } else { "" }; let after = if last_end < text.len() { &text[last_end..] } else { "" }; format!("{}{}", before, after) }; Ok((normal_text, tools)) } async fn parse_incremental( &self, chunk: &str, state: &mut ParseState, ) -> ToolParserResult { state.buffer.push_str(chunk); // Check for tool markers if !self.has_tool_markers(&state.buffer) { // No tool markers detected - return all buffered content as normal text let normal_text = std::mem::take(&mut state.buffer); return Ok(StreamResult::NormalText(normal_text)); } // Check for text before tool markers and extract it as normal text if let Some(marker_pos) = state.buffer.find("") { if marker_pos > 0 { // We have text before the tool marker - extract it as normal text let normal_text: String = state.buffer.drain(..marker_pos).collect(); return Ok(StreamResult::NormalText(normal_text)); } } // Look for start of tool call if let Some(start_pos) = state.buffer.find("") { // Look for the end of this tool call let search_from = start_pos + "".len(); if let Some(end_pos) = state.buffer[search_from..].find("") { let end_abs = search_from + end_pos + "".len(); // Extract and parse the complete tool call let tool_call_text = &state.buffer[start_pos..end_abs]; if let Some(tool) = self.parse_tool_call(tool_call_text)? { // Remove the processed part from buffer state.buffer.drain(..end_abs); return Ok(StreamResult::ToolComplete(tool)); } } else { // Tool call not complete yet, try to extract partial info let partial = &state.buffer[search_from..]; // Try to extract function name (first line after ) if let Some(name_end) = partial.find('\n') { let func_name = partial[..name_end].trim(); if !func_name.is_empty() && !state.in_string { state.in_string = true; // Mark name as sent return Ok(StreamResult::ToolName { index: 0, name: func_name.to_string(), }); } // Try to extract partial arguments let args_text = &partial[name_end + 1..]; let partial_args = self.parse_arguments(args_text)?; if !partial_args.is_empty() { let args_str = serde_json::to_string(&partial_args) .unwrap_or_else(|_| "{}".to_string()); return Ok(StreamResult::ToolArguments { index: 0, arguments: args_str, }); } } } } Ok(StreamResult::Incomplete) } fn detect_format(&self, text: &str) -> bool { self.has_tool_markers(text) } } #[cfg(test)] mod tests { use super::*; #[tokio::test] async fn test_parse_glm4_single_tool() { let parser = Glm4MoeParser::new(); let input = r#"Some text get_weather city Beijing date 2024-06-27 More text"#; let (normal_text, tools) = parser.parse_complete(input).await.unwrap(); assert_eq!(tools.len(), 1); assert_eq!(tools[0].function.name, "get_weather"); assert!(tools[0].function.arguments.contains("Beijing")); assert!(tools[0].function.arguments.contains("2024-06-27")); assert_eq!(normal_text, "Some text\nMore text"); // Text before and after tool call } #[tokio::test] async fn test_parse_glm4_multiple_tools() { let parser = Glm4MoeParser::new(); let input = r#"get_weather city Beijing get_weather city Shanghai "#; let (normal_text, tools) = parser.parse_complete(input).await.unwrap(); assert_eq!(tools.len(), 2); assert_eq!(tools[0].function.name, "get_weather"); assert_eq!(tools[1].function.name, "get_weather"); assert!(tools[0].function.arguments.contains("Beijing")); assert!(tools[1].function.arguments.contains("Shanghai")); assert_eq!(normal_text, ""); // Pure tool calls, no normal text } #[tokio::test] async fn test_parse_glm4_mixed_types() { let parser = Glm4MoeParser::new(); let input = r#"process_data count 42 active true name test "#; let (normal_text, tools) = parser.parse_complete(input).await.unwrap(); assert_eq!(tools.len(), 1); assert_eq!(normal_text, ""); // Pure tool call, no normal text assert_eq!(tools[0].function.name, "process_data"); // Parse arguments to check types let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap(); assert_eq!(args["count"], 42); assert_eq!(args["active"], true); assert_eq!(args["name"], "test"); } #[test] fn test_detect_format() { let parser = Glm4MoeParser::new(); assert!(parser.detect_format("")); assert!(!parser.detect_format("plain text")); assert!(!parser.detect_format("[TOOL_CALLS]")); } }