// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 // GLM-4.7 Tool Call Parser // Format: function_nameparam1value1 // Reference: https://huggingface.co/zai-org/GLM-4.7/blob/main/chat_template.jinja use regex::Regex; use serde_json::Value; use std::collections::HashMap; use tracing::warn; use uuid::Uuid; use super::super::ToolDefinition; use super::super::config::Glm47ParserConfig; use super::response::{CalledFunction, ToolCallResponse, ToolCallType}; /// Check if a chunk contains the start of a GLM-4.7 tool call. /// Format: function_name...... pub fn detect_tool_call_start_glm47(chunk: &str, config: &Glm47ParserConfig) -> bool { let start_token = &config.tool_call_start; // Check if we have the complete start token if chunk.contains(start_token.as_str()) { return true; } // Check for partial match at the end of the chunk (for streaming) for i in 1..start_token.len() { if chunk.ends_with(&start_token[..i]) { return true; } } false } /// Find the end position of a GLM-4.7 tool call. /// Returns the position after or the length of the chunk if not found. pub fn find_tool_call_end_position_glm47(chunk: &str, config: &Glm47ParserConfig) -> usize { let end_token = &config.tool_call_end; if let Some(pos) = chunk.find(end_token.as_str()) { pos + end_token.len() } else { chunk.len() } } /// Try to parse GLM-4.7 formatted tool calls from a message. /// Format: function_nameparam1value1 /// Returns (parsed_tool_calls, normal_text_content) pub fn try_tool_call_parse_glm47( message: &str, config: &Glm47ParserConfig, tools: Option<&[ToolDefinition]>, ) -> anyhow::Result<(Vec, Option)> { let (normal_text, tool_calls) = extract_tool_calls(message, config, tools)?; let normal_content = if normal_text.is_empty() { Some("".to_string()) } else { Some(normal_text) }; Ok((tool_calls, normal_content)) } /// Extract tool calls and normal text from message. fn extract_tool_calls( text: &str, config: &Glm47ParserConfig, tools: Option<&[ToolDefinition]>, ) -> anyhow::Result<(String, Vec)> { let mut normal_parts = Vec::new(); let mut calls = Vec::new(); let mut cursor = 0; let start_token = &config.tool_call_start; let end_token = &config.tool_call_end; while cursor < text.len() { // Find next tool call start if let Some(start_pos) = text[cursor..].find(start_token.as_str()) { let abs_start = cursor + start_pos; // Add text before tool call to normal parts normal_parts.push(&text[cursor..abs_start]); // Find the corresponding end token if let Some(end_pos) = text[abs_start..].find(end_token.as_str()) { let abs_end = abs_start + end_pos + end_token.len(); let block = &text[abs_start..abs_end]; // Parse this tool call block; preserve unparseable blocks as // normal text so model output is never silently dropped. match parse_tool_call_block(block, config, tools) { Ok(parsed_call) => calls.push(parsed_call), Err(e) => { warn!("Failed to parse GLM-4.7 tool call block: {e}"); normal_parts.push(block); } } cursor = abs_end; } else { // No end token found -> treat the rest as normal text normal_parts.push(&text[abs_start..]); break; } } else { // No more tool calls normal_parts.push(&text[cursor..]); break; } } let normal_text = normal_parts.join("").trim().to_string(); Ok((normal_text, calls)) } /// Decode XML character entities in a string. /// Handles the five predefined XML entities: < > & " ' fn decode_xml_entities(s: &str) -> String { s.replace("<", "<") .replace(">", ">") .replace("&", "&") .replace(""", "\"") .replace("'", "'") } /// Coerce a raw string value to a JSON Value using the tool's parameter schema. /// Falls back to string if no schema is available or the type is unrecognized. fn coerce_value(raw: &str, schema_type: Option<&str>) -> Value { let trimmed = raw.trim(); // If the value already looks like JSON (object, array, or quoted string), parse it directly if (trimmed.starts_with('{') || trimmed.starts_with('[') || trimmed.starts_with('"')) && let Ok(v) = serde_json::from_str(trimmed) { return v; } // Use schema type hints for coercion when available match schema_type { Some("integer") | Some("int") => { if let Ok(n) = trimmed.parse::() { return Value::Number(n.into()); } } Some("number") | Some("float") | Some("double") => { if let Ok(n) = trimmed.parse::() && let Some(num) = serde_json::Number::from_f64(n) { return Value::Number(num); } } Some("boolean") | Some("bool") => match trimmed.to_lowercase().as_str() { "true" | "1" | "yes" => return Value::Bool(true), "false" | "0" | "no" => return Value::Bool(false), _ => {} }, Some("array") => { // Try JSON parse first, then fall back to comma-separated splitting if let Ok(v) = serde_json::from_str::(trimmed) && v.is_array() { return v; } let items: Vec = trimmed .split(',') .map(|s| Value::String(s.trim().to_string())) .collect(); return Value::Array(items); } Some("null") => { if trimmed == "null" || trimmed == "None" || trimmed.is_empty() { return Value::Null; } } _ => {} } Value::String(raw.to_string()) } /// Look up the JSON Schema type for a parameter by name from a tool's parameter schema. fn get_param_schema_type<'a>( tools: Option<&'a [ToolDefinition]>, function_name: &str, param_name: &str, ) -> Option<&'a str> { let tool = tools?.iter().find(|t| t.name == function_name)?; let schema = tool.parameters.as_ref()?; let props = schema.get("properties")?; let param = props.get(param_name)?; param.get("type")?.as_str() } /// Parse a single GLM-4.7 tool call block /// Format: function_namekey1value1... fn parse_tool_call_block( block: &str, config: &Glm47ParserConfig, tools: Option<&[ToolDefinition]>, ) -> anyhow::Result { // Remove the outer tags let start_token = &config.tool_call_start; let end_token = &config.tool_call_end; let content = block .strip_prefix(start_token.as_str()) .and_then(|s| s.strip_suffix(end_token.as_str())) .ok_or_else(|| anyhow::anyhow!("Invalid tool call block format"))?; // Extract function name (everything before first or end) let arg_key_start = &config.arg_key_start; let function_name = if let Some(pos) = content.find(arg_key_start.as_str()) { content[..pos].trim().to_string() } else { // No arguments, just function name content.trim().to_string() }; if function_name.is_empty() { anyhow::bail!("Empty function name in tool call"); } // Parse key-value pairs let mut arguments = HashMap::new(); let args_section = &content[function_name.len()..]; // Build regex patterns let arg_key_start_escaped = regex::escape(&config.arg_key_start); let arg_key_end_escaped = regex::escape(&config.arg_key_end); let arg_value_start_escaped = regex::escape(&config.arg_value_start); let arg_value_end_escaped = regex::escape(&config.arg_value_end); // Pattern to match: keyvalue // (?s) enables dotall mode so (.*?) matches across newlines — required // because models often emit multi-line content in arg values. let pattern = format!( r"(?s){}([^<]+){}{}(.*?){}", arg_key_start_escaped, arg_key_end_escaped, arg_value_start_escaped, arg_value_end_escaped ); let regex = Regex::new(&pattern)?; for cap in regex.captures_iter(args_section) { let key = cap.get(1).map(|m| m.as_str().trim()).unwrap_or(""); let raw_value = cap.get(2).map(|m| m.as_str()).unwrap_or(""); if !key.is_empty() { // Decode XML entities (e.g. < → <, & → &) before parsing let decoded = decode_xml_entities(raw_value); // Look up the expected type from the tool's parameter schema let schema_type = get_param_schema_type(tools, &function_name, key); let json_value = coerce_value(&decoded, schema_type); arguments.insert(key.to_string(), json_value); } } // Validate function against tools if provided if let Some(tools_list) = tools { let tool_exists = tools_list.iter().any(|t| t.name == function_name); if !tool_exists { anyhow::bail!("Function '{}' not found in available tools", function_name); } } Ok(ToolCallResponse { id: Uuid::new_v4().to_string(), tp: ToolCallType::Function, function: CalledFunction { name: function_name, arguments: serde_json::to_string(&arguments)?, }, }) } #[cfg(test)] mod tests { use super::*; fn get_test_config() -> Glm47ParserConfig { Glm47ParserConfig::default() } #[test] fn test_detect_tool_call_start() { let config = get_test_config(); // Complete start token assert!(detect_tool_call_start_glm47( "get_weather", &config )); // Partial start token (streaming) assert!(detect_tool_call_start_glm47("Some text = serde_json::from_str(&calls[0].function.arguments).unwrap(); assert_eq!( args.get("location").unwrap().as_str().unwrap(), "San Francisco" ); assert_eq!(normal_text, Some("".to_string())); } #[test] fn test_parse_tool_call_with_multiple_args() { let config = get_test_config(); let message = "book_flightfromNYCtoLAXdate2026-03-15"; let (calls, _) = try_tool_call_parse_glm47(message, &config, None).unwrap(); assert_eq!(calls.len(), 1); assert_eq!(calls[0].function.name, "book_flight"); let args: HashMap = serde_json::from_str(&calls[0].function.arguments).unwrap(); assert_eq!(args.get("from").unwrap().as_str().unwrap(), "NYC"); assert_eq!(args.get("to").unwrap().as_str().unwrap(), "LAX"); assert_eq!(args.get("date").unwrap().as_str().unwrap(), "2026-03-15"); } #[test] fn test_parse_tool_call_with_json_value() { let config = get_test_config(); let message = r#"searchfilters{"category": "books", "price_max": 50}"#; let (calls, _) = try_tool_call_parse_glm47(message, &config, None).unwrap(); assert_eq!(calls.len(), 1); assert_eq!(calls[0].function.name, "search"); let args: HashMap = serde_json::from_str(&calls[0].function.arguments).unwrap(); let filters = args.get("filters").unwrap(); assert!(filters.is_object()); } #[test] fn test_parse_multiple_tool_calls() { let config = get_test_config(); let message = "get_weatherlocationNYCget_timetimezoneEST"; let (calls, _) = try_tool_call_parse_glm47(message, &config, None).unwrap(); assert_eq!(calls.len(), 2); assert_eq!(calls[0].function.name, "get_weather"); assert_eq!(calls[1].function.name, "get_time"); } #[test] fn test_parse_with_normal_text() { let config = get_test_config(); let message = "I'll check the weather for you. get_weatherlocationParis"; let (calls, normal_text) = try_tool_call_parse_glm47(message, &config, None).unwrap(); assert_eq!(calls.len(), 1); assert_eq!(calls[0].function.name, "get_weather"); assert_eq!( normal_text, Some("I'll check the weather for you.".to_string()) ); } #[test] fn test_parse_tool_call_no_args() { let config = get_test_config(); let message = "get_current_time"; let (calls, _) = try_tool_call_parse_glm47(message, &config, None).unwrap(); assert_eq!(calls.len(), 1); assert_eq!(calls[0].function.name, "get_current_time"); let args: HashMap = serde_json::from_str(&calls[0].function.arguments).unwrap(); assert!(args.is_empty()); } #[test] fn test_find_tool_call_end_position() { let config = get_test_config(); let chunk = "funckvmore text"; let end_pos = find_tool_call_end_position_glm47(chunk, &config); assert_eq!( &chunk[..end_pos], "funckv" ); } #[test] fn test_parse_multiline_arg_value() { let config = get_test_config(); let message = "write_filepath/tmp/hello.pycontent#!/usr/bin/env python3\nprint(\"Hello, World!\")\n"; let (calls, _) = try_tool_call_parse_glm47(message, &config, None).unwrap(); assert_eq!(calls.len(), 1); assert_eq!(calls[0].function.name, "write_file"); let args: HashMap = serde_json::from_str(&calls[0].function.arguments).unwrap(); assert_eq!(args.get("path").unwrap().as_str().unwrap(), "/tmp/hello.py"); assert!( args.contains_key("content"), "content argument must be parsed even when it contains newlines" ); let content = args.get("content").unwrap().as_str().unwrap(); assert!(content.contains("print(\"Hello, World!\")")); } #[test] fn test_malformed_tool_call() { let config = get_test_config(); // Missing end tag let message = "get_weather"; let result = try_tool_call_parse_glm47(message, &config, None); assert!(result.is_ok()); // Should handle gracefully, no calls extracted let (calls, _) = result.unwrap(); assert_eq!(calls.len(), 0); } #[test] fn test_unparseable_block_preserved_as_normal_text() { let config = get_test_config(); let tools = vec![ToolDefinition { name: "get_weather".to_string(), parameters: None, }]; // Tool call block references a function not in the tools list let message = "Here is the result: unknown_funcx1 done"; let (calls, normal_text) = try_tool_call_parse_glm47(message, &config, Some(&tools)).unwrap(); assert_eq!(calls.len(), 0); // The unparseable block should be preserved in normal text, not dropped let text = normal_text.unwrap(); assert!( text.contains("unknown_func"), "Unparseable block should be in normal text, got: {text}" ); } #[test] fn test_xml_entity_decoding() { let config = get_test_config(); let message = r#"write_filecontentx < y && y > z"#; let (calls, _) = try_tool_call_parse_glm47(message, &config, None).unwrap(); assert_eq!(calls.len(), 1); let args: HashMap = serde_json::from_str(&calls[0].function.arguments).unwrap(); assert_eq!( args.get("content").unwrap().as_str().unwrap(), "x < y && y > z" ); } #[test] fn test_type_coercion_with_schema() { let config = get_test_config(); let tools = vec![ToolDefinition { name: "set_temperature".to_string(), parameters: Some(serde_json::json!({ "type": "object", "properties": { "degrees": {"type": "number"}, "enabled": {"type": "boolean"}, "count": {"type": "integer"}, "label": {"type": "string"} } })), }]; let message = "set_temperaturedegrees72.5enabledtruecount3labelwarm"; let (calls, _) = try_tool_call_parse_glm47(message, &config, Some(&tools)).unwrap(); assert_eq!(calls.len(), 1); let args: HashMap = serde_json::from_str(&calls[0].function.arguments).unwrap(); // number coercion assert_eq!(args.get("degrees").unwrap().as_f64().unwrap(), 72.5); // boolean coercion assert!(args.get("enabled").unwrap().as_bool().unwrap()); // integer coercion assert_eq!(args.get("count").unwrap().as_i64().unwrap(), 3); // string stays string assert_eq!(args.get("label").unwrap().as_str().unwrap(), "warm"); } #[test] fn test_type_coercion_array_comma_separated() { let config = get_test_config(); let tools = vec![ToolDefinition { name: "tag_item".to_string(), parameters: Some(serde_json::json!({ "type": "object", "properties": { "tags": {"type": "array"} } })), }]; // Model emits comma-separated values without JSON brackets let message = "tag_itemtagsrust, python, go"; let (calls, _) = try_tool_call_parse_glm47(message, &config, Some(&tools)).unwrap(); let args: HashMap = serde_json::from_str(&calls[0].function.arguments).unwrap(); let tags = args.get("tags").unwrap().as_array().unwrap(); assert_eq!(tags.len(), 3); assert_eq!(tags[0].as_str().unwrap(), "rust"); assert_eq!(tags[1].as_str().unwrap(), "python"); assert_eq!(tags[2].as_str().unwrap(), "go"); } #[test] fn test_type_coercion_array_json() { let config = get_test_config(); let tools = vec![ToolDefinition { name: "tag_item".to_string(), parameters: Some(serde_json::json!({ "type": "object", "properties": { "ids": {"type": "array"} } })), }]; // Model emits proper JSON array let message = r#"tag_itemids[1, 2, 3]"#; let (calls, _) = try_tool_call_parse_glm47(message, &config, Some(&tools)).unwrap(); let args: HashMap = serde_json::from_str(&calls[0].function.arguments).unwrap(); let ids = args.get("ids").unwrap().as_array().unwrap(); assert_eq!(ids.len(), 3); assert_eq!(ids[0].as_i64().unwrap(), 1); } #[test] fn test_type_coercion_falls_back_to_string() { let config = get_test_config(); let tools = vec![ToolDefinition { name: "test_func".to_string(), parameters: Some(serde_json::json!({ "type": "object", "properties": { "count": {"type": "integer"} } })), }]; // "not_a_number" can't be parsed as integer — should fall back to string let message = "test_funccountnot_a_number"; let (calls, _) = try_tool_call_parse_glm47(message, &config, Some(&tools)).unwrap(); let args: HashMap = serde_json::from_str(&calls[0].function.arguments).unwrap(); assert!( args.get("count").unwrap().is_string(), "Should fall back to string when coercion fails" ); } }