"lib/vscode:/vscode.git/clone" did not exist on "e75bcf6704471bbc3277bdd776bc1d84b2b7ff1c"
Unverified Commit 33d71102 authored by Ayush Agarwal's avatar Ayush Agarwal Committed by GitHub
Browse files

feat: glm47 tool parser (#5897)


Signed-off-by: default avatarayushag <ayushag@nvidia.com>
Signed-off-by: default avatarMatej Kosec <mkosec@nvidia.com>
Signed-off-by: default avatarMarko Kosec <mkosec@nvidia.com>
Co-authored-by: default avatarMatej Kosec <mkosec@nvidia.com>
Co-authored-by: default avatarishandhanani <82981111+ishandhanani@users.noreply.github.com>
parent bc8f1170
......@@ -44,6 +44,7 @@ Parser to Model Mapping
| deepseek_v3_1 | deepseek-ai/DeepSeek-V3.1 |
| pythonic | meta-llama/Llama-4-* |
| jamba | ai21labs/AI21-Jamba-*-1.5, ai21labs/AI21-Jamba-*-1.6, ai21labs/AI21-Jamba-*-1.7, |
| glm47 | zai-org/GLM-4.7 |
## Examples
......
......@@ -99,6 +99,37 @@ impl Default for DsmlParserConfig {
}
}
/// Configuration for GLM-4.7 style tool call parser
/// Format: <tool_call>function_name<arg_key>param</arg_key><arg_value>value</arg_value></tool_call>
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
pub struct Glm47ParserConfig {
/// Start token for tool call block (e.g., "<tool_call>")
pub tool_call_start: String,
/// End token for tool call block (e.g., "</tool_call>")
pub tool_call_end: String,
/// Start token for argument key (e.g., "<arg_key>")
pub arg_key_start: String,
/// End token for argument key (e.g., "</arg_key>")
pub arg_key_end: String,
/// Start token for argument value (e.g., "<arg_value>")
pub arg_value_start: String,
/// End token for argument value (e.g., "</arg_value>")
pub arg_value_end: String,
}
impl Default for Glm47ParserConfig {
fn default() -> Self {
Self {
tool_call_start: "<tool_call>".to_string(),
tool_call_end: "</tool_call>".to_string(),
arg_key_start: "<arg_key>".to_string(),
arg_key_end: "</arg_key>".to_string(),
arg_value_start: "<arg_value>".to_string(),
arg_value_end: "</arg_value>".to_string(),
}
}
}
/// Parser-specific configuration
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
......@@ -109,6 +140,7 @@ pub enum ParserConfig {
Harmony(JsonParserConfig),
Typescript,
Dsml(DsmlParserConfig),
Glm47(Glm47ParserConfig),
}
impl ParserConfig {
......@@ -122,6 +154,7 @@ impl ParserConfig {
ParserConfig::Pythonic => vec![],
ParserConfig::Typescript => vec![],
ParserConfig::Dsml(config) => vec![config.function_calls_start.clone()],
ParserConfig::Glm47(config) => vec![config.tool_call_start.clone()],
}
}
......@@ -135,6 +168,7 @@ impl ParserConfig {
ParserConfig::Pythonic => vec![],
ParserConfig::Typescript => vec![],
ParserConfig::Dsml(config) => vec![config.function_calls_end.clone()],
ParserConfig::Glm47(config) => vec![config.tool_call_end.clone()],
}
}
}
......@@ -314,4 +348,13 @@ impl ToolCallConfig {
}),
}
}
pub fn glm47() -> Self {
// GLM-4.7 format:
// <tool_call>function_name<arg_key>param1</arg_key><arg_value>value1</arg_value></tool_call>
// Reference: https://huggingface.co/zai-org/GLM-4.7/blob/main/chat_template.jinja
Self {
parser_config: ParserConfig::Glm47(Glm47ParserConfig::default()),
}
}
}
......@@ -19,7 +19,8 @@ use super::pythonic::{
};
use super::response::ToolCallResponse;
use super::xml::{
detect_tool_call_start_xml, find_tool_call_end_position_xml, try_tool_call_parse_xml,
detect_tool_call_start_glm47, detect_tool_call_start_xml, find_tool_call_end_position_glm47,
find_tool_call_end_position_xml, try_tool_call_parse_glm47, try_tool_call_parse_xml,
};
use std::collections::HashMap;
use std::sync::OnceLock;
......@@ -43,6 +44,7 @@ pub fn get_tool_parser_map() -> &'static HashMap<&'static str, ToolCallConfig> {
map.insert("qwen3_coder", ToolCallConfig::qwen3_coder());
map.insert("jamba", ToolCallConfig::jamba());
map.insert("minimax_m2", ToolCallConfig::minimax_m2());
map.insert("glm47", ToolCallConfig::glm47());
map.insert("default", ToolCallConfig::default());
map.insert("nemotron_nano", ToolCallConfig::qwen3_coder()); // nemotron nano follows qwen3_coder format
map
......@@ -84,6 +86,11 @@ pub async fn try_tool_call_parse(
let (results, normal_content) = try_tool_call_parse_dsml(message, dsml_config)?;
Ok((results, normal_content))
}
ParserConfig::Glm47(glm47_config) => {
let (results, normal_content) =
try_tool_call_parse_glm47(message, glm47_config, tools)?;
Ok((results, normal_content))
}
}
}
......@@ -134,6 +141,9 @@ pub fn detect_tool_call_start(chunk: &str, parser_str: Option<&str>) -> anyhow::
}
ParserConfig::Xml(xml_config) => Ok(detect_tool_call_start_xml(chunk, xml_config)),
ParserConfig::Dsml(dsml_config) => Ok(detect_tool_call_start_dsml(chunk, dsml_config)),
ParserConfig::Glm47(glm47_config) => {
Ok(detect_tool_call_start_glm47(chunk, glm47_config))
}
},
None => anyhow::bail!(
"Parser '{}' is not implemented. Available parsers: {:?}",
......@@ -171,6 +181,9 @@ pub fn find_tool_call_end_position(chunk: &str, parser_str: Option<&str>) -> usi
}
ParserConfig::Xml(xml_config) => find_tool_call_end_position_xml(chunk, xml_config),
ParserConfig::Dsml(dsml_config) => find_tool_call_end_position_dsml(chunk, dsml_config),
ParserConfig::Glm47(glm47_config) => {
find_tool_call_end_position_glm47(chunk, glm47_config)
}
},
None => {
// Unknown parser, return full content length
......@@ -211,6 +224,7 @@ mod tests {
"jamba",
"nemotron_nano",
"minimax_m2",
"glm47",
];
for parser in available_parsers {
assert!(parsers.contains(&parser));
......
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
// GLM-4.7 Tool Call Parser
// Format: <tool_call>function_name<arg_key>param1</arg_key><arg_value>value1</arg_value></tool_call>
// Reference: https://huggingface.co/zai-org/GLM-4.7/blob/main/chat_template.jinja
use regex::Regex;
use serde_json::Value;
use std::collections::HashMap;
use tracing::warn;
use uuid::Uuid;
use super::super::ToolDefinition;
use super::super::config::Glm47ParserConfig;
use super::response::{CalledFunction, ToolCallResponse, ToolCallType};
/// Check if a chunk contains the start of a GLM-4.7 tool call.
/// Format: <tool_call>function_name<arg_key>...</arg_key><arg_value>...</arg_value></tool_call>
pub fn detect_tool_call_start_glm47(chunk: &str, config: &Glm47ParserConfig) -> bool {
let start_token = &config.tool_call_start;
// Check if we have the complete start token
if chunk.contains(start_token.as_str()) {
return true;
}
// Check for partial match at the end of the chunk (for streaming)
for i in 1..start_token.len() {
if chunk.ends_with(&start_token[..i]) {
return true;
}
}
false
}
/// Find the end position of a GLM-4.7 tool call.
/// Returns the position after </tool_call> or the length of the chunk if not found.
pub fn find_tool_call_end_position_glm47(chunk: &str, config: &Glm47ParserConfig) -> usize {
let end_token = &config.tool_call_end;
if let Some(pos) = chunk.find(end_token.as_str()) {
pos + end_token.len()
} else {
chunk.len()
}
}
/// Try to parse GLM-4.7 formatted tool calls from a message.
/// Format: <tool_call>function_name<arg_key>param1</arg_key><arg_value>value1</arg_value></tool_call>
/// Returns (parsed_tool_calls, normal_text_content)
pub fn try_tool_call_parse_glm47(
message: &str,
config: &Glm47ParserConfig,
tools: Option<&[ToolDefinition]>,
) -> anyhow::Result<(Vec<ToolCallResponse>, Option<String>)> {
let (normal_text, tool_calls) = extract_tool_calls(message, config, tools)?;
let normal_content = if normal_text.is_empty() {
Some("".to_string())
} else {
Some(normal_text)
};
Ok((tool_calls, normal_content))
}
/// Extract tool calls and normal text from message.
fn extract_tool_calls(
text: &str,
config: &Glm47ParserConfig,
tools: Option<&[ToolDefinition]>,
) -> anyhow::Result<(String, Vec<ToolCallResponse>)> {
let mut normal_parts = Vec::new();
let mut calls = Vec::new();
let mut cursor = 0;
let start_token = &config.tool_call_start;
let end_token = &config.tool_call_end;
while cursor < text.len() {
// Find next tool call start
if let Some(start_pos) = text[cursor..].find(start_token.as_str()) {
let abs_start = cursor + start_pos;
// Add text before tool call to normal parts
normal_parts.push(&text[cursor..abs_start]);
// Find the corresponding end token
if let Some(end_pos) = text[abs_start..].find(end_token.as_str()) {
let abs_end = abs_start + end_pos + end_token.len();
let block = &text[abs_start..abs_end];
// Parse this tool call block; preserve unparseable blocks as
// normal text so model output is never silently dropped.
match parse_tool_call_block(block, config, tools) {
Ok(parsed_call) => calls.push(parsed_call),
Err(e) => {
warn!("Failed to parse GLM-4.7 tool call block: {e}");
normal_parts.push(block);
}
}
cursor = abs_end;
} else {
// No end token found -> treat the rest as normal text
normal_parts.push(&text[abs_start..]);
break;
}
} else {
// No more tool calls
normal_parts.push(&text[cursor..]);
break;
}
}
let normal_text = normal_parts.join("").trim().to_string();
Ok((normal_text, calls))
}
/// Decode XML character entities in a string.
/// Handles the five predefined XML entities: &lt; &gt; &amp; &quot; &apos;
fn decode_xml_entities(s: &str) -> String {
s.replace("&lt;", "<")
.replace("&gt;", ">")
.replace("&amp;", "&")
.replace("&quot;", "\"")
.replace("&apos;", "'")
}
/// Coerce a raw string value to a JSON Value using the tool's parameter schema.
/// Falls back to string if no schema is available or the type is unrecognized.
fn coerce_value(raw: &str, schema_type: Option<&str>) -> Value {
let trimmed = raw.trim();
// If the value already looks like JSON (object, array, or quoted string), parse it directly
if (trimmed.starts_with('{') || trimmed.starts_with('[') || trimmed.starts_with('"'))
&& let Ok(v) = serde_json::from_str(trimmed)
{
return v;
}
// Use schema type hints for coercion when available
match schema_type {
Some("integer") | Some("int") => {
if let Ok(n) = trimmed.parse::<i64>() {
return Value::Number(n.into());
}
}
Some("number") | Some("float") | Some("double") => {
if let Ok(n) = trimmed.parse::<f64>()
&& let Some(num) = serde_json::Number::from_f64(n)
{
return Value::Number(num);
}
}
Some("boolean") | Some("bool") => match trimmed.to_lowercase().as_str() {
"true" | "1" | "yes" => return Value::Bool(true),
"false" | "0" | "no" => return Value::Bool(false),
_ => {}
},
Some("array") => {
// Try JSON parse first, then fall back to comma-separated splitting
if let Ok(v) = serde_json::from_str::<Value>(trimmed)
&& v.is_array()
{
return v;
}
let items: Vec<Value> = trimmed
.split(',')
.map(|s| Value::String(s.trim().to_string()))
.collect();
return Value::Array(items);
}
Some("null") => {
if trimmed == "null" || trimmed == "None" || trimmed.is_empty() {
return Value::Null;
}
}
_ => {}
}
Value::String(raw.to_string())
}
/// Look up the JSON Schema type for a parameter by name from a tool's parameter schema.
fn get_param_schema_type<'a>(
tools: Option<&'a [ToolDefinition]>,
function_name: &str,
param_name: &str,
) -> Option<&'a str> {
let tool = tools?.iter().find(|t| t.name == function_name)?;
let schema = tool.parameters.as_ref()?;
let props = schema.get("properties")?;
let param = props.get(param_name)?;
param.get("type")?.as_str()
}
/// Parse a single GLM-4.7 tool call block
/// Format: <tool_call>function_name<arg_key>key1</arg_key><arg_value>value1</arg_value>...</tool_call>
fn parse_tool_call_block(
block: &str,
config: &Glm47ParserConfig,
tools: Option<&[ToolDefinition]>,
) -> anyhow::Result<ToolCallResponse> {
// Remove the outer <tool_call> tags
let start_token = &config.tool_call_start;
let end_token = &config.tool_call_end;
let content = block
.strip_prefix(start_token.as_str())
.and_then(|s| s.strip_suffix(end_token.as_str()))
.ok_or_else(|| anyhow::anyhow!("Invalid tool call block format"))?;
// Extract function name (everything before first <arg_key> or end)
let arg_key_start = &config.arg_key_start;
let function_name = if let Some(pos) = content.find(arg_key_start.as_str()) {
content[..pos].trim().to_string()
} else {
// No arguments, just function name
content.trim().to_string()
};
if function_name.is_empty() {
anyhow::bail!("Empty function name in tool call");
}
// Parse key-value pairs
let mut arguments = HashMap::new();
let args_section = &content[function_name.len()..];
// Build regex patterns
let arg_key_start_escaped = regex::escape(&config.arg_key_start);
let arg_key_end_escaped = regex::escape(&config.arg_key_end);
let arg_value_start_escaped = regex::escape(&config.arg_value_start);
let arg_value_end_escaped = regex::escape(&config.arg_value_end);
// Pattern to match: <arg_key>key</arg_key><arg_value>value</arg_value>
// (?s) enables dotall mode so (.*?) matches across newlines — required
// because models often emit multi-line content in arg values.
let pattern = format!(
r"(?s){}([^<]+){}{}(.*?){}",
arg_key_start_escaped, arg_key_end_escaped, arg_value_start_escaped, arg_value_end_escaped
);
let regex = Regex::new(&pattern)?;
for cap in regex.captures_iter(args_section) {
let key = cap.get(1).map(|m| m.as_str().trim()).unwrap_or("");
let raw_value = cap.get(2).map(|m| m.as_str()).unwrap_or("");
if !key.is_empty() {
// Decode XML entities (e.g. &lt; → <, &amp; → &) before parsing
let decoded = decode_xml_entities(raw_value);
// Look up the expected type from the tool's parameter schema
let schema_type = get_param_schema_type(tools, &function_name, key);
let json_value = coerce_value(&decoded, schema_type);
arguments.insert(key.to_string(), json_value);
}
}
// Validate function against tools if provided
if let Some(tools_list) = tools {
let tool_exists = tools_list.iter().any(|t| t.name == function_name);
if !tool_exists {
anyhow::bail!("Function '{}' not found in available tools", function_name);
}
}
Ok(ToolCallResponse {
id: Uuid::new_v4().to_string(),
tp: ToolCallType::Function,
function: CalledFunction {
name: function_name,
arguments: serde_json::to_string(&arguments)?,
},
})
}
#[cfg(test)]
mod tests {
use super::*;
fn get_test_config() -> Glm47ParserConfig {
Glm47ParserConfig::default()
}
#[test]
fn test_detect_tool_call_start() {
let config = get_test_config();
// Complete start token
assert!(detect_tool_call_start_glm47(
"<tool_call>get_weather",
&config
));
// Partial start token (streaming)
assert!(detect_tool_call_start_glm47("Some text <tool", &config));
assert!(detect_tool_call_start_glm47("Some text <tool_c", &config));
// No tool call
assert!(!detect_tool_call_start_glm47("Just normal text", &config));
}
#[test]
fn test_parse_simple_tool_call() {
let config = get_test_config();
let message = "<tool_call>get_weather<arg_key>location</arg_key><arg_value>San Francisco</arg_value></tool_call>";
let (calls, normal_text) = try_tool_call_parse_glm47(message, &config, None).unwrap();
assert_eq!(calls.len(), 1);
assert_eq!(calls[0].function.name, "get_weather");
let args: HashMap<String, Value> =
serde_json::from_str(&calls[0].function.arguments).unwrap();
assert_eq!(
args.get("location").unwrap().as_str().unwrap(),
"San Francisco"
);
assert_eq!(normal_text, Some("".to_string()));
}
#[test]
fn test_parse_tool_call_with_multiple_args() {
let config = get_test_config();
let message = "<tool_call>book_flight<arg_key>from</arg_key><arg_value>NYC</arg_value><arg_key>to</arg_key><arg_value>LAX</arg_value><arg_key>date</arg_key><arg_value>2026-03-15</arg_value></tool_call>";
let (calls, _) = try_tool_call_parse_glm47(message, &config, None).unwrap();
assert_eq!(calls.len(), 1);
assert_eq!(calls[0].function.name, "book_flight");
let args: HashMap<String, Value> =
serde_json::from_str(&calls[0].function.arguments).unwrap();
assert_eq!(args.get("from").unwrap().as_str().unwrap(), "NYC");
assert_eq!(args.get("to").unwrap().as_str().unwrap(), "LAX");
assert_eq!(args.get("date").unwrap().as_str().unwrap(), "2026-03-15");
}
#[test]
fn test_parse_tool_call_with_json_value() {
let config = get_test_config();
let message = r#"<tool_call>search<arg_key>filters</arg_key><arg_value>{"category": "books", "price_max": 50}</arg_value></tool_call>"#;
let (calls, _) = try_tool_call_parse_glm47(message, &config, None).unwrap();
assert_eq!(calls.len(), 1);
assert_eq!(calls[0].function.name, "search");
let args: HashMap<String, Value> =
serde_json::from_str(&calls[0].function.arguments).unwrap();
let filters = args.get("filters").unwrap();
assert!(filters.is_object());
}
#[test]
fn test_parse_multiple_tool_calls() {
let config = get_test_config();
let message = "<tool_call>get_weather<arg_key>location</arg_key><arg_value>NYC</arg_value></tool_call><tool_call>get_time<arg_key>timezone</arg_key><arg_value>EST</arg_value></tool_call>";
let (calls, _) = try_tool_call_parse_glm47(message, &config, None).unwrap();
assert_eq!(calls.len(), 2);
assert_eq!(calls[0].function.name, "get_weather");
assert_eq!(calls[1].function.name, "get_time");
}
#[test]
fn test_parse_with_normal_text() {
let config = get_test_config();
let message = "I'll check the weather for you. <tool_call>get_weather<arg_key>location</arg_key><arg_value>Paris</arg_value></tool_call>";
let (calls, normal_text) = try_tool_call_parse_glm47(message, &config, None).unwrap();
assert_eq!(calls.len(), 1);
assert_eq!(calls[0].function.name, "get_weather");
assert_eq!(
normal_text,
Some("I'll check the weather for you.".to_string())
);
}
#[test]
fn test_parse_tool_call_no_args() {
let config = get_test_config();
let message = "<tool_call>get_current_time</tool_call>";
let (calls, _) = try_tool_call_parse_glm47(message, &config, None).unwrap();
assert_eq!(calls.len(), 1);
assert_eq!(calls[0].function.name, "get_current_time");
let args: HashMap<String, Value> =
serde_json::from_str(&calls[0].function.arguments).unwrap();
assert!(args.is_empty());
}
#[test]
fn test_find_tool_call_end_position() {
let config = get_test_config();
let chunk =
"<tool_call>func<arg_key>k</arg_key><arg_value>v</arg_value></tool_call>more text";
let end_pos = find_tool_call_end_position_glm47(chunk, &config);
assert_eq!(
&chunk[..end_pos],
"<tool_call>func<arg_key>k</arg_key><arg_value>v</arg_value></tool_call>"
);
}
#[test]
fn test_parse_multiline_arg_value() {
let config = get_test_config();
let message = "<tool_call>write_file<arg_key>path</arg_key><arg_value>/tmp/hello.py</arg_value><arg_key>content</arg_key><arg_value>#!/usr/bin/env python3\nprint(\"Hello, World!\")\n</arg_value></tool_call>";
let (calls, _) = try_tool_call_parse_glm47(message, &config, None).unwrap();
assert_eq!(calls.len(), 1);
assert_eq!(calls[0].function.name, "write_file");
let args: HashMap<String, Value> =
serde_json::from_str(&calls[0].function.arguments).unwrap();
assert_eq!(args.get("path").unwrap().as_str().unwrap(), "/tmp/hello.py");
assert!(
args.contains_key("content"),
"content argument must be parsed even when it contains newlines"
);
let content = args.get("content").unwrap().as_str().unwrap();
assert!(content.contains("print(\"Hello, World!\")"));
}
#[test]
fn test_malformed_tool_call() {
let config = get_test_config();
// Missing end tag
let message = "<tool_call>get_weather";
let result = try_tool_call_parse_glm47(message, &config, None);
assert!(result.is_ok()); // Should handle gracefully, no calls extracted
let (calls, _) = result.unwrap();
assert_eq!(calls.len(), 0);
}
#[test]
fn test_unparseable_block_preserved_as_normal_text() {
let config = get_test_config();
let tools = vec![ToolDefinition {
name: "get_weather".to_string(),
parameters: None,
}];
// Tool call block references a function not in the tools list
let message = "Here is the result: <tool_call>unknown_func<arg_key>x</arg_key><arg_value>1</arg_value></tool_call> done";
let (calls, normal_text) =
try_tool_call_parse_glm47(message, &config, Some(&tools)).unwrap();
assert_eq!(calls.len(), 0);
// The unparseable block should be preserved in normal text, not dropped
let text = normal_text.unwrap();
assert!(
text.contains("unknown_func"),
"Unparseable block should be in normal text, got: {text}"
);
}
#[test]
fn test_xml_entity_decoding() {
let config = get_test_config();
let message = r#"<tool_call>write_file<arg_key>content</arg_key><arg_value>x &lt; y &amp;&amp; y &gt; z</arg_value></tool_call>"#;
let (calls, _) = try_tool_call_parse_glm47(message, &config, None).unwrap();
assert_eq!(calls.len(), 1);
let args: HashMap<String, Value> =
serde_json::from_str(&calls[0].function.arguments).unwrap();
assert_eq!(
args.get("content").unwrap().as_str().unwrap(),
"x < y && y > z"
);
}
#[test]
fn test_type_coercion_with_schema() {
let config = get_test_config();
let tools = vec![ToolDefinition {
name: "set_temperature".to_string(),
parameters: Some(serde_json::json!({
"type": "object",
"properties": {
"degrees": {"type": "number"},
"enabled": {"type": "boolean"},
"count": {"type": "integer"},
"label": {"type": "string"}
}
})),
}];
let message = "<tool_call>set_temperature<arg_key>degrees</arg_key><arg_value>72.5</arg_value><arg_key>enabled</arg_key><arg_value>true</arg_value><arg_key>count</arg_key><arg_value>3</arg_value><arg_key>label</arg_key><arg_value>warm</arg_value></tool_call>";
let (calls, _) = try_tool_call_parse_glm47(message, &config, Some(&tools)).unwrap();
assert_eq!(calls.len(), 1);
let args: HashMap<String, Value> =
serde_json::from_str(&calls[0].function.arguments).unwrap();
// number coercion
assert_eq!(args.get("degrees").unwrap().as_f64().unwrap(), 72.5);
// boolean coercion
assert!(args.get("enabled").unwrap().as_bool().unwrap());
// integer coercion
assert_eq!(args.get("count").unwrap().as_i64().unwrap(), 3);
// string stays string
assert_eq!(args.get("label").unwrap().as_str().unwrap(), "warm");
}
#[test]
fn test_type_coercion_array_comma_separated() {
let config = get_test_config();
let tools = vec![ToolDefinition {
name: "tag_item".to_string(),
parameters: Some(serde_json::json!({
"type": "object",
"properties": {
"tags": {"type": "array"}
}
})),
}];
// Model emits comma-separated values without JSON brackets
let message = "<tool_call>tag_item<arg_key>tags</arg_key><arg_value>rust, python, go</arg_value></tool_call>";
let (calls, _) = try_tool_call_parse_glm47(message, &config, Some(&tools)).unwrap();
let args: HashMap<String, Value> =
serde_json::from_str(&calls[0].function.arguments).unwrap();
let tags = args.get("tags").unwrap().as_array().unwrap();
assert_eq!(tags.len(), 3);
assert_eq!(tags[0].as_str().unwrap(), "rust");
assert_eq!(tags[1].as_str().unwrap(), "python");
assert_eq!(tags[2].as_str().unwrap(), "go");
}
#[test]
fn test_type_coercion_array_json() {
let config = get_test_config();
let tools = vec![ToolDefinition {
name: "tag_item".to_string(),
parameters: Some(serde_json::json!({
"type": "object",
"properties": {
"ids": {"type": "array"}
}
})),
}];
// Model emits proper JSON array
let message = r#"<tool_call>tag_item<arg_key>ids</arg_key><arg_value>[1, 2, 3]</arg_value></tool_call>"#;
let (calls, _) = try_tool_call_parse_glm47(message, &config, Some(&tools)).unwrap();
let args: HashMap<String, Value> =
serde_json::from_str(&calls[0].function.arguments).unwrap();
let ids = args.get("ids").unwrap().as_array().unwrap();
assert_eq!(ids.len(), 3);
assert_eq!(ids[0].as_i64().unwrap(), 1);
}
#[test]
fn test_type_coercion_falls_back_to_string() {
let config = get_test_config();
let tools = vec![ToolDefinition {
name: "test_func".to_string(),
parameters: Some(serde_json::json!({
"type": "object",
"properties": {
"count": {"type": "integer"}
}
})),
}];
// "not_a_number" can't be parsed as integer — should fall back to string
let message = "<tool_call>test_func<arg_key>count</arg_key><arg_value>not_a_number</arg_value></tool_call>";
let (calls, _) = try_tool_call_parse_glm47(message, &config, Some(&tools)).unwrap();
let args: HashMap<String, Value> =
serde_json::from_str(&calls[0].function.arguments).unwrap();
assert!(
args.get("count").unwrap().is_string(),
"Should fall back to string when coercion fails"
);
}
}
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
mod glm47_parser;
mod parser;
pub use super::response;
pub use glm47_parser::{
detect_tool_call_start_glm47, find_tool_call_end_position_glm47, try_tool_call_parse_glm47,
};
pub use parser::{
detect_tool_call_start_xml, find_tool_call_end_position_xml, try_tool_call_parse_xml,
};
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment