Unverified commit c1c8dd1d, authored by Chang Su and committed by GitHub
Browse files

[router][tool parser] Modify tool parser to return both normal text and tool...

[router][tool parser] Modify tool parser to return both normal text and tool calls (non-stream) (#10995)
parent f6bc3f52
......@@ -409,7 +409,7 @@ fn bench_concurrent_parsing(c: &mut Criterion) {
let result =
rt.block_on(async { parser.parse_complete(input).await });
if let Ok(tools) = result {
if let Ok((_normal_text, tools)) = result {
total_p.fetch_add(tools.len() as u64, Ordering::Relaxed);
}
}
......
......@@ -3,7 +3,7 @@ use std::fmt;
/// Result of parsing text for reasoning content.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct ParserResult {
/// The normal text outside of reasoning blocks.
/// The normal text outside reasoning blocks.
pub normal_text: String,
/// The extracted reasoning text from within reasoning blocks.
......
......@@ -804,7 +804,7 @@ impl GrpcRouter {
.get_parser(&original_request.model)
{
match parser.parse_complete(&processed_text).await {
Ok(parsed_tool_calls) => {
Ok((normal_text, parsed_tool_calls)) => {
if !parsed_tool_calls.is_empty() {
let spec_tool_calls = parsed_tool_calls
.into_iter()
......@@ -821,7 +821,7 @@ impl GrpcRouter {
})
.collect();
tool_calls = Some(spec_tool_calls);
processed_text = String::new();
processed_text = normal_text;
}
}
Err(e) => {
......
......@@ -50,14 +50,6 @@ impl DeepSeekParser {
text.contains("<|tool▁calls▁begin|>")
}
/// Extract all tool call blocks from text
fn extract_tool_calls<'a>(&self, text: &'a str) -> Vec<&'a str> {
self.tool_call_extractor
.find_iter(text)
.map(|m| m.as_str())
.collect()
}
/// Parse a single tool call block
fn parse_tool_call(&self, block: &str) -> ToolParserResult<Option<ToolCall>> {
if let Some(captures) = self.func_detail_extractor.captures(block) {
......@@ -115,23 +107,42 @@ impl Default for DeepSeekParser {
#[async_trait]
impl ToolParser for DeepSeekParser {
async fn parse_complete(&self, text: &str) -> ToolParserResult<Vec<ToolCall>> {
async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
// Check if text contains DeepSeek format
if !self.has_tool_markers(text) {
return Ok(vec![]);
return Ok((text.to_string(), vec![]));
}
// Extract all tool call blocks
let tool_blocks = self.extract_tool_calls(text);
// Collect matches with positions and parse tools in one pass
let matches: Vec<_> = self.tool_call_extractor.find_iter(text).collect();
let mut tools = Vec::new();
for block in tool_blocks {
if let Some(tool) = self.parse_tool_call(block)? {
for mat in matches.iter() {
if let Some(tool) = self.parse_tool_call(mat.as_str())? {
tools.push(tool);
}
}
Ok(tools)
// Extract normal text using first and last match positions
let normal_text = if tools.is_empty() || matches.is_empty() {
text.to_string()
} else {
let first_start = matches[0].start();
let last_end = matches.last().unwrap().end();
let before = if first_start > 0 {
&text[..first_start]
} else {
""
};
let after = if last_end < text.len() {
&text[last_end..]
} else {
""
};
format!("{}{}", before, after)
};
Ok((normal_text, tools))
}
async fn parse_incremental(
......@@ -241,10 +252,10 @@ mod tests {
{"location": "Tokyo", "units": "celsius"}
```<|tool▁call▁end|><|tool▁calls▁end|>More text"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "get_weather");
assert!(result[0].function.arguments.contains("Tokyo"));
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "get_weather");
assert!(tools[0].function.arguments.contains("Tokyo"));
}
#[tokio::test]
......@@ -259,12 +270,12 @@ mod tests {
{"location": "Paris"}
```<|tool▁call▁end|><|tool▁calls▁end|>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 2);
assert_eq!(result[0].function.name, "get_weather");
assert_eq!(result[1].function.name, "get_weather");
assert!(result[0].function.arguments.contains("Tokyo"));
assert!(result[1].function.arguments.contains("Paris"));
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 2);
assert_eq!(tools[0].function.name, "get_weather");
assert_eq!(tools[1].function.name, "get_weather");
assert!(tools[0].function.arguments.contains("Tokyo"));
assert!(tools[1].function.arguments.contains("Paris"));
}
#[test]
......
......@@ -130,21 +130,42 @@ impl Default for Glm4MoeParser {
#[async_trait]
impl ToolParser for Glm4MoeParser {
async fn parse_complete(&self, text: &str) -> ToolParserResult<Vec<ToolCall>> {
async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
// Check if text contains GLM-4 MoE format
if !self.has_tool_markers(text) {
return Ok(vec![]);
return Ok((text.to_string(), vec![]));
}
// Extract all tool call blocks
// Collect matches with positions and parse tools in one pass
let matches: Vec<_> = self.tool_call_extractor.find_iter(text).collect();
let mut tools = Vec::new();
for mat in self.tool_call_extractor.find_iter(text) {
for mat in matches.iter() {
if let Some(tool) = self.parse_tool_call(mat.as_str())? {
tools.push(tool);
}
}
Ok(tools)
// Extract normal text using first and last match positions
let normal_text = if tools.is_empty() {
text.to_string()
} else {
let first_start = matches[0].start();
let last_end = matches.last().unwrap().end();
let before = if first_start > 0 {
&text[..first_start]
} else {
""
};
let after = if last_end < text.len() {
&text[last_end..]
} else {
""
};
format!("{}{}", before, after)
};
Ok((normal_text, tools))
}
async fn parse_incremental(
......@@ -232,11 +253,12 @@ mod tests {
<arg_value>2024-06-27</arg_value>
</tool_call>More text"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "get_weather");
assert!(result[0].function.arguments.contains("Beijing"));
assert!(result[0].function.arguments.contains("2024-06-27"));
let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "get_weather");
assert!(tools[0].function.arguments.contains("Beijing"));
assert!(tools[0].function.arguments.contains("2024-06-27"));
assert_eq!(normal_text, "Some text\nMore text"); // Text before and after tool call
}
#[tokio::test]
......@@ -251,12 +273,13 @@ mod tests {
<arg_value>Shanghai</arg_value>
</tool_call>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 2);
assert_eq!(result[0].function.name, "get_weather");
assert_eq!(result[1].function.name, "get_weather");
assert!(result[0].function.arguments.contains("Beijing"));
assert!(result[1].function.arguments.contains("Shanghai"));
let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 2);
assert_eq!(tools[0].function.name, "get_weather");
assert_eq!(tools[1].function.name, "get_weather");
assert!(tools[0].function.arguments.contains("Beijing"));
assert!(tools[1].function.arguments.contains("Shanghai"));
assert_eq!(normal_text, ""); // Pure tool calls, no normal text
}
#[tokio::test]
......@@ -271,12 +294,13 @@ mod tests {
<arg_value>test</arg_value>
</tool_call>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "process_data");
let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(normal_text, ""); // Pure tool call, no normal text
assert_eq!(tools[0].function.name, "process_data");
// Parse arguments to check types
let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
assert_eq!(args["count"], 42);
assert_eq!(args["active"], true);
assert_eq!(args["name"], "test");
......
......@@ -71,10 +71,10 @@ impl Default for GptOssParser {
#[async_trait]
impl ToolParser for GptOssParser {
async fn parse_complete(&self, text: &str) -> ToolParserResult<Vec<ToolCall>> {
async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
// Check if text contains GPT-OSS format
if !self.has_tool_markers(text) {
return Ok(vec![]);
return Ok((text.to_string(), vec![]));
}
let mut tools = Vec::new();
......@@ -119,7 +119,7 @@ impl ToolParser for GptOssParser {
}
}
Ok(tools)
Ok((String::new(), tools)) // GPT-OSS parser returns empty normal text
}
async fn parse_incremental(
......@@ -239,10 +239,10 @@ mod tests {
<|channel|>commentary to=functions.get_weather<|constrain|>json<|message|>{"location": "San Francisco"}<|call|>
More text"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "get_weather");
assert!(result[0].function.arguments.contains("San Francisco"));
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "get_weather");
assert!(tools[0].function.arguments.contains("San Francisco"));
}
#[tokio::test]
......@@ -251,12 +251,12 @@ More text"#;
let input = r#"<|channel|>commentary to=functions.get_weather<|constrain|>json<|message|>{"location": "Paris"}<|call|>commentary
<|channel|>commentary to=functions.search<|constrain|>json<|message|>{"query": "Paris tourism"}<|call|>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 2);
assert_eq!(result[0].function.name, "get_weather");
assert_eq!(result[1].function.name, "search");
assert!(result[0].function.arguments.contains("Paris"));
assert!(result[1].function.arguments.contains("Paris tourism"));
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 2);
assert_eq!(tools[0].function.name, "get_weather");
assert_eq!(tools[1].function.name, "search");
assert!(tools[0].function.arguments.contains("Paris"));
assert!(tools[1].function.arguments.contains("Paris tourism"));
}
#[tokio::test]
......@@ -264,9 +264,9 @@ More text"#;
let parser = GptOssParser::new();
let input = r#"<|start|>assistant<|channel|>commentary to=functions.test<|constrain|>json<|message|>{"key": "value"}<|call|>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "test");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "test");
}
#[tokio::test]
......@@ -275,10 +275,10 @@ More text"#;
let input =
r#"<|channel|>commentary to=functions.get_time<|constrain|>json<|message|>{}<|call|>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "get_time");
assert_eq!(result[0].function.arguments, "{}");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "get_time");
assert_eq!(tools[0].function.arguments, "{}");
}
#[test]
......
......@@ -88,64 +88,65 @@ impl JsonParser {
content.trim()
}
/// Try to extract a JSON object or array from text that may contain other content
fn extract_json_from_text(&self, text: &str) -> Option<String> {
// Look for JSON object starting with {
if let Some(start) = text.find('{') {
let mut depth = 0;
let mut in_string = false;
let mut escape_next = false;
for (i, ch) in text[start..].char_indices() {
if escape_next {
escape_next = false;
continue;
}
/// Try to extract a first valid JSON object or array from text that may contain other content
/// Returns (json_string, normal_text) where normal_text is text before and after the JSON
fn extract_json_from_text(&self, text: &str) -> Option<(String, String)> {
let mut in_string = false;
let mut escape = false;
let mut stack: Vec<char> = Vec::with_capacity(8);
let mut start: Option<usize> = None;
for (i, ch) in text.char_indices() {
if escape {
escape = false;
continue;
}
match ch {
'\\' if in_string => escape_next = true,
'"' if !in_string => in_string = true,
'"' if in_string => in_string = false,
'{' if !in_string => depth += 1,
'}' if !in_string => {
depth -= 1;
if depth == 0 {
return Some(text[start..start + i + 1].to_string());
}
match ch {
'\\' if in_string => escape = true,
'"' => in_string = !in_string,
_ if in_string => {}
'{' | '[' => {
if start.is_none() {
start = Some(i);
}
_ => {}
stack.push(ch);
}
}
}
// Look for JSON array starting with [
if let Some(start) = text.find('[') {
let mut depth = 0;
let mut in_string = false;
let mut escape_next = false;
'}' | ']' => {
let Some(open) = stack.pop() else {
// Stray closer - reset and continue looking for next valid JSON
start = None;
continue;
};
for (i, ch) in text[start..].char_indices() {
if escape_next {
escape_next = false;
continue;
}
let valid = (open == '{' && ch == '}') || (open == '[' && ch == ']');
if !valid {
// Mismatch - reset and continue looking
start = None;
stack.clear();
continue;
}
match ch {
'\\' if in_string => escape_next = true,
'"' if !in_string => in_string = true,
'"' if in_string => in_string = false,
'[' if !in_string => depth += 1,
']' if !in_string => {
depth -= 1;
if depth == 0 {
return Some(text[start..start + i + 1].to_string());
if stack.is_empty() {
let s = start.unwrap();
let e = i + ch.len_utf8();
let potential_json = &text[s..e];
// Validate that this is actually valid JSON before returning
if serde_json::from_str::<Value>(potential_json).is_ok() {
let json = potential_json.to_string();
let normal = format!("{}{}", &text[..s], &text[e..]);
return Some((json, normal));
} else {
// Not valid JSON, reset and continue looking
start = None;
continue;
}
}
_ => {}
}
_ => {}
}
}
None
}
......@@ -241,16 +242,20 @@ impl Default for JsonParser {
#[async_trait]
impl ToolParser for JsonParser {
async fn parse_complete(&self, text: &str) -> ToolParserResult<Vec<ToolCall>> {
async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
// Check if we have multiple start tokens (e.g., multiple <|python_tag|> markers)
if !self.token_config.start_tokens.is_empty() {
let start_token = &self.token_config.start_tokens[0];
if !start_token.is_empty() && text.matches(start_token).count() > 1 {
// We have multiple occurrences of the start token
let mut all_tools = Vec::new();
let mut all_normal_text = String::new();
let mut remaining = text;
while let Some(start_pos) = remaining.find(start_token.as_str()) {
// Add text before this start token to normal text
all_normal_text.push_str(&remaining[..start_pos]);
// Extract content after this start token
let after_token = &remaining[start_pos + start_token.len()..];
......@@ -264,12 +269,19 @@ impl ToolParser for JsonParser {
let json_content = &after_token[..end_pos];
// Try to extract and parse JSON from this segment
if let Some(extracted) = self.extract_json_from_text(json_content) {
if let Some((extracted, segment_normal_text)) =
self.extract_json_from_text(json_content)
{
if let Ok(value) = serde_json::from_str::<Value>(&extracted) {
if let Ok(tools) = self.parse_json_value(&value) {
all_tools.extend(tools);
}
}
// Add the normal text from this segment
all_normal_text.push_str(&segment_normal_text);
} else {
// If no JSON found, add the entire content as normal text
all_normal_text.push_str(json_content);
}
// Move to the next segment
......@@ -279,9 +291,10 @@ impl ToolParser for JsonParser {
}
}
if !all_tools.is_empty() {
return Ok(all_tools);
}
// Add any remaining text
all_normal_text.push_str(remaining);
return Ok((all_normal_text, all_tools));
}
}
......@@ -290,21 +303,30 @@ impl ToolParser for JsonParser {
// Try to parse as JSON first
match serde_json::from_str::<Value>(json_content) {
Ok(value) => self.parse_json_value(&value),
Ok(value) => {
let tools = self.parse_json_value(&value)?;
Ok((String::new(), tools))
}
Err(_) => {
// If parse failed, check if we have multiple JSON objects separated by the configured separator
// This handles cases like: {"name": "func1", ...};{"name": "func2", ...}
// Only do this if we can reasonably expect multiple complete JSON objects
// (i.e., text starts and ends with JSON-like structure)
if !self.token_config.separator.is_empty()
&& json_content.contains(&self.token_config.separator)
&& json_content.trim().starts_with('{')
&& json_content.trim().ends_with('}')
{
let mut all_tools = Vec::new();
// Split by separator and try to parse each part
let parts: Vec<&str> =
json_content.split(&self.token_config.separator).collect();
let mut normal_parts = Vec::new();
for part in parts {
let trimmed = part.trim();
if trimmed.is_empty() {
normal_parts.push(trimmed.to_string());
continue;
}
......@@ -313,32 +335,40 @@ impl ToolParser for JsonParser {
if let Ok(tools) = self.parse_json_value(&value) {
all_tools.extend(tools);
}
} else if let Some(extracted) = self.extract_json_from_text(trimmed) {
normal_parts.push(trimmed.to_string());
} else if let Some((extracted, part_normal_text)) =
self.extract_json_from_text(trimmed)
{
// Try extracting JSON from this part
if let Ok(value) = serde_json::from_str::<Value>(&extracted) {
if let Ok(tools) = self.parse_json_value(&value) {
all_tools.extend(tools);
}
}
normal_parts.push(part_normal_text);
} else {
normal_parts.push(trimmed.to_string());
}
}
if !all_tools.is_empty() {
return Ok(all_tools);
}
// Rejoin with the original separator to preserve it
let all_normal_text = normal_parts.join(&self.token_config.separator);
return Ok((all_normal_text, all_tools));
}
// If no wrapper tokens configured and parse failed,
// try to extract JSON from mixed text
// If no wrapper tokens configured and parse failed, try to extract JSON from mixed text
if self.token_config.start_tokens.is_empty() {
if let Some(extracted) = self.extract_json_from_text(text) {
if let Ok(value) = serde_json::from_str::<Value>(&extracted) {
return self.parse_json_value(&value);
if let Some((extracted_json, normal_text)) = self.extract_json_from_text(text) {
if let Ok(value) = serde_json::from_str::<Value>(&extracted_json) {
let tools = self.parse_json_value(&value)?;
return Ok((normal_text, tools));
}
}
}
// Not valid JSON, return empty
Ok(vec![])
// No valid JSON found, return original text as normal text
Ok((text.to_string(), vec![]))
}
}
}
......@@ -538,9 +568,41 @@ mod tests {
let parser = JsonParser::new();
let input = r#"{"name": "get_weather", "arguments": {"location": "San Francisco"}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "get_weather");
let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
assert_eq!(tool_calls.len(), 1);
assert_eq!(tool_calls[0].function.name, "get_weather");
assert_eq!(normal_text, ""); // Pure JSON should have no normal text
}
/// Checks that a JSON tool call embedded in prose is extracted as a tool call and that the
/// text surrounding it is returned as normal text.
#[tokio::test]
async fn test_extract_json_with_normal_text() {
    let parser = JsonParser::new();

    // Test extraction of JSON from mixed text
    let input =
        r#"Here is some text before {"name": "test", "arguments": {}} and some text after."#;
    let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();

    assert_eq!(tool_calls.len(), 1);
    assert_eq!(tool_calls[0].function.name, "test");
    // The JSON span is cut out and the text before and after it are concatenated verbatim,
    // so the space on each side of the removed object is kept: two consecutive spaces.
    assert_eq!(
        normal_text,
        "Here is some text before  and some text after."
    );
}
/// Checks that a JSON array of tool calls embedded in prose yields one tool call per element
/// and that the text surrounding the array is returned as normal text.
#[tokio::test]
async fn test_extract_json_array_with_normal_text() {
    let parser = JsonParser::new();

    // Test extraction of JSON array from mixed text
    let input = r#"Prefix text [{"name": "func1", "arguments": {}}, {"name": "func2", "arguments": {}}] suffix text"#;
    let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();

    assert_eq!(tool_calls.len(), 2);
    assert_eq!(tool_calls[0].function.name, "func1");
    assert_eq!(tool_calls[1].function.name, "func2");
    // Prefix and suffix are joined verbatim once the array is removed, so the space that
    // preceded the array and the one that followed it both remain.
    assert_eq!(normal_text, "Prefix text  suffix text");
}
#[tokio::test]
......@@ -551,10 +613,11 @@ mod tests {
{"name": "search", "arguments": {"query": "news"}}
]"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 2);
assert_eq!(result[0].function.name, "get_weather");
assert_eq!(result[1].function.name, "search");
let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
assert_eq!(tool_calls.len(), 2);
assert_eq!(tool_calls[0].function.name, "get_weather");
assert_eq!(tool_calls[1].function.name, "search");
assert_eq!(normal_text, ""); // Pure JSON should have no normal text
}
#[tokio::test]
......@@ -562,10 +625,11 @@ mod tests {
let parser = JsonParser::new();
let input = r#"{"name": "calculate", "parameters": {"x": 10, "y": 20}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "calculate");
assert!(result[0].function.arguments.contains("10"));
let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
assert_eq!(tool_calls.len(), 1);
assert_eq!(tool_calls[0].function.name, "calculate");
assert!(tool_calls[0].function.arguments.contains("10"));
assert_eq!(normal_text, ""); // Pure JSON should have no normal text
}
#[tokio::test]
......@@ -577,9 +641,38 @@ mod tests {
});
let input = r#"<tool>{"name": "test", "arguments": {}}</tool>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "test");
let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
assert_eq!(tool_calls.len(), 1);
assert_eq!(tool_calls[0].function.name, "test");
assert_eq!(normal_text, ""); // Wrapper tokens with no extra text
}
/// A start token followed by content that is not JSON must produce no tool calls and hand the
/// input back unchanged as normal text.
#[tokio::test]
async fn test_parse_with_start_token_invalid_json() {
    let config = TokenConfig {
        start_tokens: vec!["<|python_tag|>".to_string()],
        end_tokens: vec!["".to_string()],
        separator: ";".to_string(),
    };
    let parser = JsonParser::with_config(config);

    let input = r#"Hello world <|python_tag|>this is not valid json at all"#;
    let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();

    assert!(tool_calls.is_empty());
    // Should return entire original text when JSON parsing fails
    assert_eq!(normal_text, input);
}
/// Checks that a tool call surrounded by conversational text is parsed and that the
/// conversational text is returned as normal text.
#[tokio::test]
async fn test_parse_with_normal_text() {
    let parser = JsonParser::new();
    let input = r#"Here is the weather data: {"name": "get_weather", "arguments": {"location": "SF"}} Let me know if you need more info."#;

    let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();

    assert_eq!(tool_calls.len(), 1);
    assert_eq!(tool_calls[0].function.name, "get_weather");
    // Normal text is the input with the JSON object removed and nothing else altered, so the
    // space before the object and the space after it end up adjacent.
    assert_eq!(
        normal_text,
        "Here is the weather data:  Let me know if you need more info."
    );
}
#[test]
......
......@@ -79,16 +79,18 @@ impl Default for KimiK2Parser {
#[async_trait]
impl ToolParser for KimiK2Parser {
async fn parse_complete(&self, text: &str) -> ToolParserResult<Vec<ToolCall>> {
async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
// Check if text contains Kimi K2 format
if !self.has_tool_markers(text) {
return Ok(vec![]);
return Ok((text.to_string(), vec![]));
}
// Collect matches with positions and parse tools in one pass
let matches: Vec<_> = self.tool_call_extractor.captures_iter(text).collect();
let mut tools = Vec::new();
// Extract all tool calls
for captures in self.tool_call_extractor.captures_iter(text) {
// Extract all tool calls using collected matches
for captures in matches.iter() {
if let (Some(id_match), Some(args_match)) = (
captures.name("tool_call_id"),
captures.name("function_arguments"),
......@@ -116,7 +118,26 @@ impl ToolParser for KimiK2Parser {
}
}
Ok(tools)
// Extract normal text using first and last match positions
let normal_text = if tools.is_empty() || matches.is_empty() {
text.to_string()
} else {
let first_start = matches[0].get(0).unwrap().start();
let last_end = matches.last().unwrap().get(0).unwrap().end();
let before = if first_start > 0 {
&text[..first_start]
} else {
""
};
let after = if last_end < text.len() {
&text[last_end..]
} else {
""
};
format!("{}{}", before, after)
};
Ok((normal_text, tools))
}
async fn parse_incremental(
......@@ -227,10 +248,10 @@ mod tests {
<|tool_call_begin|>functions.get_weather:0<|tool_call_argument_begin|>{"location": "Tokyo", "units": "celsius"}<|tool_call_end|>
<|tool_calls_section_end|>More text"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "get_weather");
assert!(result[0].function.arguments.contains("Tokyo"));
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "get_weather");
assert!(tools[0].function.arguments.contains("Tokyo"));
}
#[tokio::test]
......@@ -241,10 +262,10 @@ mod tests {
<|tool_call_begin|>functions.calculate:1<|tool_call_argument_begin|>{"expression": "2+2"}<|tool_call_end|>
<|tool_calls_section_end|>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 2);
assert_eq!(result[0].function.name, "search");
assert_eq!(result[1].function.name, "calculate");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 2);
assert_eq!(tools[0].function.name, "search");
assert_eq!(tools[1].function.name, "calculate");
}
#[tokio::test]
......@@ -254,9 +275,9 @@ mod tests {
<|tool_call_begin|> functions.test:0 <|tool_call_argument_begin|> {"key": "value"} <|tool_call_end|>
<|tool_calls_section_end|>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "test");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "test");
}
#[test]
......
......@@ -42,22 +42,32 @@ impl Default for LlamaParser {
#[async_trait]
impl ToolParser for LlamaParser {
async fn parse_complete(&self, text: &str) -> ToolParserResult<Vec<ToolCall>> {
async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
// First try with the configured python_tag parser
let result = self.json_parser.parse_complete(text).await?;
if !result.is_empty() {
return Ok(result);
let (_json_normal_text, tools) = self.json_parser.parse_complete(text).await?;
if !tools.is_empty() {
// Extract normal text before the python tag
// JsonParser doesn't preserve normal text for single start tokens, so we do it manually
let normal_text = if let Some(tag_pos) = text.find("<|python_tag|>") {
text[..tag_pos].to_string()
} else {
String::new()
};
return Ok((normal_text, tools));
}
// If no results and text starts with '{', try plain JSON
if text.trim_start().starts_with('{') {
// Create a temporary plain JSON parser
let plain_parser = JsonParser::new();
return plain_parser.parse_complete(text).await;
let (_json_normal_text, tools) = plain_parser.parse_complete(text).await?;
// For plain JSON, don't extract normal text (consistent with JsonParser behavior)
return Ok((String::new(), tools));
}
Ok(vec![])
// No tool calls found, return original text as normal text
Ok((text.to_string(), vec![]))
}
async fn parse_incremental(
......@@ -99,10 +109,11 @@ mod tests {
let parser = LlamaParser::new();
let input = r#"<|python_tag|>{"name": "search", "arguments": {"query": "weather"}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "search");
assert!(result[0].function.arguments.contains("weather"));
let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
assert_eq!(tool_calls.len(), 1);
assert_eq!(tool_calls[0].function.name, "search");
assert!(tool_calls[0].function.arguments.contains("weather"));
assert_eq!(normal_text, ""); // Pure python_tag with JSON should have no normal text
}
#[tokio::test]
......@@ -110,9 +121,10 @@ mod tests {
let parser = LlamaParser::new();
let input = r#"{"name": "calculate", "arguments": {"x": 5, "y": 10}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "calculate");
let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
assert_eq!(tool_calls.len(), 1);
assert_eq!(tool_calls[0].function.name, "calculate");
assert_eq!(normal_text, ""); // Pure JSON should have no normal text
}
#[tokio::test]
......@@ -120,9 +132,10 @@ mod tests {
let parser = LlamaParser::new();
let input = r#"Let me help you with that. <|python_tag|>{"name": "get_time", "arguments": {"timezone": "UTC"}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "get_time");
let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
assert_eq!(tool_calls.len(), 1);
assert_eq!(tool_calls[0].function.name, "get_time");
assert_eq!(normal_text, "Let me help you with that. ");
}
#[test]
......@@ -141,15 +154,15 @@ mod tests {
// Note: Llama 3.2 doesn't handle multiple calls well
let input = r#"<|python_tag|>{"name": "func1", "arguments": {"x": 1}};"#;
let result = parser.parse_complete(input).await.unwrap();
let (_normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
// We expect this to either parse the first JSON object or fail gracefully
// Since the semicolon makes it invalid JSON, it will likely return empty
// This is acceptable as Llama 3.2 doesn't reliably support parallel calls
// If it parses anything, it should be func1
if !result.is_empty() {
assert_eq!(result[0].function.name, "func1");
if !tool_calls.is_empty() {
assert_eq!(tool_calls[0].function.name, "func1");
}
}
}
......@@ -38,6 +38,10 @@ impl MistralParser {
/// - Escape sequences
/// - Bracket depth
fn extract_json_array<'a>(&self, text: &'a str) -> Option<&'a str> {
    // Convenience wrapper: callers here only want the JSON slice, so the start index
    // reported by the position-aware variant is discarded.
    let (_, json) = self.extract_json_array_with_pos(text)?;
    Some(json)
}
fn extract_json_array_with_pos<'a>(&self, text: &'a str) -> Option<(usize, &'a str)> {
const BOT_TOKEN: &str = "[TOOL_CALLS] [";
// Find the start of the token
......@@ -78,7 +82,7 @@ impl MistralParser {
bracket_count -= 1;
if bracket_count == 0 {
// Found the matching closing bracket
return Some(&text[json_start..=i]);
return Some((start_idx, &text[json_start..=i]));
}
}
}
......@@ -154,18 +158,31 @@ impl Default for MistralParser {
#[async_trait]
impl ToolParser for MistralParser {
async fn parse_complete(&self, text: &str) -> ToolParserResult<Vec<ToolCall>> {
async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
// Check if text contains Mistral format
if !self.has_tool_markers(text) {
return Ok(vec![]);
return Ok((text.to_string(), vec![]));
}
// Extract JSON array from Mistral format
if let Some(json_array) = self.extract_json_array(text) {
self.parse_json_array(json_array)
// Extract JSON array from Mistral format with position
if let Some((start_idx, json_array)) = self.extract_json_array_with_pos(text) {
// Extract normal text before BOT_TOKEN
let normal_text_before = if start_idx > 0 {
text[..start_idx].to_string()
} else {
String::new()
};
match self.parse_json_array(json_array) {
Ok(tools) => Ok((normal_text_before, tools)),
Err(_) => {
// If JSON parsing fails, return the original text as normal text
Ok((text.to_string(), vec![]))
}
}
} else {
// Markers present but no complete array found
Ok(vec![])
Ok((text.to_string(), vec![]))
}
}
......@@ -291,10 +308,10 @@ mod tests {
let parser = MistralParser::new();
let input = r#"[TOOL_CALLS] [{"name": "get_weather", "arguments": {"location": "Paris", "units": "celsius"}}]"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "get_weather");
assert!(result[0].function.arguments.contains("Paris"));
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "get_weather");
assert!(tools[0].function.arguments.contains("Paris"));
}
#[tokio::test]
......@@ -305,10 +322,10 @@ mod tests {
{"name": "calculate", "arguments": {"expression": "2 + 2"}}
]"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 2);
assert_eq!(result[0].function.name, "search");
assert_eq!(result[1].function.name, "calculate");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 2);
assert_eq!(tools[0].function.name, "search");
assert_eq!(tools[1].function.name, "calculate");
}
#[tokio::test]
......@@ -316,11 +333,11 @@ mod tests {
let parser = MistralParser::new();
let input = r#"[TOOL_CALLS] [{"name": "process", "arguments": {"data": [1, 2, [3, 4]], "config": {"nested": [5, 6]}}}]"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "process");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "process");
// JSON serialization removes spaces, so check for [3,4] without spaces
assert!(result[0].function.arguments.contains("[3,4]"));
assert!(tools[0].function.arguments.contains("[3,4]"));
}
#[tokio::test]
......@@ -328,9 +345,9 @@ mod tests {
let parser = MistralParser::new();
let input = r#"[TOOL_CALLS] [{"name": "echo", "arguments": {"message": "He said \"Hello [World]\""}}]"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "echo");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "echo");
}
#[test]
......
......@@ -45,7 +45,8 @@ impl PythonicParser {
}
/// Extract tool calls using bracket counting (similar to MistralParser)
fn extract_tool_calls(&self, text: &str) -> Option<String> {
/// Returns extracted tool call group with [] and normal content
fn extract_tool_calls(&self, text: &str) -> Option<(String, String)> {
// Find the start of a tool call list - look for [ followed by a function name
let chars: Vec<char> = text.chars().collect();
......@@ -103,7 +104,11 @@ impl PythonicParser {
// Found the matching bracket
let extracted: String = chars[start_idx..=i].iter().collect();
if extracted.contains('(') && extracted.contains(')') {
return Some(extracted);
// Calculate normal text by removing the tool call portion
let before = &text[..start_idx];
let after = &text[(i + 1)..];
let normal_text = format!("{}{}", before, after);
return Some((extracted, normal_text));
}
}
}
......@@ -260,11 +265,11 @@ impl PythonicParser {
#[async_trait]
impl ToolParser for PythonicParser {
async fn parse_complete(&self, text: &str) -> ToolParserResult<Vec<ToolCall>> {
async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
let cleaned = Self::strip_special_tokens(text);
// Extract tool calls using bracket counting
if let Some(tool_calls_text) = self.extract_tool_calls(&cleaned) {
if let Some((tool_calls_text, normal_text)) = self.extract_tool_calls(&cleaned) {
// Remove the outer brackets
let tool_calls_str = &tool_calls_text[1..tool_calls_text.len() - 1];
......@@ -318,9 +323,9 @@ impl ToolParser for PythonicParser {
}
}
Ok(calls)
Ok((normal_text, calls))
} else {
Ok(vec![])
Ok((text.to_string(), vec![]))
}
}
......@@ -336,11 +341,11 @@ impl ToolParser for PythonicParser {
// Try to parse if we have a complete tool call
let cleaned = Self::strip_special_tokens(&state.buffer);
if self.extract_tool_calls(&cleaned).is_some() {
let result = self.parse_complete(&state.buffer).await?;
if !result.is_empty() {
let (_normal_text, tools) = self.parse_complete(&state.buffer).await?;
if !tools.is_empty() {
state.buffer.clear();
return Ok(StreamResult::ToolComplete(
result.into_iter().next().unwrap(),
tools.into_iter().next().unwrap(),
));
}
}
......@@ -369,11 +374,11 @@ mod tests {
let parser = PythonicParser::new();
let input = r#"[search_web(query="Rust programming", max_results=5)]"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "search_web");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "search_web");
let args: Value = serde_json::from_str(&result[0].function.arguments).unwrap();
let args: Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
assert_eq!(args["query"], "Rust programming");
assert_eq!(args["max_results"], 5);
}
......@@ -383,10 +388,10 @@ mod tests {
let parser = PythonicParser::new();
let input = r#"[get_weather(city="Tokyo"), search(query="news")]"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 2);
assert_eq!(result[0].function.name, "get_weather");
assert_eq!(result[1].function.name, "search");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 2);
assert_eq!(tools[0].function.name, "get_weather");
assert_eq!(tools[1].function.name, "search");
}
#[tokio::test]
......@@ -394,10 +399,10 @@ mod tests {
let parser = PythonicParser::new();
let input = r#"[test(flag=True, disabled=False, optional=None)]"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
let args: Value = serde_json::from_str(&result[0].function.arguments).unwrap();
let args: Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
assert_eq!(args["flag"], true);
assert_eq!(args["disabled"], false);
assert_eq!(args["optional"], Value::Null);
......@@ -408,11 +413,11 @@ mod tests {
let parser = PythonicParser::new();
let input = r#"<|python_start|>[calculate(x=10, y=20)]<|python_end|>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "calculate");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "calculate");
let args: Value = serde_json::from_str(&result[0].function.arguments).unwrap();
let args: Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
assert_eq!(args["x"], 10);
assert_eq!(args["y"], 20);
}
......@@ -422,12 +427,41 @@ mod tests {
let parser = PythonicParser::new();
let input = r#"[get_weather(city="London", units="celsius")]"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "get_weather");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "get_weather");
let args: Value = serde_json::from_str(&result[0].function.arguments).unwrap();
let args: Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
assert_eq!(args["city"], "London");
assert_eq!(args["units"], "celsius");
}
/// Checks that `parse_complete` returns the text surrounding a pythonic tool-call list
/// as normal text, and returns the whole input when there is no tool call.
///
/// The parser builds normal text by joining what precedes the bracketed call list with
/// what follows it, without trimming. The space on each side of the removed list therefore
/// survives, and the expected strings contain two consecutive spaces at the join. The
/// expectations are written with `concat!` so that the double space stays explicit.
#[tokio::test]
async fn test_normal_text_extraction() {
    let parser = PythonicParser::new();

    // Text before and after a single tool call
    let input = r#"Please check the weather [get_weather(city="Tokyo")] and let me know."#;
    let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "get_weather");
    assert_eq!(
        normal_text,
        concat!("Please check the weather ", " and let me know.")
    );

    // Only normal text (no tool calls): the input comes back unchanged
    let input_no_tools = "This is just normal text without any tool calls.";
    let (normal_text, tools) = parser.parse_complete(input_no_tools).await.unwrap();
    assert_eq!(tools.len(), 0);
    assert_eq!(normal_text, input_no_tools);

    // Several tool calls inside one bracket group, surrounded by normal text
    let input_multiple = r#"First, [search(query="rust"), calculate(x=5, y=10)] please."#;
    let (normal_text, tools) = parser.parse_complete(input_multiple).await.unwrap();
    assert_eq!(tools.len(), 2);
    assert_eq!(tools[0].function.name, "search");
    assert_eq!(tools[1].function.name, "calculate");
    assert_eq!(normal_text, concat!("First, ", " please."));
}
}
......@@ -128,32 +128,51 @@ impl Default for QwenParser {
#[async_trait]
impl ToolParser for QwenParser {
// NOTE(review): this span is rendered diff text. Pre-change and post-change lines are
// interleaved (e.g. the two signatures below, and the old `tool_blocks` loop next to the new
// `matches` loop).
async fn parse_complete(&self, text: &str) -> ToolParserResult<Vec<ToolCall>> {
// Post-change contract: parse a complete (non-streaming) output and return
// `(normal_text, tool_calls)`. When no tool call is parsed, the whole input is returned
// as normal text.
async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
// Check if text contains Qwen format
if !self.has_tool_markers(text) {
return Ok(vec![]);
// No markers: the entire input is normal text.
return Ok((text.to_string(), vec![]));
}
// Extract all tool call blocks
let tool_blocks = self.extract_tool_calls(text);
// Collect matches with positions and parse tools in one pass
let matches: Vec<_> = self.extractor.captures_iter(text).collect();
let mut tools = Vec::new();
for (index, json_str) in tool_blocks.iter().enumerate() {
// Parse each JSON block
match serde_json::from_str::<Value>(json_str.trim()) {
Ok(value) => {
if let Some(tool) = self.parse_single_object(&value, index)? {
tools.push(tool);
for (index, captures) in matches.iter().enumerate() {
// Capture group 1 is the text handed to the JSON parser.
if let Some(json_str) = captures.get(1) {
match serde_json::from_str::<Value>(json_str.as_str().trim()) {
Ok(value) => {
// An `Err` from `parse_single_object` aborts the whole call via `?`;
// only JSON syntax errors are tolerated below.
if let Some(tool) = self.parse_single_object(&value, index)? {
tools.push(tool);
}
}
Err(_) => {
// JSON parsing failed, might be incomplete
// The block is skipped and contributes no tool call.
}
}
Err(_) => {
// Skip malformed JSON blocks
continue;
}
}
}
Ok(tools)
// Extract normal text using first and last match positions
// With no parsed tools the input is returned whole. Otherwise only the text before the
// first match and after the last match is kept, so anything between matches (including
// blocks whose JSON failed to parse) is dropped from the normal text.
let normal_text = if tools.is_empty() {
text.to_string()
} else {
// `tools` is non-empty here, so `matches` has at least one entry. Group 0 is the
// whole match; presumably always present for a successful match (confirm against the
// regex crate docs), which is what the `unwrap()` calls rely on.
let first_start = matches[0].get(0).unwrap().start();
let last_end = matches.last().unwrap().get(0).unwrap().end();
// NOTE(review): both guards below are redundant; slicing at 0 or at `text.len()`
// already yields an empty string.
let before = if first_start > 0 {
&text[..first_start]
} else {
""
};
let after = if last_end < text.len() {
&text[last_end..]
} else {
""
};
format!("{}{}", before, after)
};
Ok((normal_text, tools))
}
async fn parse_incremental(
......@@ -276,10 +295,11 @@ mod tests {
{"name": "get_weather", "arguments": {"location": "Beijing", "units": "celsius"}}
</tool_call>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "get_weather");
assert!(result[0].function.arguments.contains("Beijing"));
let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "get_weather");
assert!(tools[0].function.arguments.contains("Beijing"));
assert_eq!(normal_text, ""); // Pure tool call, no normal text
}
#[tokio::test]
......@@ -292,10 +312,11 @@ mod tests {
{"name": "calculate", "arguments": {"expression": "2 + 2"}}
</tool_call>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 2);
assert_eq!(result[0].function.name, "search");
assert_eq!(result[1].function.name, "calculate");
let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 2);
assert_eq!(tools[0].function.name, "search");
assert_eq!(tools[1].function.name, "calculate");
assert_eq!(normal_text, ""); // Pure tool calls, no normal text
}
#[tokio::test]
......@@ -307,9 +328,13 @@ mod tests {
</tool_call>
Here are the results."#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "get_info");
let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "get_info");
assert_eq!(
normal_text,
"Let me help you with that.\n\nHere are the results."
);
}
#[tokio::test]
......@@ -329,10 +354,11 @@ Here are the results."#;
}
</tool_call>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "process_data");
assert!(result[0].function.arguments.contains("nested"));
let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "process_data");
assert!(tools[0].function.arguments.contains("nested"));
assert_eq!(normal_text, ""); // Pure tool call, no normal text
}
#[test]
......
......@@ -157,10 +157,10 @@ impl Default for Step3Parser {
#[async_trait]
impl ToolParser for Step3Parser {
async fn parse_complete(&self, text: &str) -> ToolParserResult<Vec<ToolCall>> {
async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
// Check if text contains Step3 format
if !self.has_tool_markers(text) {
return Ok(vec![]);
return Ok((text.to_string(), vec![]));
}
// Find the tool calls section
......@@ -170,6 +170,7 @@ impl ToolParser for Step3Parser {
// Find the end of tool calls section
if let Some(end_pos) = text[search_from..].find("<|tool_calls_end|>") {
let tool_section = &text[search_from..search_from + end_pos];
let end_abs = search_from + end_pos + "<|tool_calls_end|>".len();
// Extract all tool call blocks
let mut tools = Vec::new();
......@@ -179,11 +180,24 @@ impl ToolParser for Step3Parser {
}
}
return Ok(tools);
// Extract normal text before start and after end
let before = if start_pos > 0 {
&text[..start_pos]
} else {
""
};
let after = if end_abs < text.len() {
&text[end_abs..]
} else {
""
};
let normal_text = format!("{}{}", before, after);
return Ok((normal_text, tools));
}
}
Ok(vec![])
Ok((text.to_string(), vec![]))
}
async fn parse_incremental(
......@@ -289,11 +303,11 @@ mod tests {
</steptml:invoke><|tool_call_end|>
<|tool_calls_end|>More text"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "get_weather");
assert!(result[0].function.arguments.contains("Tokyo"));
assert!(result[0].function.arguments.contains("celsius"));
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "get_weather");
assert!(tools[0].function.arguments.contains("Tokyo"));
assert!(tools[0].function.arguments.contains("celsius"));
}
#[tokio::test]
......@@ -308,10 +322,10 @@ mod tests {
</steptml:invoke><|tool_call_end|>
<|tool_calls_end|>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 2);
assert_eq!(result[0].function.name, "search");
assert_eq!(result[1].function.name, "calculate");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 2);
assert_eq!(tools[0].function.name, "search");
assert_eq!(tools[1].function.name, "calculate");
}
#[tokio::test]
......@@ -326,12 +340,12 @@ mod tests {
</steptml:invoke><|tool_call_end|>
<|tool_calls_end|>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "process_data");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "process_data");
// Parse arguments to check types
let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
assert_eq!(args["count"], 42);
assert_eq!(args["active"], true);
assert_eq!(args["rate"], 1.5);
......
This diff is collapsed.
......@@ -9,7 +9,8 @@ use async_trait::async_trait;
#[async_trait]
pub trait ToolParser: Send + Sync {
/// Parse complete tool calls from final output
async fn parse_complete(&self, output: &str) -> ToolParserResult<Vec<ToolCall>>;
/// Returns (remaining_normal_text, tool_calls) tuple
async fn parse_complete(&self, output: &str) -> ToolParserResult<(String, Vec<ToolCall>)>;
/// Parse tool calls from model output (streaming)
async fn parse_incremental(
......
......@@ -13,11 +13,11 @@ async fn test_deepseek_complete_parsing() {
```<|tool▁call▁end|><|tool▁calls▁end|>
The weather in Tokyo is..."#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "get_weather");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "get_weather");
let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
assert_eq!(args["location"], "Tokyo");
assert_eq!(args["units"], "celsius");
}
......@@ -37,10 +37,10 @@ async fn test_deepseek_multiple_tools() {
```<|tool▁call▁end|>
<|tool▁calls▁end|>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 2);
assert_eq!(result[0].function.name, "search");
assert_eq!(result[1].function.name, "translate");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 2);
assert_eq!(tools[0].function.name, "search");
assert_eq!(tools[1].function.name, "translate");
}
#[tokio::test]
......@@ -96,11 +96,11 @@ async fn test_deepseek_nested_json() {
}
```<|tool▁call▁end|><|tool▁calls▁end|>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "process");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "process");
let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
assert!(args["data"]["nested"]["deep"].is_array());
}
......@@ -134,10 +134,10 @@ async fn test_deepseek_malformed_json_handling() {
```<|tool▁call▁end|>
<|tool▁calls▁end|>"#;
let result = parser.parse_complete(input).await.unwrap();
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
// Only the valid tool call should be parsed
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "valid");
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "valid");
}
#[tokio::test]
......@@ -151,9 +151,9 @@ async fn test_normal_text_extraction() {
{"location": "Tokyo"}
```<|tool▁call▁end|><|tool▁calls▁end|>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "get_weather");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "get_weather");
// TODO: Verify normal text extraction when parser returns it
// In Python: normal_text = "Let me help you with that."
......@@ -174,8 +174,8 @@ async fn test_multiple_tool_calls() {
```<|tool▁call▁end|>
<|tool▁calls▁end|><|end▁of▁sentence|>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 2);
assert_eq!(result[0].function.name, "get_weather");
assert_eq!(result[1].function.name, "get_weather");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 2);
assert_eq!(tools[0].function.name, "get_weather");
assert_eq!(tools[1].function.name, "get_weather");
}
......@@ -16,9 +16,9 @@ async fn test_empty_input() {
let parser = registry
.get_parser(&format!("test-{}", parser_name))
.unwrap();
let result = parser.parse_complete("").await.unwrap();
let (_normal_text, tools) = parser.parse_complete("").await.unwrap();
assert_eq!(
result.len(),
tools.len(),
0,
"Parser {} should return empty for empty input",
parser_name
......@@ -32,7 +32,12 @@ async fn test_plain_text_no_tools() {
let json_parser = JsonParser::new();
assert_eq!(
json_parser.parse_complete(plain_text).await.unwrap().len(),
json_parser
.parse_complete(plain_text)
.await
.unwrap()
.1
.len(),
0
);
......@@ -42,13 +47,19 @@ async fn test_plain_text_no_tools() {
.parse_complete(plain_text)
.await
.unwrap()
.1
.len(),
0
);
let qwen_parser = QwenParser::new();
assert_eq!(
qwen_parser.parse_complete(plain_text).await.unwrap().len(),
qwen_parser
.parse_complete(plain_text)
.await
.unwrap()
.1
.len(),
0
);
......@@ -58,6 +69,7 @@ async fn test_plain_text_no_tools() {
.parse_complete(plain_text)
.await
.unwrap()
.1
.len(),
0
);
......@@ -74,9 +86,9 @@ async fn test_incomplete_json() {
];
for input in incomplete_cases {
let result = json_parser.parse_complete(input).await.unwrap();
let (_normal_text, tools) = json_parser.parse_complete(input).await.unwrap();
assert_eq!(
result.len(),
tools.len(),
0,
"Should not parse incomplete JSON: {}",
input
......@@ -106,9 +118,9 @@ async fn test_malformed_mistral() {
for input in malformed_cases {
// Parser might return error or empty vec for malformed input
if let Ok(result) = parser.parse_complete(input).await {
if let Ok((_normal_text, tools)) = parser.parse_complete(input).await {
assert_eq!(
result.len(),
tools.len(),
0,
"Should not parse malformed Mistral: {}",
input
......@@ -124,13 +136,13 @@ async fn test_missing_required_fields() {
// Missing name field
let input = r#"{"arguments": {"x": 1}}"#;
let result = json_parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 0, "Should not parse without name field");
let (_normal_text, tools) = json_parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 0, "Should not parse without name field");
// Name is not a string
let input = r#"{"name": 123, "arguments": {}}"#;
let result = json_parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 0, "Should not parse with non-string name");
let (_normal_text, tools) = json_parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 0, "Should not parse with non-string name");
}
#[tokio::test]
......@@ -143,11 +155,11 @@ async fn test_very_long_strings() {
long_string
);
let result = json_parser.parse_complete(&input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "test");
let (_normal_text, tools) = json_parser.parse_complete(&input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "test");
let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
assert_eq!(args["data"].as_str().unwrap().len(), 10000);
}
......@@ -158,10 +170,10 @@ async fn test_unicode_edge_cases() {
// Various Unicode characters including emojis, CJK, RTL text
let input = r#"{"name": "translate", "arguments": {"text": "Hello 世界 🌍 مرحبا עולם"}}"#;
let result = json_parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
let (_normal_text, tools) = json_parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
assert_eq!(args["text"], "Hello 世界 🌍 مرحبا עולם");
}
......@@ -169,16 +181,16 @@ async fn test_unicode_edge_cases() {
async fn test_nested_brackets_in_strings() {
let mistral_parser = MistralParser::new();
let input = r#"[TOOL_CALLS] [{"name": "echo", "arguments": {"text": "Array: [1, 2, 3]"}}]"#;
let result = mistral_parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
let (_normal_text, tools) = mistral_parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
assert_eq!(args["text"], "Array: [1, 2, 3]");
let pythonic_parser = PythonicParser::new();
let input = r#"[echo(text="List: [a, b, c]")]"#;
let result = pythonic_parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
let (_normal_text, tools) = pythonic_parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
assert_eq!(args["text"], "List: [a, b, c]");
}
......@@ -191,9 +203,9 @@ async fn test_multiple_formats_in_text() {
And some more text with <tool_call> tags.
"#;
let result = json_parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "actual_tool");
let (_normal_text, tools) = json_parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "actual_tool");
}
#[tokio::test]
......@@ -202,10 +214,10 @@ async fn test_escaped_characters() {
let input = r#"{"name": "write", "arguments": {"content": "Line 1\nLine 2\r\nLine 3\tTabbed\\Backslash\"Quote"}}"#;
let result = json_parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
let (_normal_text, tools) = json_parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
let content = args["content"].as_str().unwrap();
assert!(content.contains('\n'));
assert!(content.contains('\t'));
......@@ -229,10 +241,10 @@ async fn test_numeric_edge_cases() {
}
}"#;
let result = json_parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
let (_normal_text, tools) = json_parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
assert_eq!(args["int"], 42);
assert_eq!(args["float"], 123.456);
assert_eq!(args["scientific"], 0.000123);
......@@ -254,10 +266,10 @@ async fn test_null_and_boolean_values() {
}
}"#;
let result = json_parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
let (_normal_text, tools) = json_parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
assert_eq!(args["enabled"], true);
assert_eq!(args["disabled"], false);
assert_eq!(args["optional"], serde_json::Value::Null);
......
//! Tests for tool parser fallback behavior
//!
//! When tool call parsing fails, the original text should be preserved as normal text
//! rather than being lost. This ensures graceful degradation.
use sglang_router_rs::tool_parser::{
DeepSeekParser, JsonParser, LlamaParser, MistralParser, QwenParser, ToolParser,
};
/// Inputs that `JsonParser` cannot turn into a tool call must come back verbatim as
/// normal text, with no tool calls reported.
#[tokio::test]
async fn test_json_parser_invalid_json_returns_as_normal_text() {
    let json_parser = JsonParser::new();

    let unparseable_inputs = [
        // Object-shaped text whose `arguments` value is not valid JSON
        r#"{"name": "test", "arguments": invalid json here}"#,
        // Prose with no JSON structure at all
        "This is just plain text that should not be parsed as a tool call",
        // Prose containing braces that do not form valid JSON
        "The user said: {something} but it's not valid JSON",
    ];

    for raw in unparseable_inputs {
        let (normal_text, tools) = json_parser.parse_complete(raw).await.unwrap();
        assert_eq!(tools.len(), 0);
        assert_eq!(normal_text, raw);
    }
}
/// `QwenParser` must hand the input back untouched as normal text whenever it
/// extracts no tool call.
#[tokio::test]
async fn test_qwen_parser_invalid_format_returns_as_normal_text() {
    let qwen = QwenParser::new();

    let rejected_inputs = [
        // Opening tag that is never closed
        r#"<tool_call>
{"name": "test", "arguments": {}}
This text is missing the closing tag"#,
        // Well-formed tags wrapping a body that is not valid JSON
        r#"<tool_call>
{"name": "test", "arguments": invalid}
</tool_call>"#,
        // No tool markers anywhere
        "This is a regular response without any tool calls.",
    ];

    for raw in rejected_inputs {
        let (normal_text, tools) = qwen.parse_complete(raw).await.unwrap();
        assert_eq!(tools.len(), 0);
        assert_eq!(normal_text, raw);
    }
}
/// `LlamaParser` must return the full input as normal text when no tool call
/// can be parsed out of it.
#[tokio::test]
async fn test_llama_parser_invalid_format_returns_as_normal_text() {
    let llama = LlamaParser::new();

    let rejected_inputs = [
        // `<|python_tag|>` followed by a body that is not valid JSON
        r#"<|python_tag|>{"name": "test", "arguments": invalid}"#,
        // Prose with neither the tag nor any JSON
        "Just explaining something without any function calls.",
        // Leading prose, then the tag, then text that is nothing like JSON
        r#"Here's my response <|python_tag|>not even close to JSON"#,
    ];

    for raw in rejected_inputs {
        let (normal_text, tools) = llama.parse_complete(raw).await.unwrap();
        assert_eq!(tools.len(), 0);
        assert_eq!(normal_text, raw);
    }
}
/// `MistralParser` must fall back to returning the whole input as normal text
/// when the `[TOOL_CALLS]` payload is missing, incomplete, or not valid JSON.
#[tokio::test]
async fn test_mistral_parser_invalid_format_returns_as_normal_text() {
    let mistral = MistralParser::new();

    let rejected_inputs = [
        // Array opened after the marker but never closed
        r#"[TOOL_CALLS] [{"name": "test", "arguments": {}"#,
        // Bracket-balanced array whose contents are not valid JSON
        r#"[TOOL_CALLS] [{"name": invalid json}]"#,
        // No marker at all
        "No tool calls here, just regular text.",
    ];

    for raw in rejected_inputs {
        let (normal_text, tools) = mistral.parse_complete(raw).await.unwrap();
        assert_eq!(tools.len(), 0);
        assert_eq!(normal_text, raw);
    }
}
/// `DeepSeekParser` must return the input unchanged as normal text when it finds
/// no tool calls.
///
/// NOTE(review): none of these inputs contains the `<|tool▁calls▁begin|>` marker the
/// parser looks for, so they all exercise the "no markers" path rather than a failed
/// parse of a marked section. Consider adding a case with real markers.
#[tokio::test]
async fn test_deepseek_parser_invalid_format_returns_as_normal_text() {
    let deepseek = DeepSeekParser::new();

    let rejected_inputs = [
        // Emoji prefix followed by a bracketed body that is not valid JSON
        r#"🤔[{"name": "test", "arguments": malformed}]"#,
        // Emoji prefix followed by plain prose
        "🤔 Just thinking about this problem...",
        // Plain prose only
        "Regular response without any special markers.",
    ];

    for raw in rejected_inputs {
        let (normal_text, tools) = deepseek.parse_complete(raw).await.unwrap();
        assert_eq!(tools.len(), 0);
        assert_eq!(normal_text, raw);
    }
}
/// A response mixing one well-formed and one malformed Qwen tool call must still yield
/// the well-formed call, and keep the prose at the start and end as normal text.
#[tokio::test]
async fn test_mixed_valid_and_invalid_content() {
    let qwen = QwenParser::new();

    // First block holds valid JSON; the second block's `arguments` value is not JSON.
    let response = r#"Let me help you with that.
<tool_call>
{"name": "valid_tool", "arguments": {"x": 1}}
</tool_call>
And here's another one:
<tool_call>
{"name": "invalid_tool", "arguments": malformed}
</tool_call>
That's all!"#;

    let (normal_text, tools) = qwen.parse_complete(response).await.unwrap();

    // Only the valid call is extracted.
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "valid_tool");

    // The leading and trailing prose must both survive in the normal text.
    for fragment in ["Let me help you", "That's all!"] {
        assert!(normal_text.contains(fragment));
    }
}
#[tokio::test]
async fn test_partial_tool_markers() {
    // Each input begins a tool-call marker that is never completed; the matching parser
    // is expected to return the text unchanged together with no tool calls.

    // Qwen: opening <tool_call> tag without a closing tag.
    {
        let qwen = QwenParser::new();
        let raw = "<tool_call>\nThis looks like it might be a tool call but it's not";
        let (text, calls) = qwen.parse_complete(raw).await.unwrap();
        assert!(calls.is_empty());
        assert_eq!(text, raw);
    }

    // Mistral: [TOOL_CALLS] prefix with no JSON array after it.
    {
        let mistral = MistralParser::new();
        let raw = "[TOOL_CALLS] But then nothing follows...";
        let (text, calls) = mistral.parse_complete(raw).await.unwrap();
        assert!(calls.is_empty());
        assert_eq!(text, raw);
    }

    // Llama: <|python_tag|> in the middle of prose with no JSON payload.
    {
        let llama = LlamaParser::new();
        let raw = "Starting a response <|python_tag|> but no JSON";
        let (text, calls) = llama.parse_complete(raw).await.unwrap();
        assert!(calls.is_empty());
        assert_eq!(text, raw);
    }
}
#[tokio::test]
async fn test_escaped_json_like_content() {
    // Tool-call syntax that appears inside ordinary prose is still extracted as a tool
    // call; this test pins that behaviour for the JSON and Qwen parsers.

    // JsonParser: the embedded object becomes a tool call and is removed from the text.
    let json_parser = JsonParser::new();
    let quoted = r#"The user typed: {"name": "example"} but this is just quoted text"#;
    let (text, calls) = json_parser.parse_complete(quoted).await.unwrap();
    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].function.name, "example");
    assert_eq!(text, "The user typed: but this is just quoted text");

    // QwenParser: a complete <tool_call> block is valid syntax even when the surrounding
    // sentence is merely describing the format, so it parses. Normal text is not checked.
    let qwen_parser = QwenParser::new();
    let described = r#"The syntax is: <tool_call>
{"name": "example"}
</tool_call> - that's how you format it"#;
    let (_text, calls) = qwen_parser.parse_complete(described).await.unwrap();
    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].function.name, "example");
}
#[tokio::test]
async fn test_unicode_and_special_chars_in_failed_parsing() {
    let parser = QwenParser::new();

    // Non-ASCII function name plus a stray emoji where the arguments JSON should be.
    let input = r#"<tool_call>
{"name": "测试", "arguments": 🚀 invalid}
</tool_call>"#;
    let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 0);
    // Only non-emptiness of the fallback text is verified. The previous
    // `|| normal_text == input` alternative could never change the outcome because
    // `input` is itself non-empty, so it has been dropped.
    assert!(!normal_text.is_empty());

    // Escape sequences and bracket characters inside string values that might confuse
    // a parser.
    let input = r#"Response: <tool_call>{"name": "test\n\t", "arguments": {"]}"}</tool_call>"#;
    let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
    // Whether this parses depends on how the JSON escape sequences are handled, so the
    // text is only checked when no tool call was produced.
    if tools.is_empty() {
        assert!(!normal_text.is_empty());
    }
}
#[tokio::test]
async fn test_very_long_invalid_input() {
    let parser = JsonParser::new();

    // 1000 well-formed key/value pairs, each followed by ", ".
    let fields: String = (0..1000)
        .map(|i| format!("\"field{}\": \"value{}\", ", i, i))
        .collect();

    // Looks like a tool call object, but the last value is a bare word and neither
    // brace is ever closed, so the whole thing is invalid JSON.
    let input = [
        "{\"name\": \"test\", \"arguments\": {",
        fields.as_str(),
        "\"final\": incomplete",
    ]
    .concat();

    let (text, calls) = parser.parse_complete(&input).await.unwrap();
    assert!(calls.is_empty());
    // Invalid JSON is handed back verbatim as normal text.
    assert_eq!(text, input);
}
#[tokio::test]
async fn test_almost_valid_tool_calls() {
    // Inputs that are one small mistake away from being a valid JSON tool call.
    let parser = JsonParser::new();

    // Unterminated string value: nothing parses and the text is returned verbatim.
    let unterminated = r#"{"name": "test", "arguments": {"key": "value}}"#;
    let (text, calls) = parser.parse_complete(unterminated).await.unwrap();
    assert!(calls.is_empty());
    assert_eq!(text, unterminated);

    // Trailing comma after the arguments object. A lenient JSON parser might accept
    // this, so the text is only checked when no tool call came back; the expected text
    // is the input with the inner `{}` removed.
    let trailing_comma = r#"{"name": "test", "arguments": {},}"#;
    let (text, calls) = parser.parse_complete(trailing_comma).await.unwrap();
    if calls.is_empty() {
        assert_eq!(text, r#"{"name": "test", "arguments": ,}"#);
    }

    // Single-quoted keys and values: standard JSON requires double quotes, so no tool
    // call is produced; the expected text is again the input without the inner `{}`.
    let single_quoted = r#"{'name': 'test', 'arguments': {}}"#;
    let (text, calls) = parser.parse_complete(single_quoted).await.unwrap();
    assert!(calls.is_empty());
    assert_eq!(text, r#"{'name': 'test', 'arguments': }"#);
}
......@@ -15,11 +15,11 @@ async fn test_glm4_complete_parsing() {
</tool_call>
The weather will be..."#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "get_weather");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "get_weather");
let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
assert_eq!(args["city"], "Beijing");
assert_eq!(args["date"], "2024-12-25");
}
......@@ -39,10 +39,10 @@ async fn test_glm4_multiple_tools() {
<arg_value>zh</arg_value>
</tool_call>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 2);
assert_eq!(result[0].function.name, "search");
assert_eq!(result[1].function.name, "translate");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 2);
assert_eq!(tools[0].function.name, "search");
assert_eq!(tools[1].function.name, "translate");
}
#[tokio::test]
......@@ -62,10 +62,10 @@ async fn test_glm4_type_conversion() {
<arg_value>string value</arg_value>
</tool_call>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
assert_eq!(args["count"], 42);
assert_eq!(args["rate"], 1.5);
assert_eq!(args["enabled"], true);
......@@ -138,10 +138,10 @@ async fn test_glm4_python_literal_values() {
<arg_value>None</arg_value>
</tool_call>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
assert_eq!(args["debug"], true);
assert_eq!(args["verbose"], false);
assert_eq!(args["optional"], serde_json::Value::Null);
......@@ -160,11 +160,11 @@ async fn test_python_literals() {
<arg_value>None</arg_value>
</tool_call>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "test_func");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "test_func");
let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
assert_eq!(args["bool_true"], true);
assert_eq!(args["bool_false"], false);
assert_eq!(args["none_val"], serde_json::Value::Null);
......@@ -181,10 +181,10 @@ async fn test_nested_values() {
<arg_value>[1, 2, 3]</arg_value>
</tool_call>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
assert!(args["data"].is_object());
assert!(args["list"].is_array());
}
......@@ -10,11 +10,11 @@ async fn test_gpt_oss_complete_parsing() {
<|channel|>commentary to=functions.search<|constrain|>json<|message|>{"query": "rust programming", "limit": 10}<|call|>
Here are the results..."#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "search");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "search");
let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
assert_eq!(args["query"], "rust programming");
assert_eq!(args["limit"], 10);
}
......@@ -26,10 +26,10 @@ async fn test_gpt_oss_multiple_tools() {
let input = r#"<|channel|>commentary to=functions.get_weather<|constrain|>json<|message|>{"location": "Paris"}<|call|>commentary
<|channel|>commentary to=functions.search<|constrain|>json<|message|>{"query": "Paris tourism"}<|call|>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 2);
assert_eq!(result[0].function.name, "get_weather");
assert_eq!(result[1].function.name, "search");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 2);
assert_eq!(tools[0].function.name, "get_weather");
assert_eq!(tools[1].function.name, "search");
}
#[tokio::test]
......@@ -39,10 +39,10 @@ async fn test_gpt_oss_with_namespace() {
let input = r#"<|channel|>commentary to=api.users.create<|constrain|>json<|message|>{"name": "John", "email": "john@example.com"}<|call|>
<|channel|>commentary to=tools.calculator.add<|constrain|>json<|message|>{"x": 10, "y": 20}<|call|>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 2);
assert_eq!(result[0].function.name, "create"); // Should extract last part
assert_eq!(result[1].function.name, "add");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 2);
assert_eq!(tools[0].function.name, "create"); // Should extract last part
assert_eq!(tools[1].function.name, "add");
}
#[tokio::test]
......@@ -51,9 +51,9 @@ async fn test_gpt_oss_with_assistant_prefix() {
let input = r#"<|start|>assistant<|channel|>commentary to=functions.test<|constrain|>json<|message|>{"key": "value"}<|call|>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "test");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "test");
}
#[tokio::test]
......@@ -63,10 +63,10 @@ async fn test_gpt_oss_empty_args() {
let input =
r#"<|channel|>commentary to=functions.get_time<|constrain|>json<|message|>{}<|call|>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "get_time");
assert_eq!(result[0].function.arguments, "{}");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "get_time");
assert_eq!(tools[0].function.arguments, "{}");
}
#[tokio::test]
......@@ -127,9 +127,9 @@ async fn test_gpt_oss_with_whitespace() {
let input = r#"<|channel|>commentary to=functions.test <|constrain|>json<|message|>{"key": "value"}<|call|>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "test");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "test");
}
#[tokio::test]
......@@ -145,11 +145,11 @@ async fn test_gpt_oss_complex_json() {
}
}<|call|>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "process");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "process");
let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
assert!(args["nested"]["data"].is_array());
assert_eq!(args["nested"]["config"]["enabled"], true);
}
......@@ -161,9 +161,9 @@ async fn test_commentary_without_function() {
// Python should extract commentary as normal text
let input = r#"<|channel|>commentary<|message|>**Action plan**: 1. Do X 2. Do Y<|end|>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 0); // No tool calls
// TODO: Verify normal text = "**Action plan**: 1. Do X 2. Do Y"
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 0); // No tool calls
// TODO: Verify normal text = "**Action plan**: 1. Do X 2. Do Y"
}
#[tokio::test]
......@@ -173,9 +173,9 @@ async fn test_final_channel() {
let input = r#"<|channel|>commentary to=functions.test<|constrain|>json<|message|>{"x": 1}<|call|>
<|channel|>final<|message|>The result is calculated.<|return|>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "test");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "test");
// TODO: Verify normal text = "The result is calculated."
}
......@@ -187,8 +187,8 @@ async fn test_mixed_commentary_and_calls() {
<|channel|>commentary to=functions.calc<|constrain|>json<|message|>{"x": 5}<|call|>
<|channel|>commentary<|message|>Processing...<|end|>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "calc");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "calc");
// TODO: Verify normal text = "Let me think Processing..."
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment