Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
f556ac8b
Unverified
Commit
f556ac8b
authored
Aug 22, 2025
by
Simo Lin
Committed by
GitHub
Aug 22, 2025
Browse files
[router] add json tool parser (#9516)
parent
110a6598
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
1049 additions
and
9 deletions
+1049
-9
sgl-router/src/tool_parser/json_parser.rs
sgl-router/src/tool_parser/json_parser.rs
+390
-0
sgl-router/src/tool_parser/mod.rs
sgl-router/src/tool_parser/mod.rs
+2
-1
sgl-router/src/tool_parser/registry.rs
sgl-router/src/tool_parser/registry.rs
+20
-8
sgl-router/src/tool_parser/tests.rs
sgl-router/src/tool_parser/tests.rs
+637
-0
No files found.
sgl-router/src/tool_parser/json_parser.rs
0 → 100644
View file @
f556ac8b
use
async_trait
::
async_trait
;
use
regex
::
Regex
;
use
serde_json
::
Value
;
use
crate
::
tool_parser
::{
errors
::{
ToolParserError
,
ToolParserResult
},
partial_json
::
PartialJson
,
state
::
ParseState
,
traits
::
ToolParser
,
types
::{
FunctionCall
,
StreamResult
,
ToolCall
},
};
/// JSON format parser for tool calls
///
/// Handles various JSON formats for function calling:
/// - Single tool call: {"name": "fn", "arguments": {...}}
/// - Multiple tool calls: [{"name": "fn1", "arguments": {...}}, ...]
/// - With parameters instead of arguments: {"name": "fn", "parameters": {...}}
///
/// Supports configurable token markers for different models
pub
struct
JsonParser
{
/// Token(s) that mark the start of tool calls
start_tokens
:
Vec
<
String
>
,
/// Token(s) that mark the end of tool calls
end_tokens
:
Vec
<
String
>
,
/// Separator between multiple tool calls (reserved for future use)
_
separator
:
String
,
/// Parser for handling incomplete JSON during streaming
partial_json
:
PartialJson
,
/// Regex patterns for extracting content between tokens
extractors
:
Vec
<
Regex
>
,
}
impl JsonParser {
    /// Build a parser with no wrapper tokens and `", "` as the separator.
    pub fn new() -> Self {
        let no_start_tokens = Vec::new();
        let no_end_tokens = Vec::new();
        Self::with_config(no_start_tokens, no_end_tokens, String::from(", "))
    }

    /// Build a parser with custom wrapper tokens.
    ///
    /// `start_tokens` and `end_tokens` are paired by position. For every pair
    /// in which both tokens are non-empty, a pattern capturing the text between
    /// them is compiled up front; pairs that fail to compile are dropped.
    pub fn with_config(
        start_tokens: Vec<String>,
        end_tokens: Vec<String>,
        separator: String,
    ) -> Self {
        let mut extractors = Vec::new();
        for (start, end) in start_tokens.iter().zip(end_tokens.iter()) {
            if start.is_empty() || end.is_empty() {
                continue;
            }
            // (?s) turns on DOTALL so that `.` also matches newlines, which
            // lets the capture span pretty-printed, multi-line JSON.
            let pattern = format!(
                r"(?s){}(.*?){}",
                regex::escape(start),
                regex::escape(end)
            );
            if let Ok(compiled) = Regex::new(&pattern) {
                extractors.push(compiled);
            }
        }

        Self {
            start_tokens,
            end_tokens,
            _separator: separator,
            partial_json: PartialJson::default(),
            extractors,
        }
    }

    /// Return the slice of `text` that should hold the JSON payload.
    ///
    /// The input is trimmed first. The first extractor that matches replaces
    /// it with its (trimmed) capture. Afterwards, every pair that has a start
    /// token but an empty end token strips that start token from the front.
    fn extract_json_content<'a>(&self, text: &'a str) -> &'a str {
        let trimmed = text.trim();

        // Only the first matching wrapper pattern is applied.
        let unwrapped = self
            .extractors
            .iter()
            .find_map(|re| re.captures(trimmed).and_then(|caps| caps.get(1)))
            .map(|inner| inner.as_str().trim())
            .unwrap_or(trimmed);

        // Pairs with a start token and no end token: drop the leading token.
        self.start_tokens
            .iter()
            .zip(self.end_tokens.iter())
            .filter(|(start, end)| !start.is_empty() && end.is_empty())
            .fold(unwrapped, |acc, (start, _)| {
                acc.strip_prefix(start.as_str()).unwrap_or(acc)
            })
    }

    /// Turn one JSON value into a `ToolCall`, if it looks like one.
    ///
    /// Returns `Ok(None)` when neither `"name"` nor (as a fallback when
    /// `"name"` is absent) `"function"` yields a string. Arguments come from
    /// `"arguments"`, else `"parameters"`, else an empty object, and are
    /// re-serialized to a JSON string. A string `"id"` is reused; otherwise a
    /// `call_<uuid>` id is generated.
    ///
    /// # Errors
    /// `ToolParserError::ParsingFailed` if the arguments cannot be serialized.
    fn parse_single_object(&self, obj: &Value) -> ToolParserResult<Option<ToolCall>> {
        let name_field = obj.get("name").or_else(|| obj.get("function"));
        let Some(name) = name_field.and_then(Value::as_str) else {
            return Ok(None);
        };

        // Accept either key for the argument payload; default to `{}`.
        let fallback_args = Value::Object(serde_json::Map::new());
        let raw_args = match obj.get("arguments") {
            Some(found) => found,
            None => obj.get("parameters").unwrap_or(&fallback_args),
        };
        let arguments = serde_json::to_string(raw_args)
            .map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;

        let id = match obj.get("id").and_then(Value::as_str) {
            Some(given) => String::from(given),
            None => format!("call_{}", uuid::Uuid::new_v4()),
        };

        Ok(Some(ToolCall {
            id,
            r#type: "function".to_string(),
            function: FunctionCall {
                name: name.to_string(),
                arguments,
            },
        }))
    }

    /// Collect every tool call found in `value`.
    ///
    /// An array is scanned element by element, an object is treated as a
    /// single candidate, and any other JSON value yields an empty list.
    fn parse_json_value(&self, value: &Value) -> ToolParserResult<Vec<ToolCall>> {
        let candidates: &[Value] = match value {
            Value::Array(items) => items.as_slice(),
            Value::Object(_) => std::slice::from_ref(value),
            // Scalars can never describe a tool call.
            _ => return Ok(Vec::new()),
        };

        let mut calls = Vec::new();
        for candidate in candidates {
            if let Some(call) = self.parse_single_object(candidate)? {
                calls.push(call);
            }
        }
        Ok(calls)
    }

    /// Cheap check for whether `text` might contain a tool call.
    ///
    /// Without configured start tokens, any `{` or `[` counts; otherwise at
    /// least one start token has to occur in `text`.
    fn has_tool_markers(&self, text: &str) -> bool {
        if self.start_tokens.is_empty() {
            // Bare JSON: the start of an object or array is enough.
            text.chars().any(|c| matches!(c, '{' | '['))
        } else {
            self.start_tokens
                .iter()
                .any(|token| text.contains(token.as_str()))
        }
    }
}
impl
Default
for
JsonParser
{
fn
default
()
->
Self
{
Self
::
new
()
}
}
#[async_trait]
impl ToolParser for JsonParser {
    /// Parse a complete model output into tool calls.
    ///
    /// Wrapper tokens are stripped first. Text that is not valid JSON is not
    /// an error: it yields an empty list.
    ///
    /// # Errors
    /// Propagates errors from converting the parsed JSON into tool calls.
    async fn parse_complete(&self, text: &str) -> ToolParserResult<Vec<ToolCall>> {
        let json_content = self.extract_json_content(text);

        match serde_json::from_str::<Value>(json_content) {
            Ok(value) => self.parse_json_value(&value),
            // Not valid JSON, so there are no tool calls to report.
            Err(_) => Ok(vec![]),
        }
    }

    /// Feed one streamed `chunk` into `state` and report what is known so far.
    ///
    /// The chunk is appended to `state.buffer`, and the whole buffer is
    /// re-parsed with the partial JSON parser:
    /// - fully consumed input that yields at least one tool call returns
    ///   `ToolComplete` with the first call and resets the state;
    /// - partial input with a readable name returns `ToolName` once, then
    ///   `ToolArguments` with the arguments seen so far;
    /// - everything else returns `Incomplete` and keeps buffering.
    ///
    /// # Errors
    /// Propagates errors from converting completed JSON into tool calls.
    async fn parse_incremental(
        &self,
        chunk: &str,
        state: &mut ParseState,
    ) -> ToolParserResult<StreamResult> {
        state.buffer.push_str(chunk);

        // Nothing that could start a tool call yet.
        if !self.has_tool_markers(&state.buffer) {
            return Ok(StreamResult::Incomplete);
        }

        let json_content = self.extract_json_content(&state.buffer);

        // Input that fails even the partial parser is simply buffered.
        let Ok((value, consumed)) = self.partial_json.parse_value(json_content) else {
            return Ok(StreamResult::Incomplete);
        };
        let is_complete = consumed == json_content.len();

        if is_complete {
            // Only the first tool is reported (simplified for Phase 2).
            let first_tool = self.parse_json_value(&value)?.into_iter().next();
            if let Some(tool) = first_tool {
                state.buffer.clear();
                // `in_string` doubles as the "name sent" flag below. Reset it
                // with the buffer so the next tool call on this state emits
                // its name again.
                state.in_string = false;
                return Ok(StreamResult::ToolComplete(tool));
            }
            // NOTE(review): complete JSON without a tool call stays in the
            // buffer; confirm whether callers expect it to be discarded.
            return Ok(StreamResult::Incomplete);
        }

        // Partial JSON: accept the same name keys as the complete-parse path.
        let partial_name = value
            .get("name")
            .or_else(|| value.get("function"))
            .and_then(Value::as_str);
        let Some(name) = partial_name else {
            return Ok(StreamResult::Incomplete);
        };

        if !state.in_string {
            // Use `in_string` as a flag for "name sent".
            state.in_string = true;
            return Ok(StreamResult::ToolName {
                index: 0,
                name: name.to_string(),
            });
        }

        // Name already sent: report the arguments seen so far as one update.
        if let Some(args) = value.get("arguments").or_else(|| value.get("parameters")) {
            if let Ok(args_str) = serde_json::to_string(args) {
                return Ok(StreamResult::ToolArguments {
                    index: 0,
                    arguments: args_str,
                });
            }
        }

        Ok(StreamResult::Incomplete)
    }

    /// Report whether `text` looks like a JSON tool call.
    ///
    /// True only if the text has tool markers, parses as JSON after unwrapping,
    /// and is an object (or an array containing an object) with a `"name"` or
    /// `"function"` key.
    fn detect_format(&self, text: &str) -> bool {
        if !self.has_tool_markers(text) {
            return false;
        }

        let json_content = self.extract_json_content(text);
        let Ok(value) = serde_json::from_str::<Value>(json_content) else {
            return false;
        };

        let looks_like_call = |obj: &serde_json::Map<String, Value>| {
            obj.contains_key("name") || obj.contains_key("function")
        };

        match &value {
            Value::Object(obj) => looks_like_call(obj),
            Value::Array(items) => items
                .iter()
                .any(|item| item.as_object().is_some_and(|obj| looks_like_call(obj))),
            _ => false,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A lone JSON object is parsed into exactly one tool call.
    #[tokio::test]
    async fn test_parse_single_tool_call() {
        let parser = JsonParser::new();
        let payload = r#"{"name": "get_weather", "arguments": {"location": "San Francisco"}}"#;

        let calls = parser.parse_complete(payload).await.unwrap();

        assert_eq!(calls.len(), 1);
        let only = &calls[0];
        assert_eq!(only.function.name, "get_weather");
    }

    /// A JSON array yields one tool call per element, in order.
    #[tokio::test]
    async fn test_parse_multiple_tool_calls() {
        let parser = JsonParser::new();
        let payload = r#"[
{"name": "get_weather", "arguments": {"location": "SF"}},
{"name": "search", "arguments": {"query": "news"}}
]"#;

        let calls = parser.parse_complete(payload).await.unwrap();

        assert_eq!(calls.len(), 2);
        assert_eq!(calls[0].function.name, "get_weather");
        assert_eq!(calls[1].function.name, "search");
    }

    /// "parameters" is accepted as an alias for "arguments".
    #[tokio::test]
    async fn test_parse_with_parameters_key() {
        let parser = JsonParser::new();
        let payload = r#"{"name": "calculate", "parameters": {"x": 10, "y": 20}}"#;

        let calls = parser.parse_complete(payload).await.unwrap();

        assert_eq!(calls.len(), 1);
        let only = &calls[0];
        assert_eq!(only.function.name, "calculate");
        assert!(only.function.arguments.contains("10"));
    }

    /// Configured wrapper tokens are stripped before parsing.
    #[tokio::test]
    async fn test_parse_with_wrapper_tokens() {
        let start = vec!["<tool>".to_string()];
        let end = vec!["</tool>".to_string()];
        let parser = JsonParser::with_config(start, end, ", ".to_string());
        let payload = r#"<tool>{"name": "test", "arguments": {}}</tool>"#;

        let calls = parser.parse_complete(payload).await.unwrap();

        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].function.name, "test");
    }

    /// Format detection accepts tool-shaped JSON and rejects everything else.
    #[test]
    fn test_detect_format() {
        let parser = JsonParser::new();

        let object_call = parser.detect_format(r#"{"name": "test", "arguments": {}}"#);
        assert!(object_call);
        let array_call = parser.detect_format(r#"[{"name": "test"}]"#);
        assert!(array_call);

        let plain_text = parser.detect_format("plain text");
        assert!(!plain_text);
        let unrelated_json = parser.detect_format(r#"{"key": "value"}"#);
        assert!(!unrelated_json);
    }

    /// Phase 2 simplified streaming: complete JSON delivered in a single chunk
    /// must produce a complete tool call immediately.
    #[tokio::test]
    async fn test_streaming_parse() {
        let parser = JsonParser::new();
        let mut state = ParseState::new();
        let full_json = r#"{"name": "get_weather", "arguments": {"location": "SF"}}"#;

        let outcome = parser
            .parse_incremental(full_json, &mut state)
            .await
            .unwrap();

        let StreamResult::ToolComplete(tool) = outcome else {
            panic!("Expected ToolComplete for complete JSON input")
        };
        assert_eq!(tool.function.name, "get_weather");
        assert!(tool.function.arguments.contains("SF"));
    }
}
sgl-router/src/tool_parser/mod.rs
View file @
f556ac8b
/// Tool parser module for handling function/tool calls in model outputs
/// Tool parser module for handling function/tool calls in model outputs
///
///
/// This module provides infrastructure for parsing tool calls from various model formats.
/// This module provides infrastructure for parsing tool calls from various model formats.
/// Phase 1 focuses on core infrastructure: types, traits, registry, and partial JSON parsing.
pub
mod
errors
;
pub
mod
errors
;
pub
mod
json_parser
;
pub
mod
partial_json
;
pub
mod
partial_json
;
pub
mod
registry
;
pub
mod
registry
;
pub
mod
state
;
pub
mod
state
;
...
@@ -14,6 +14,7 @@ mod tests;
...
@@ -14,6 +14,7 @@ mod tests;
// Re-export commonly used types
// Re-export commonly used types
pub
use
errors
::{
ToolParserError
,
ToolParserResult
};
pub
use
errors
::{
ToolParserError
,
ToolParserResult
};
pub
use
json_parser
::
JsonParser
;
pub
use
registry
::
ParserRegistry
;
pub
use
registry
::
ParserRegistry
;
pub
use
state
::{
ParsePhase
,
ParseState
};
pub
use
state
::{
ParsePhase
,
ParseState
};
pub
use
traits
::{
PartialJsonParser
,
ToolParser
};
pub
use
traits
::{
PartialJsonParser
,
ToolParser
};
...
...
sgl-router/src/tool_parser/registry.rs
View file @
f556ac8b
use
crate
::
tool_parser
::
json_parser
::
JsonParser
;
use
crate
::
tool_parser
::
traits
::
ToolParser
;
use
crate
::
tool_parser
::
traits
::
ToolParser
;
use
std
::
collections
::
HashMap
;
use
std
::
collections
::
HashMap
;
use
std
::
sync
::
Arc
;
use
std
::
sync
::
Arc
;
...
@@ -21,6 +22,9 @@ impl ParserRegistry {
...
@@ -21,6 +22,9 @@ impl ParserRegistry {
default_parser
:
"json"
.to_string
(),
default_parser
:
"json"
.to_string
(),
};
};
// Register default parsers
registry
.register_default_parsers
();
// Register default model mappings
// Register default model mappings
registry
.register_default_mappings
();
registry
.register_default_mappings
();
...
@@ -75,6 +79,14 @@ impl ParserRegistry {
...
@@ -75,6 +79,14 @@ impl ParserRegistry {
.collect
()
.collect
()
}
}
/// Register default parsers
fn
register_default_parsers
(
&
mut
self
)
{
// JSON parser - most common format
self
.register_parser
(
"json"
,
Arc
::
new
(
JsonParser
::
new
()));
// Note: Additional parsers (mistral, qwen, llama) will be added in later phases
}
/// Register default model mappings
/// Register default model mappings
fn
register_default_mappings
(
&
mut
self
)
{
fn
register_default_mappings
(
&
mut
self
)
{
// OpenAI models
// OpenAI models
...
@@ -85,16 +97,16 @@ impl ParserRegistry {
...
@@ -85,16 +97,16 @@ impl ParserRegistry {
// Anthropic models
// Anthropic models
self
.map_model
(
"claude-*"
,
"json"
);
self
.map_model
(
"claude-*"
,
"json"
);
// Mistral models
// Mistral models
(will use json until mistral parser is implemented)
self
.map_model
(
"mistral-*"
,
"
mistral
"
);
self
.map_model
(
"mistral-*"
,
"
json
"
);
self
.map_model
(
"mixtral-*"
,
"
mistral
"
);
self
.map_model
(
"mixtral-*"
,
"
json
"
);
// Qwen models
// Qwen models
(will use json until qwen parser is implemented)
self
.map_model
(
"qwen*"
,
"
qwe
n"
);
self
.map_model
(
"qwen*"
,
"
jso
n"
);
// Llama models
// Llama models
(will use json until llama parser is implemented)
self
.map_model
(
"llama-*"
,
"
llama
"
);
self
.map_model
(
"llama-*"
,
"
json
"
);
self
.map_model
(
"meta-llama-*"
,
"
llama
"
);
self
.map_model
(
"meta-llama-*"
,
"
json
"
);
// Other models default to JSON
// Other models default to JSON
self
.map_model
(
"gemini-*"
,
"json"
);
self
.map_model
(
"gemini-*"
,
"json"
);
...
...
sgl-router/src/tool_parser/tests.rs
View file @
f556ac8b
use
super
::
*
;
use
super
::
*
;
use
crate
::
tool_parser
::
json_parser
::
JsonParser
;
use
crate
::
tool_parser
::
partial_json
::{
use
crate
::
tool_parser
::
partial_json
::{
compute_diff
,
find_common_prefix
,
is_complete_json
,
PartialJson
,
compute_diff
,
find_common_prefix
,
is_complete_json
,
PartialJson
,
};
};
use
crate
::
tool_parser
::
traits
::
ToolParser
;
#[test]
#[test]
fn
test_parse_state_new
()
{
fn
test_parse_state_new
()
{
...
@@ -247,3 +249,638 @@ fn test_partial_tool_call() {
...
@@ -247,3 +249,638 @@ fn test_partial_tool_call() {
assert
!
(
partial
.name_sent
);
assert
!
(
partial
.name_sent
);
assert_eq!
(
partial
.streamed_args
,
r#"{"key": "#
);
assert_eq!
(
partial
.streamed_args
,
r#"{"key": "#
);
}
}
/// A single object with arguments becomes one call that carries those arguments.
#[tokio::test]
async fn test_json_parser_complete_single() {
    let parser = JsonParser::new();
    let payload = r#"{"name": "get_weather", "arguments": {"location": "San Francisco", "units": "celsius"}}"#;

    let calls = parser.parse_complete(payload).await.unwrap();

    assert_eq!(calls.len(), 1);
    let call = &calls[0].function;
    assert_eq!(call.name, "get_weather");
    assert!(call.arguments.contains("San Francisco"));
    assert!(call.arguments.contains("celsius"));
}
/// An array of tool calls is returned element by element, in order.
#[tokio::test]
async fn test_json_parser_complete_array() {
    let parser = JsonParser::new();
    let payload = r#"[
{"name": "get_weather", "arguments": {"location": "SF"}},
{"name": "get_news", "arguments": {"query": "technology"}}
]"#;

    let calls = parser.parse_complete(payload).await.unwrap();

    assert_eq!(calls.len(), 2);
    assert_eq!(calls[0].function.name, "get_weather");
    assert_eq!(calls[1].function.name, "get_news");
}
/// "parameters" may be used in place of "arguments".
#[tokio::test]
async fn test_json_parser_with_parameters() {
    let parser = JsonParser::new();
    let payload = r#"{"name": "calculate", "parameters": {"x": 10, "y": 20, "operation": "add"}}"#;

    let calls = parser.parse_complete(payload).await.unwrap();

    assert_eq!(calls.len(), 1);
    let call = &calls[0].function;
    assert_eq!(call.name, "calculate");
    assert!(call.arguments.contains("10"));
    assert!(call.arguments.contains("20"));
    assert!(call.arguments.contains("add"));
}
/// Custom wrapper tokens are removed before the JSON is parsed.
#[tokio::test]
async fn test_json_parser_with_tokens() {
    let start = vec!["[TOOL_CALLS] [".to_string()];
    let end = vec!["]".to_string()];
    let parser = JsonParser::with_config(start, end, ", ".to_string());
    let payload =
        r#"[TOOL_CALLS] [{"name": "search", "arguments": {"query": "rust programming"}}]"#;

    let calls = parser.parse_complete(payload).await.unwrap();

    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].function.name, "search");
}
/// The (?s) flag in the extractor lets wrapped, pretty-printed JSON match.
#[tokio::test]
async fn test_multiline_json_with_tokens() {
    let start = vec!["<tool>".to_string()];
    let end = vec!["</tool>".to_string()];
    let parser = JsonParser::with_config(start, end, ", ".to_string());

    // Pretty-printed multi-line JSON
    let payload = r#"<tool>{
"name": "get_weather",
"arguments": {
"location": "San Francisco",
"units": "celsius",
"include_forecast": true
}
}</tool>"#;

    let calls = parser.parse_complete(payload).await.unwrap();

    assert_eq!(calls.len(), 1);
    let call = &calls[0].function;
    assert_eq!(call.name, "get_weather");
    assert!(call.arguments.contains("San Francisco"));
    assert!(call.arguments.contains("celsius"));
    assert!(call.arguments.contains("true"));
}
/// A multi-line JSON array without wrapper tokens parses into two calls.
#[tokio::test]
async fn test_multiline_json_array() {
    let parser = JsonParser::new();
    let payload = r#"[
{
"name": "function1",
"arguments": {
"param1": "value1",
"param2": 42
}
},
{
"name": "function2",
"parameters": {
"data": [1, 2, 3],
"flag": false
}
}
]"#;

    let calls = parser.parse_complete(payload).await.unwrap();

    assert_eq!(calls.len(), 2);
    let (first, second) = (&calls[0].function, &calls[1].function);
    assert_eq!(first.name, "function1");
    assert_eq!(second.name, "function2");
    assert!(first.arguments.contains("value1"));
    assert!(second.arguments.contains("[1,2,3]"));
}
/// `detect_format` accepts tool-shaped JSON and rejects other input.
#[test]
fn test_json_parser_format_detection() {
    let parser = JsonParser::new();

    // Should detect valid tool call formats
    let with_arguments = parser.detect_format(r#"{"name": "test", "arguments": {}}"#);
    assert!(with_arguments);
    let with_parameters = parser.detect_format(r#"{"name": "test", "parameters": {"x": 1}}"#);
    assert!(with_parameters);
    let inside_array = parser.detect_format(r#"[{"name": "test"}]"#);
    assert!(inside_array);

    // Should not detect non-tool formats
    let plain_text = parser.detect_format("plain text");
    assert!(!plain_text);
    let flat_object = parser.detect_format(r#"{"key": "value"}"#);
    assert!(!flat_object);
    let nested_object = parser.detect_format(r#"{"data": {"nested": true}}"#);
    assert!(!nested_object);
}
/// Phase 2 simplified streaming: complete JSON in one chunk gives `ToolComplete`.
#[tokio::test]
async fn test_json_parser_streaming() {
    let parser = JsonParser::new();
    let mut state = ParseState::new();
    let full_json = r#"{"name": "get_weather", "arguments": {"location": "San Francisco"}}"#;

    let outcome = parser
        .parse_incremental(full_json, &mut state)
        .await
        .unwrap();

    let StreamResult::ToolComplete(tool) = outcome else {
        panic!("Expected ToolComplete for complete JSON")
    };
    assert_eq!(tool.function.name, "get_weather");
    assert!(tool.function.arguments.contains("San Francisco"));
}
/// The registry registers the JSON parser by default and serves it for OpenAI models.
#[tokio::test]
async fn test_registry_with_json_parser() {
    let registry = ParserRegistry::new();

    // JSON parser should be registered by default
    let json_registered = registry.has_parser("json");
    assert!(json_registered);

    // Should get JSON parser for OpenAI models
    let parser = registry.get_parser("gpt-4-turbo").unwrap();

    // Test that the parser works
    let payload = r#"{"name": "test", "arguments": {"x": 1}}"#;
    let calls = parser.parse_complete(payload).await.unwrap();

    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].function.name, "test");
}
/// Input that is not valid JSON yields an empty result rather than an error.
#[tokio::test]
async fn test_json_parser_invalid_input() {
    let parser = JsonParser::new();

    for bad_input in ["not json", "{invalid}", ""] {
        let calls = parser.parse_complete(bad_input).await.unwrap();
        assert_eq!(calls.len(), 0);
    }
}
/// A call without any arguments key is given an empty JSON object.
#[tokio::test]
async fn test_json_parser_empty_arguments() {
    let parser = JsonParser::new();
    let payload = r#"{"name": "get_time"}"#;

    let calls = parser.parse_complete(payload).await.unwrap();

    assert_eq!(calls.len(), 1);
    let call = &calls[0].function;
    assert_eq!(call.name, "get_time");
    assert_eq!(call.arguments, "{}");
}
#[cfg(test)]
mod failure_cases {
    use super::*;

    /// An object without a name is ignored; an empty-string name is still accepted.
    #[tokio::test]
    async fn test_malformed_tool_missing_name() {
        let parser = JsonParser::new();

        // Missing name field
        let nameless = r#"{"arguments": {"x": 1}}"#;
        let calls = parser.parse_complete(nameless).await.unwrap();
        assert_eq!(calls.len(), 0, "Should return empty for tool without name");

        // Empty name
        let blank_name = r#"{"name": "", "arguments": {"x": 1}}"#;
        let calls = parser.parse_complete(blank_name).await.unwrap();
        assert_eq!(calls.len(), 1, "Should accept empty name string");
        assert_eq!(calls[0].function.name, "");
    }

    /// Non-object arguments are serialized as whatever JSON value they are.
    #[tokio::test]
    async fn test_invalid_arguments_json() {
        let parser = JsonParser::new();

        // Arguments is a string instead of object
        let string_args = r#"{"name": "test", "arguments": "not an object"}"#;
        let calls = parser.parse_complete(string_args).await.unwrap();
        assert_eq!(calls.len(), 1);
        // Should serialize the string as JSON
        assert!(calls[0].function.arguments.contains("not an object"));

        // Arguments is a number
        let number_args = r#"{"name": "test", "arguments": 42}"#;
        let calls = parser.parse_complete(number_args).await.unwrap();
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].function.arguments, "42");

        // Arguments is null
        let null_args = r#"{"name": "test", "arguments": null}"#;
        let calls = parser.parse_complete(null_args).await.unwrap();
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].function.arguments, "null");
    }

    /// Incomplete or mismatched wrapper tokens leave nothing that parses.
    #[tokio::test]
    async fn test_broken_wrapper_tokens() {
        let start = vec!["<tool>".to_string()];
        let end = vec!["</tool>".to_string()];
        let parser = JsonParser::with_config(start, end, ", ".to_string());

        let cases = [
            // Missing end token
            (
                r#"<tool>{"name": "test", "arguments": {}}"#,
                "Should fail to parse without complete wrapper",
            ),
            // Missing start token - parser looks for complete wrapper, so this won't parse
            (
                r#"{"name": "test", "arguments": {}}</tool>"#,
                "Should not parse JSON with incomplete wrapper",
            ),
            // Mismatched tokens
            (
                r#"<tool>{"name": "test", "arguments": {}}</wrong>"#,
                "Should fail with mismatched tokens",
            ),
        ];

        for (payload, reason) in cases {
            let calls = parser.parse_complete(payload).await.unwrap();
            assert_eq!(calls.len(), 0, "{}", reason);
        }
    }

    /// Syntactically invalid JSON is rejected without raising an error.
    #[tokio::test]
    async fn test_invalid_json_structures() {
        let parser = JsonParser::new();

        let cases = [
            // Trailing comma
            (
                r#"{"name": "test", "arguments": {"x": 1,}}"#,
                "Should reject JSON with trailing comma",
            ),
            // Missing quotes on keys
            (
                r#"{name: "test", arguments: {}}"#,
                "Should reject invalid JSON syntax",
            ),
            // Unclosed object
            (
                r#"{"name": "test", "arguments": {"#,
                "Should reject incomplete JSON",
            ),
        ];

        for (payload, reason) in cases {
            let calls = parser.parse_complete(payload).await.unwrap();
            assert_eq!(calls.len(), 0, "{}", reason);
        }
    }
}
#[cfg(test)]
mod
edge_cases
{
use
super
::
*
;
/// Non-ASCII text survives parsing in both the function name and the arguments.
#[tokio::test]
async fn test_unicode_in_names_and_arguments() {
    let parser = JsonParser::new();

    // Unicode in function name
    let cjk_payload = r#"{"name": "获取天气", "arguments": {"location": "北京"}}"#;
    let calls = parser.parse_complete(cjk_payload).await.unwrap();
    assert_eq!(calls.len(), 1);
    let call = &calls[0].function;
    assert_eq!(call.name, "获取天气");
    assert!(call.arguments.contains("北京"));

    // Emoji in arguments
    let emoji_payload = r#"{"name": "send_message", "arguments": {"text": "Hello 👋 World 🌍"}}"#;
    let calls = parser.parse_complete(emoji_payload).await.unwrap();
    assert_eq!(calls.len(), 1);
    let call = &calls[0].function;
    assert!(call.arguments.contains("👋"));
    assert!(call.arguments.contains("🌍"));
}
/// JSON escape sequences stay escaped in the re-serialized arguments.
#[tokio::test]
async fn test_escaped_characters() {
    let parser = JsonParser::new();

    // Escaped quotes in arguments
    let quoted = r#"{"name": "echo", "arguments": {"text": "He said \"hello\""}}"#;
    let calls = parser.parse_complete(quoted).await.unwrap();
    assert_eq!(calls.len(), 1);
    assert!(calls[0].function.arguments.contains(r#"\"hello\""#));

    // Escaped backslashes
    let backslashes = r#"{"name": "path", "arguments": {"dir": "C:\\Users\\test"}}"#;
    let calls = parser.parse_complete(backslashes).await.unwrap();
    assert_eq!(calls.len(), 1);
    assert!(calls[0].function.arguments.contains("\\\\"));

    // Newlines and tabs
    let control_chars = r#"{"name": "format", "arguments": {"text": "line1\nline2\ttabbed"}}"#;
    let calls = parser.parse_complete(control_chars).await.unwrap();
    assert_eq!(calls.len(), 1);
    let serialized = &calls[0].function.arguments;
    assert!(serialized.contains("\\n"));
    assert!(serialized.contains("\\t"));
}
/// Large argument objects and long call arrays are parsed in full.
#[tokio::test]
async fn test_very_large_payloads() {
    let parser = JsonParser::new();

    // Large arguments object
    let generated_fields: String = (0..1000)
        .map(|i| format!(r#""field_{}": "value_{}","#, i, i))
        .collect();
    let large_args = format!(
        "{}{}{}",
        r#"{"name": "process", "arguments": {"#,
        generated_fields,
        r#""final": "value"}}"#
    );

    let calls = parser.parse_complete(&large_args).await.unwrap();
    assert_eq!(calls.len(), 1);
    let call = &calls[0].function;
    assert_eq!(call.name, "process");
    assert!(call.arguments.contains("field_999"));

    // Large array of tool calls
    let elements: Vec<String> = (0..100)
        .map(|i| format!(r#"{{"name": "func_{}", "arguments": {{}}}}"#, i))
        .collect();
    let large_array = format!("[{}]", elements.join(","));

    let calls = parser.parse_complete(&large_array).await.unwrap();
    assert_eq!(calls.len(), 100);
    assert_eq!(calls[99].function.name, "func_99");
}
/// Array elements that are not tool calls are skipped; the rest keep their order.
#[tokio::test]
async fn test_mixed_array_tools_and_non_tools() {
    let parser = JsonParser::new();

    // Array with both tool calls and non-tool objects
    let payload = r#"[
{"name": "tool1", "arguments": {}},
{"not_a_tool": "just_data"},
{"name": "tool2", "parameters": {"x": 1}},
{"key": "value", "another": "field"}
]"#;

    let calls = parser.parse_complete(payload).await.unwrap();

    assert_eq!(calls.len(), 2, "Should only parse valid tool calls");
    assert_eq!(calls[0].function.name, "tool1");
    assert_eq!(calls[1].function.name, "tool2");
}
/// With duplicate keys, the last occurrence is the one that is kept.
#[tokio::test]
async fn test_duplicate_keys_in_json() {
    let parser = JsonParser::new();

    // JSON with duplicate keys (last one wins in most parsers)
    let payload = r#"{"name": "first", "name": "second", "arguments": {"x": 1, "x": 2}}"#;

    let calls = parser.parse_complete(payload).await.unwrap();

    assert_eq!(calls.len(), 1);
    let call = &calls[0].function;
    assert_eq!(call.name, "second", "Last duplicate key should win");
    assert!(
        call.arguments.contains("2"),
        "Last duplicate value should win"
    );
}
/// `null` inside arguments, directly or within an array, is preserved.
#[tokio::test]
async fn test_null_values_in_arguments() {
    let parser = JsonParser::new();

    let payloads = [
        // Null values in arguments
        r#"{"name": "test", "arguments": {"required": "value", "optional": null}}"#,
        // Array with null
        r#"{"name": "test", "arguments": {"items": [1, null, "three"]}}"#,
    ];

    for payload in payloads {
        let calls = parser.parse_complete(payload).await.unwrap();
        assert_eq!(calls.len(), 1);
        assert!(calls[0].function.arguments.contains("null"));
    }
}
/// Several wrapper pairs can be configured, including overlapping ones.
#[tokio::test]
async fn test_multiple_token_pairs_with_conflicts() {
    // Test with overlapping token patterns
    let starts = vec!["<<".to_string(), "<tool>".to_string()];
    let ends = vec![">>".to_string(), "</tool>".to_string()];
    let parser = JsonParser::with_config(starts, ends, ", ".to_string());

    // First pattern
    let angle_wrapped = r#"<<{"name": "test1", "arguments": {}}>>"#;
    let calls = parser.parse_complete(angle_wrapped).await.unwrap();
    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].function.name, "test1");

    // Second pattern
    let tag_wrapped = r#"<tool>{"name": "test2", "arguments": {}}</tool>"#;
    let calls = parser.parse_complete(tag_wrapped).await.unwrap();
    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].function.name, "test2");

    // Nested patterns (should use first match)
    let nested = r#"<<tool>{"name": "test3", "arguments": {}}</tool>>"#;
    let calls = parser.parse_complete(nested).await.unwrap();
    // This is tricky - depends on regex behavior
    // The parser should handle this gracefully
    assert!(calls.len() <= 1, "Should not parse multiple times");
}
/// Incremental parsing with a bare brace, a complete object, and a truncated object.
#[tokio::test]
async fn test_streaming_with_partial_chunks() {
    let parser = JsonParser::new();

    // Test 1: Very incomplete JSON (just opening brace) should return Incomplete
    let mut brace_state = ParseState::new();
    let brace_outcome = parser
        .parse_incremental(r#"{"#, &mut brace_state)
        .await
        .unwrap();
    assert!(
        matches!(brace_outcome, StreamResult::Incomplete),
        "Should return Incomplete for just opening brace"
    );

    // Test 2: Complete JSON should return ToolComplete
    let mut complete_state = ParseState::new();
    let complete_json = r#"{"name": "get_weather", "arguments": {"location": "SF"}}"#;
    let complete_outcome = parser
        .parse_incremental(complete_json, &mut complete_state)
        .await
        .unwrap();
    let StreamResult::ToolComplete(tool) = complete_outcome else {
        panic!("Expected ToolComplete for complete JSON")
    };
    assert_eq!(tool.function.name, "get_weather");
    let parsed_args: serde_json::Value = serde_json::from_str(&tool.function.arguments).unwrap();
    assert_eq!(parsed_args["location"], "SF");

    // Test 3: Partial JSON with name - Phase 2 behavior
    // The PartialJson parser can complete partial JSON by filling in missing values
    let mut truncated_state = ParseState::new();
    let truncated_json = r#"{"name": "test", "argum"#;
    let truncated_outcome = parser
        .parse_incremental(truncated_json, &mut truncated_state)
        .await
        .unwrap();
    match truncated_outcome {
        StreamResult::Incomplete => {
            // Also acceptable if parser decides to wait
        }
        StreamResult::ToolName { name, .. } => {
            assert_eq!(name, "test");
        }
        StreamResult::ToolComplete(tool) => {
            assert_eq!(tool.function.name, "test");
            // Arguments will be empty object since "argum" is incomplete
            assert_eq!(tool.function.arguments, "{}");
        }
        _ => panic!("Unexpected result for partial JSON with name"),
    }
}
/// Verifies that booleans, numbers (integer, float, negative) and empty
/// containers survive `parse_complete`: each payload must yield exactly one
/// tool call whose serialized arguments contain the expected fragments.
#[tokio::test]
async fn test_special_json_values() {
    let parser = JsonParser::new();

    // (payload, fragments that must appear in the serialized arguments)
    let cases: [(&str, &[&str]); 3] = [
        // Boolean values
        (
            r#"{"name": "toggle", "arguments": {"enabled": true, "disabled": false}}"#,
            &["true", "false"][..],
        ),
        // Numbers (including float and negative)
        (
            r#"{"name": "calc", "arguments": {"int": 42, "float": 3.14, "negative": -17}}"#,
            &["42", "3.14", "-17"][..],
        ),
        // Empty arrays and objects
        (
            r#"{"name": "test", "arguments": {"empty_arr": [], "empty_obj": {}}}"#,
            &["[]", "{}"][..],
        ),
    ];

    for &(payload, fragments) in cases.iter() {
        let calls = parser.parse_complete(payload).await.unwrap();
        assert_eq!(calls.len(), 1);

        let rendered = &calls[0].function.arguments;
        for &fragment in fragments {
            assert!(rendered.contains(fragment));
        }
    }
}
/// Checks that a `"function"` key is accepted in place of `"name"`, and that
/// `"name"` wins when both keys are present.
#[tokio::test]
async fn test_function_field_alternative() {
    let parser = JsonParser::new();

    // (payload, expected function name)
    let cases = [
        // Using "function" instead of "name"
        (
            r#"{"function": "test_func", "arguments": {"x": 1}}"#,
            "test_func",
        ),
        // Both "name" and "function" present (name should take precedence)
        (
            r#"{"name": "primary", "function": "secondary", "arguments": {}}"#,
            "primary",
        ),
    ];

    for &(payload, expected_name) in cases.iter() {
        let calls = parser.parse_complete(payload).await.unwrap();
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].function.name, expected_name);
    }
}
/// Confirms that `parse_complete` is insensitive to JSON whitespace by
/// parsing one heavily padded payload and one fully minified payload.
#[tokio::test]
async fn test_whitespace_handling() {
    let parser = JsonParser::new();

    // Padded variant: spaces around separators plus line breaks.
    let padded = r#" {
"name" : "test" ,
"arguments" : {
"key" : "value"
}
} "#;
    let padded_calls = parser.parse_complete(padded).await.unwrap();
    assert_eq!(padded_calls.len(), 1);
    assert_eq!(padded_calls[0].function.name, "test");

    // Minified variant: no optional whitespace at all.
    let minified = r#"{"name":"compact","arguments":{"a":1,"b":2}}"#;
    let minified_calls = parser.parse_complete(minified).await.unwrap();
    assert_eq!(minified_calls.len(), 1);
    assert_eq!(minified_calls[0].function.name, "compact");
}
}
/// Heavier scenarios for the JSON tool parser: deep argument nesting and
/// one parser instance shared across several spawned tasks.
#[cfg(test)]
mod stress_tests {
    use super::*;

    /// Parses a tool call whose arguments are nested five levels deep and
    /// checks that the innermost value is still present in the output.
    #[tokio::test]
    async fn test_deeply_nested_arguments() {
        let parser = JsonParser::new();

        let nested_payload = r#"{
"name": "nested",
"arguments": {
"level1": {
"level2": {
"level3": {
"level4": {
"level5": {
"value": "deep"
}
}
}
}
}
}
}"#;

        let calls = parser.parse_complete(nested_payload).await.unwrap();
        assert_eq!(calls.len(), 1);

        let rendered_args = &calls[0].function.arguments;
        assert!(rendered_args.contains("deep"));
    }

    /// Shares one parser behind an `Arc` with ten spawned tasks; every task
    /// parses its own payload and checks the resulting function name.
    #[tokio::test]
    async fn test_concurrent_parser_usage() {
        use std::sync::Arc;

        let shared_parser = Arc::new(JsonParser::new());

        // Spawn all tasks up front so they can run side by side.
        let tasks: Vec<_> = (0..10)
            .map(|i| {
                let task_parser = Arc::clone(&shared_parser);
                tokio::spawn(async move {
                    let payload = format!(r#"{{"name": "func_{}", "arguments": {{}}}}"#, i);
                    let expected_name = format!("func_{}", i);

                    let calls = task_parser.parse_complete(&payload).await.unwrap();
                    assert_eq!(calls.len(), 1);
                    assert_eq!(calls[0].function.name, expected_name);
                })
            })
            .collect();

        // A panic inside any task surfaces here as a JoinError.
        for task in tasks {
            task.await.unwrap();
        }
    }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment