Commit 4eeaff74 (Unverified)
Authored Sep 29, 2025 by Chang Su; committed via GitHub on Sep 29, 2025
Parent: a17e70f5

[router][tool call] Separate `JsonParser` and `LlamaParser` (#11073)
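At a high level, this commit strips `TokenConfig`-based wrapper handling out of `JsonParser` and moves the `<|python_tag|>` and semicolon-separator logic into `LlamaParser`. A minimal usage sketch of the resulting split, condensed from the test files below (assumes the `sglang_router_rs` crate and a tokio runtime; `#[tokio::main]` is illustrative, the tests use `#[tokio::test]`):

```rust
use sglang_router_rs::tool_parser::{JsonParser, LlamaParser, ToolParser};

#[tokio::main]
async fn main() {
    // JsonParser now handles plain JSON tool calls only...
    let json = JsonParser::new();
    let (_, tools) = json
        .parse_complete(r#"{"name": "search", "arguments": {"query": "rust"}}"#)
        .await
        .unwrap();
    assert_eq!(tools[0].function.name, "search");

    // ...while LlamaParser owns the <|python_tag|> format end to end.
    let llama = LlamaParser::new();
    let input = r#"Here are some results: <|python_tag|>{"name": "search", "parameters": {"query": "weather"}}"#;
    let (normal_text, tools) = llama.parse_complete(input).await.unwrap();
    assert_eq!(normal_text, "Here are some results: ");
    assert_eq!(tools[0].function.name, "search");
}
```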
Changes: showing 12 changed files with 376 additions and 993 deletions (+376 −993).
sgl-router/src/tool_parser/mod.rs                         +1    -1
sgl-router/src/tool_parser/parsers/json_parser.rs         +31   -364
sgl-router/src/tool_parser/parsers/llama_parser.rs        +271  -48
sgl-router/src/tool_parser/parsers/mistral_parser.rs      +1    -28
sgl-router/src/tool_parser/parsers/pythonic_parser.rs     +1    -26
sgl-router/src/tool_parser/parsers/qwen_parser.rs         +9    -39
sgl-router/src/tool_parser/tests.rs                       +2    -137
sgl-router/tests/tool_parser_fallback.rs                  +2    -2
sgl-router/tests/tool_parser_json.rs                      +1    -31
sgl-router/tests/tool_parser_llama.rs                     +56   -68
sgl-router/tests/tool_parser_pythonic.rs                  +1    -2
sgl-router/tests/tool_parser_wrapper_tokens.rs            +0    -247
sgl-router/src/tool_parser/mod.rs (view file @ 4eeaff74)

```diff
@@ -20,7 +20,7 @@ pub use errors::{ToolParserError, ToolParserResult};
 pub use registry::ParserRegistry;
 pub use state::{ParsePhase, ParseState};
 pub use traits::{PartialJsonParser, ToolParser};
-pub use types::{FunctionCall, PartialToolCall, StreamResult, TokenConfig, ToolCall};
+pub use types::{FunctionCall, PartialToolCall, StreamResult, ToolCall};
 // Re-export parsers for convenience
 pub use parsers::{
```
sgl-router/src/tool_parser/parsers/json_parser.rs (view file @ 4eeaff74)

(This diff is collapsed.)
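The collapsed diff removes `TokenConfig` support from `JsonParser` (+31/−364). Its remaining detection surface, as exercised by sgl-router/tests/tool_parser_json.rs further down, can be sketched like this (assumes the `sglang_router_rs` crate; `detect_format` is synchronous):

```rust
use sglang_router_rs::tool_parser::{JsonParser, ToolParser};

fn main() {
    let parser = JsonParser::new();
    // A JSON object only counts as a tool call if it carries a "name" field.
    assert!(parser.detect_format(r#"{"name": "test", "arguments": {}}"#));
    assert!(parser.detect_format(r#"[{"name": "test"}]"#));
    assert!(!parser.detect_format("plain text"));
    assert!(!parser.detect_format(r#"{"key": "value"}"#)); // no name field
}
```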
sgl-router/src/tool_parser/parsers/llama_parser.rs (view file @ 4eeaff74)

```diff
 use async_trait::async_trait;
 use serde_json::Value;
+use uuid;
-use super::json_parser::JsonParser;
 use crate::tool_parser::{
-    errors::ToolParserResult,
+    errors::{ToolParserError, ToolParserResult},
+    partial_json::PartialJson,
     state::ParseState,
     traits::ToolParser,
-    types::{StreamResult, TokenConfig, ToolCall},
+    types::{FunctionCall, StreamResult, ToolCall},
 };

 /// Llama 3.2 format parser for tool calls
```
```diff
@@ -15,22 +17,124 @@ use crate::tool_parser::{
 ///
 /// Also supports plain JSON without the python_tag prefix
 pub struct LlamaParser {
-    /// Underlying JSON parser with Llama-specific configuration
-    json_parser: JsonParser,
+    /// Parser for handling incomplete JSON during streaming
+    partial_json: PartialJson,
 }

 impl LlamaParser {
     /// Create a new Llama parser
     pub fn new() -> Self {
-        // Configure JSON parser with Llama's python_tag token
-        // Note: No end token for python_tag format
-        let json_parser = JsonParser::with_config(TokenConfig {
-            start_tokens: vec!["<|python_tag|>".to_string()],
-            end_tokens: vec!["".to_string()], // Empty end token
-            separator: ";".to_string(), // Llama uses semicolon for multiple calls (though not well supported)
-        });
-        Self { json_parser }
+        Self {
+            partial_json: PartialJson::default(),
+        }
     }
```
```diff
+    /// Extract content after python_tag token
+    fn extract_content_after_python_tag(&self, text: &str) -> Option<(String, String)> {
+        const PYTHON_TAG: &str = "<|python_tag|>";
+        if let Some(tag_pos) = text.find(PYTHON_TAG) {
+            let normal_text = text[..tag_pos].to_string();
+            let json_content = text[tag_pos + PYTHON_TAG.len()..].to_string();
+            Some((normal_text, json_content))
+        } else {
+            None
+        }
+    }
```
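The helper simply splits at the first occurrence of the tag. A self-contained sketch of the same behavior, with no crate dependencies:

```rust
// Minimal stand-in for extract_content_after_python_tag.
fn split_on_python_tag(text: &str) -> Option<(String, String)> {
    const PYTHON_TAG: &str = "<|python_tag|>";
    text.find(PYTHON_TAG).map(|pos| {
        // Everything before the tag is normal text; everything after is JSON payload.
        (
            text[..pos].to_string(),
            text[pos + PYTHON_TAG.len()..].to_string(),
        )
    })
}

fn main() {
    let (normal, json) =
        split_on_python_tag(r#"Here are some results: <|python_tag|>{"name":"search"}"#).unwrap();
    assert_eq!(normal, "Here are some results: ");
    assert_eq!(json, r#"{"name":"search"}"#);
}
```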
```diff
+    /// Parse a single JSON object into a ToolCall (Llama format: name + parameters)
+    fn parse_single_object(&self, obj: &Value) -> ToolParserResult<Option<ToolCall>> {
+        // Llama format only: {"name": "function_name", "parameters": {...}}
+        let name = obj.get("name").and_then(|v| v.as_str());
+        if let Some(name) = name {
+            // Llama uses "parameters" key
+            let empty_obj = Value::Object(serde_json::Map::new());
+            let parameters = obj.get("parameters").unwrap_or(&empty_obj);
+            // Convert parameters to JSON string
+            let arguments = serde_json::to_string(parameters)
+                .map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;
+            // Generate a unique ID for Llama calls
+            let id = obj
+                .get("id")
+                .and_then(|v| v.as_str())
+                .map(String::from)
+                .unwrap_or_else(|| format!("llama_call_{}", uuid::Uuid::new_v4()));
+            Ok(Some(ToolCall {
+                id,
+                r#type: "function".to_string(),
+                function: FunctionCall {
+                    name: name.to_string(),
+                    arguments,
+                },
+            }))
+        } else {
+            Ok(None)
+        }
+    }
```
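Note the mapping: the Llama wire format uses a `"parameters"` object, while the resulting `ToolCall` carries its arguments as a JSON string, so the method re-serializes. A dependency-light illustration of that step (serde_json only):

```rust
use serde_json::{json, Value};

fn main() {
    // The Llama wire format this method accepts: "name" plus "parameters".
    let obj: Value = json!({"name": "get_weather", "parameters": {"city": "Paris"}});
    let name = obj.get("name").and_then(|v| v.as_str()).unwrap();
    // Missing "parameters" falls back to an empty object in the real code.
    let params = obj.get("parameters").cloned().unwrap_or_else(|| json!({}));
    // Arguments are re-serialized to a compact JSON string, as in parse_single_object.
    let arguments = serde_json::to_string(&params).unwrap();
    assert_eq!(name, "get_weather");
    assert_eq!(arguments, r#"{"city":"Paris"}"#);
}
```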
```diff
+    /// Parse JSON value(s) into tool calls
+    fn parse_json_value(&self, value: &Value) -> ToolParserResult<Vec<ToolCall>> {
+        let mut tools = Vec::new();
+        match value {
+            Value::Array(arr) => {
+                // Parse each element in the array
+                for item in arr {
+                    if let Some(tool) = self.parse_single_object(item)? {
+                        tools.push(tool);
+                    }
+                }
+            }
+            Value::Object(_) => {
+                // Single tool call
+                if let Some(tool) = self.parse_single_object(value)? {
+                    tools.push(tool);
+                }
+            }
+            _ => {
+                // Not a valid tool call format
+                return Ok(vec![]);
+            }
+        }
+        Ok(tools)
+    }

+    /// Check if text contains potential tool call markers
+    fn has_python_tag(&self, text: &str) -> bool {
+        text.contains("<|python_tag|>")
+    }
```
```diff
+    /// Parse semicolon-separated JSON objects
+    fn parse_semicolon_separated(&self, content: &str) -> ToolParserResult<Vec<ToolCall>> {
+        let mut all_tools = Vec::new();
+        // Split by semicolon and parse each JSON object
+        for part in content.split(';') {
+            let trimmed = part.trim();
+            if trimmed.is_empty() {
+                continue;
+            }
+            // Try to parse this part as a single JSON object
+            match serde_json::from_str::<Value>(trimmed) {
+                Ok(value) => {
+                    if let Some(tool) = self.parse_single_object(&value)? {
+                        all_tools.push(tool);
+                    }
+                }
+                Err(e) => {
+                    // Skip invalid JSON parts in semicolon-separated list
+                    tracing::warn!("Failed to parse tool call: {}", e);
+                }
+            }
+        }
+        Ok(all_tools)
+    }
+}
```
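Splitting on `';'` is deliberately naive; a semicolon inside a JSON string would break it, matching the "though not well supported" caveat on the old separator config. A minimal sketch of just the splitting step (serde_json only):

```rust
fn main() {
    let content = r#"{"name": "tool1", "parameters": {}};{"name": "tool2", "parameters": {"y": 2}}"#;
    // Split, trim, and drop empty fragments, as parse_semicolon_separated does.
    let parts: Vec<&str> = content
        .split(';')
        .map(str::trim)
        .filter(|p| !p.is_empty())
        .collect();
    assert_eq!(parts.len(), 2);
    for part in &parts {
        // Each fragment must be a standalone JSON object for parse_single_object.
        let value: serde_json::Value = serde_json::from_str(part).unwrap();
        assert!(value.get("name").is_some());
    }
}
```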
```diff
@@ -43,31 +147,41 @@ impl Default for LlamaParser {
 #[async_trait]
 impl ToolParser for LlamaParser {
     async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
-        // First try with the configured python_tag parser
-        let (_json_normal_text, tools) = self.json_parser.parse_complete(text).await?;
-        if !tools.is_empty() {
-            // Extract normal text before the python tag
-            // JsonParser doesn't preserve normal text for single start tokens, so we do it manually
-            let normal_text = if let Some(tag_pos) = text.find("<|python_tag|>") {
-                text[..tag_pos].to_string()
-            } else {
-                String::new()
-            };
-            return Ok((normal_text, tools));
-        }
-        // If no results and text starts with '{', try plain JSON
-        if text.trim_start().starts_with('{') {
-            // Create a temporary plain JSON parser
-            let plain_parser = JsonParser::new();
-            let (_json_normal_text, tools) = plain_parser.parse_complete(text).await?;
-            // For plain JSON, don't extract normal text (consistent with JsonParser behavior)
-            return Ok((String::new(), tools));
-        }
-        // No tool calls found, return original text as normal text
-        Ok((text.to_string(), vec![]))
+        // Extract normal text and JSON content
+        let (normal_text, json_content) =
+            if let Some((normal, json)) = self.extract_content_after_python_tag(text) {
+                (normal, json)
+            } else if text.trim_start().starts_with('{') {
+                (String::new(), text.to_string())
+            } else {
+                // No JSON structure found
+                return Ok((text.to_string(), vec![]));
+            };
+        // Parse the JSON content (may contain semicolon-separated objects)
+        let tools = if json_content.contains(';') {
+            self.parse_semicolon_separated(&json_content)?
+        } else {
+            // Try single JSON object
+            let parsed = serde_json::from_str::<Value>(json_content.trim())
+                .map_err(|e| ToolParserError::ParsingFailed(e.to_string()))
+                .and_then(|v| {
+                    self.parse_single_object(&v)
+                        .map(|opt| opt.map_or_else(Vec::new, |tool| vec![tool]))
+                });
+            parsed.unwrap_or_else(|e| {
+                tracing::warn!("Failed to parse tool call: {:?}", e);
+                vec![]
+            })
+        };
+        // If we couldn't parse any tools, return the original text
+        if tools.is_empty() {
+            return Ok((text.to_string(), vec![]));
+        }
+        Ok((normal_text, tools))
     }

     async fn parse_incremental(
```
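As exercised by sgl-router/tests/tool_parser_llama.rs, the rewritten `parse_complete` now surfaces the text before the tag as normal text instead of discarding it. A runnable sketch (assumes tokio; the input mirrors `test_llama_with_text_before`):

```rust
use sglang_router_rs::tool_parser::{LlamaParser, ToolParser};

#[tokio::main]
async fn main() {
    let parser = LlamaParser::new();
    let input = r#"Let me help you. <|python_tag|>{"name": "get_time", "parameters": {"timezone": "UTC"}}"#;
    let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
    // Prefix text is preserved; the payload becomes a ToolCall.
    assert_eq!(normal_text, "Let me help you. ");
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "get_time");
}
```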
```diff
@@ -75,29 +189,138 @@ impl ToolParser for LlamaParser {
         chunk: &str,
         state: &mut ParseState,
     ) -> ToolParserResult<StreamResult> {
-        // First, try with the configured json_parser (which handles python_tag)
-        let result = self.json_parser.parse_incremental(chunk, state).await?;
-        // If we get Incomplete and no python_tag in buffer, might be plain JSON
-        if matches!(result, StreamResult::Incomplete) {
-            // Find where the actual content starts after trimming
-            let trimmed = state.buffer.trim_start();
-            if trimmed.starts_with('{') && !state.buffer.contains("<|python_tag|>") {
-                // Likely plain JSON, try with a plain parser
-                // Note: We need to be careful not to double-add the chunk
-                let plain_parser = JsonParser::new();
-                // The chunk was already added to state.buffer by json_parser above
-                // So we call with empty string to just process what's in the buffer
-                return plain_parser.parse_incremental("", state).await;
-            }
-        }
-        Ok(result)
+        state.buffer.push_str(chunk);
+        // In streaming mode, be more lenient - check for potential JSON start
+        let has_potential_json = state.buffer.contains('{');
+        let has_tag = self.has_python_tag(&state.buffer);
+        // If we have neither python_tag nor potential JSON structure, return as normal text
+        if !has_tag && !has_potential_json {
+            // No relevant markers detected - return all buffered content as normal text
+            let normal_text = std::mem::take(&mut state.buffer);
+            return Ok(StreamResult::NormalText(normal_text));
+        }
+        // If we only have '{' without more content, wait for more data
+        let trimmed = state.buffer.trim();
+        if (trimmed == "{") && !has_tag {
+            return Ok(StreamResult::Incomplete);
+        }
+        // Check for text before python_tag and extract it as normal text
+        if let Some(tag_pos) = state.buffer.find("<|python_tag|>") {
+            if tag_pos > 0 {
+                // We have text before the python_tag - extract it as normal text
+                let normal_text: String = state.buffer.drain(..tag_pos).collect();
+                return Ok(StreamResult::NormalText(normal_text));
+            }
+        } else {
+            // For JSON without python_tag, look for the start of JSON structure
+            let brace_pos = state.buffer.find('{');
+            let bracket_pos = state.buffer.find('[');
+            let json_pos = brace_pos.iter().chain(bracket_pos.iter()).min().copied();
+            if let Some(pos) = json_pos {
+                if pos > 0 {
+                    // We have text before JSON structure - extract it as normal text
+                    let normal_text: String = state.buffer.drain(..pos).collect();
+                    return Ok(StreamResult::NormalText(normal_text));
+                }
+            }
+        }
+        // Extract JSON content based on whether we have python_tag
+        let (json_content, content_start_pos) = if self.has_python_tag(&state.buffer) {
+            // Extract content after python_tag
+            if let Some(tag_pos) = state.buffer.find("<|python_tag|>") {
+                let start = tag_pos + "<|python_tag|>".len();
+                (&state.buffer[start..], start)
+            } else {
+                (&state.buffer[..], 0)
+            }
+        } else {
+            // Find where the actual content starts after trimming
+            let trimmed = state.buffer.trim_start();
+            let trim_offset = state.buffer.len() - trimmed.len();
+            (trimmed.trim_end(), trim_offset)
+        };
+        // Check if we have a semicolon separator (multiple tools)
+        if let Some(semicolon_pos) = json_content.find(';') {
+            // We have multiple tools - try to parse the first one
+            let first_json = &json_content[..semicolon_pos];
+            if let Ok(value) = serde_json::from_str::<Value>(first_json.trim()) {
+                if let Some(tool) = self.parse_single_object(&value)? {
+                    // Remove the parsed JSON and semicolon from the buffer
+                    let end_pos = content_start_pos + semicolon_pos + 1; // +1 to include the semicolon
+                    state.buffer.drain(content_start_pos..end_pos);
+                    return Ok(StreamResult::ToolComplete(tool));
+                }
+            }
+        }
+        // Try to parse with partial JSON parser
+        match self.partial_json.parse_value(json_content) {
+            Ok((value, consumed)) => {
+                // Check if we have a complete JSON structure
+                if consumed == json_content.len() {
+                    // Check if this is truly complete
+                    let looks_complete =
+                        json_content.ends_with('}') || json_content.ends_with(']');
+                    if looks_complete {
+                        // Complete JSON, parse tool calls
+                        let tools = self.parse_json_value(&value)?;
+                        if !tools.is_empty() {
+                            // Clear buffer since we consumed everything
+                            state.buffer.clear();
+                            // Return the first tool as complete
+                            if let Some(tool) = tools.into_iter().next() {
+                                return Ok(StreamResult::ToolComplete(tool));
+                            }
+                        }
+                    }
+                } else {
+                    // Partial JSON, try to extract tool name for streaming
+                    if let Some(name) = value.get("name").and_then(|v| v.as_str()) {
+                        // Return tool name once we see it
+                        if !state.in_string {
+                            state.in_string = true; // Use as a flag for "name sent"
+                            return Ok(StreamResult::ToolName {
+                                index: 0,
+                                name: name.to_string(),
+                            });
+                        }
+                        // Check for complete arguments
+                        if let Some(args) =
+                            value.get("arguments").or_else(|| value.get("parameters"))
+                        {
+                            if let Ok(args_str) = serde_json::to_string(args) {
+                                return Ok(StreamResult::ToolArguments {
+                                    index: 0,
+                                    arguments: args_str,
+                                });
+                            }
+                        }
+                    }
+                }
+            }
+            Err(_) => {
+                // Failed to parse even as partial JSON
+                // Continue waiting for more data
+            }
+        }
+        Ok(StreamResult::Incomplete)
     }
```
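The streaming path is driven chunk by chunk, as the llama test file below does. A sketch of the calling side (chunks taken from `test_llama_streaming_partial`; assumes tokio):

```rust
use sglang_router_rs::tool_parser::{LlamaParser, ParseState, StreamResult, ToolParser};

#[tokio::main]
async fn main() {
    let parser = LlamaParser::new();
    let mut state = ParseState::new();
    let chunks = [
        r#"<|python"#,
        r#"_tag|>{"name": "#,
        r#""calculate", "#,
        r#""parameters": {"x": 10}"#,
        r#"}"#,
    ];
    for chunk in chunks {
        match parser.parse_incremental(chunk, &mut state).await.unwrap() {
            StreamResult::ToolName { name, .. } => println!("saw tool name: {name}"),
            StreamResult::ToolComplete(tool) => println!("complete: {}", tool.function.name),
            _ => {} // Incomplete / NormalText / ToolArguments: keep feeding chunks
        }
    }
}
```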
```diff
     fn detect_format(&self, text: &str) -> bool {
         // Llama format if contains python_tag or starts with JSON object
         text.contains("<|python_tag|>")
-            || (text.trim_start().starts_with('{')
-                && (text.contains(r#""name""#) || text.contains(r#""function""#)))
+            || (text.trim_start().starts_with('{') && text.contains(r#""name""#))
     }
 }
```
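Per the detection tests in tool_parser_llama.rs, the heuristic is now: a python_tag anywhere, or a JSON object containing a `"name"` key. A short sketch:

```rust
use sglang_router_rs::tool_parser::{LlamaParser, ToolParser};

fn main() {
    let parser = LlamaParser::new();
    assert!(parser.detect_format(r#"<|python_tag|>{"name": "test"}"#));
    assert!(parser.detect_format(r#"{"name": "test", "parameters": {}}"#));
    assert!(!parser.detect_format("plain text"));
    assert!(!parser.detect_format(r#"{"key": "value"}"#)); // no name field
}
```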
sgl-router/src/tool_parser/parsers/mistral_parser.rs (view file @ 4eeaff74)

```diff
@@ -280,33 +280,6 @@ impl ToolParser for MistralParser {
     }

     fn detect_format(&self, text: &str) -> bool {
-        // Check if text contains Mistral-specific markers
-        if self.has_tool_markers(text) {
-            // Try to extract and validate the array
-            if let Some(json_array) = self.extract_json_array(text) {
-                // Check if it's valid JSON
-                if let Ok(value) = serde_json::from_str::<Value>(json_array) {
-                    // Check if it contains tool-like structures
-                    match value {
-                        Value::Array(ref arr) => arr.iter().any(|v| {
-                            v.as_object().is_some_and(|o| {
-                                o.contains_key("name") && o.contains_key("arguments")
-                            })
-                        }),
-                        Value::Object(ref obj) => {
-                            obj.contains_key("name") && obj.contains_key("arguments")
-                        }
-                        _ => false,
-                    }
-                } else {
-                    false
-                }
-            } else {
-                // Has markers but no complete array - might be streaming
-                true
-            }
-        } else {
-            false
-        }
+        self.has_tool_markers(text)
     }
 }
```
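The Mistral parser's `detect_format` is reduced to a marker check; JSON validity is now judged at parse time rather than during detection, which also lets partial streams match. A dependency-free sketch of the trade-off, assuming Mistral's `[TOOL_CALLS]` prefix (the token that also appears in the removed `TokenConfig` tests further down):

```rust
// Hypothetical stand-in mirroring has_tool_markers.
fn has_tool_markers(text: &str) -> bool {
    text.contains("[TOOL_CALLS]")
}

fn main() {
    assert!(has_tool_markers(r#"[TOOL_CALLS] [{"name": "search", "arguments": {}}]"#));
    assert!(has_tool_markers("[TOOL_CALLS] [ still streaming")); // partial streams now match too
    assert!(!has_tool_markers("plain text"));
}
```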
sgl-router/src/tool_parser/parsers/pythonic_parser.rs (view file @ 4eeaff74)

```diff
@@ -130,32 +130,7 @@ impl ToolParser for PythonicParser {
             return true;
         }
-        let trimmed = cleaned.trim();
-        let Some(open_idx) = trimmed.find('[') else {
-            return false;
-        };
-        let after_bracket = trimmed[open_idx + 1..].trim_start();
-        let mut chars = after_bracket.char_indices();
-        let Some((_, first_char)) = chars.next() else {
-            return false;
-        };
-        if !(first_char.is_ascii_alphabetic() || first_char == '_') {
-            return false;
-        }
-        let mut ident_len = first_char.len_utf8();
-        for (idx, ch) in chars {
-            if ch.is_alphanumeric() || ch == '_' {
-                ident_len = idx + ch.len_utf8();
-            } else {
-                break;
-            }
-        }
-        let remaining = after_bracket[ident_len..].trim_start();
-        remaining.starts_with('(')
+        false
     }
 }
```
sgl-router/src/tool_parser/parsers/qwen_parser.rs (view file @ 4eeaff74)

```diff
@@ -39,14 +39,6 @@ impl QwenParser {
         }
     }

-    /// Extract all tool call blocks from text
-    fn extract_tool_calls<'a>(&self, text: &'a str) -> Vec<&'a str> {
-        self.extractor
-            .captures_iter(text)
-            .filter_map(|cap| cap.get(1).map(|m| m.as_str()))
-            .collect()
-    }

     /// Parse a single JSON object into a ToolCall
     fn parse_single_object(&self, obj: &Value, index: usize) -> ToolParserResult<Option<ToolCall>> {
         let name = obj.get("name").and_then(|v| v.as_str());
```
```diff
@@ -142,17 +134,15 @@ impl ToolParser for QwenParser {
     let mut tools = Vec::new();
     for (index, captures) in self.extractor.captures_iter(text).enumerate() {
         if let Some(json_str) = captures.get(1) {
-            match serde_json::from_str::<Value>(json_str.as_str().trim()) {
-                Ok(value) => match self.parse_single_object(&value, index) {
-                    Ok(Some(tool)) => tools.push(tool),
-                    Ok(None) => continue,
-                    Err(e) => {
-                        tracing::warn!("Failed to parse tool call: {}", e);
-                        continue;
-                    }
-                },
-                Err(e) => {
-                    tracing::warn!("Failed to parse JSON in tool call: {}", e);
-                    continue;
-                }
-            }
+            let parsed = serde_json::from_str::<Value>(json_str.as_str().trim())
+                .map_err(|e| ToolParserError::ParsingFailed(e.to_string()))
+                .and_then(|v| self.parse_single_object(&v, index));
+            match parsed {
+                Ok(Some(tool)) => tools.push(tool),
+                Ok(None) => continue,
+                Err(e) => {
+                    tracing::warn!("Failed to parse tool call {}: {:?}", index, e);
+                    continue;
+                }
+            }
```
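The Qwen loop refactor replaces nested `match` arms with a single `Result` chain, so JSON errors and tool-shape errors flow through one error path. A dependency-light sketch of the same pattern (`extract_name` is a hypothetical stand-in for `parse_single_object`):

```rust
use serde_json::Value;

// Chain the parse and the object inspection into one Result.
fn extract_name(json_str: &str) -> Result<Option<String>, String> {
    serde_json::from_str::<Value>(json_str.trim())
        .map_err(|e| e.to_string()) // ParsingFailed in the real code
        .map(|v| v.get("name").and_then(|n| n.as_str()).map(String::from))
}

fn main() {
    assert_eq!(extract_name(r#"{"name": "x"}"#).unwrap(), Some("x".into()));
    assert!(extract_name("not json").is_err());
}
```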
```diff
@@ -268,26 +258,6 @@ impl ToolParser for QwenParser {
     }

     fn detect_format(&self, text: &str) -> bool {
-        // Check if text contains Qwen-specific markers. If not, it's not this format.
-        if !self.has_tool_markers(text) {
-            return false;
-        }
-        // Try to extract tool calls to see if we have a complete, valid one.
-        let tool_blocks = self.extract_tool_calls(text);
-        for json_str in &tool_blocks {
-            if let Ok(value) = serde_json::from_str::<Value>(json_str.trim()) {
-                if let Some(obj) = value.as_object() {
-                    if obj.contains_key("name") && obj.contains_key("arguments") {
-                        // Found a valid, complete tool call.
-                        return true;
-                    }
-                }
-            }
-        }
-        // If we have the marker but no valid complete tool call,
-        // it could be a partial stream. We should detect this as the format.
-        true
+        self.has_tool_markers(text)
     }
 }
```
sgl-router/src/tool_parser/tests.rs (view file @ 4eeaff74)

```diff
@@ -4,7 +4,6 @@ use crate::tool_parser::partial_json::{
     compute_diff, find_common_prefix, is_complete_json, PartialJson,
 };
 use crate::tool_parser::traits::ToolParser;
-use crate::tool_parser::types::TokenConfig;

 #[test]
 fn test_parse_state_new() {
```
```diff
@@ -42,20 +41,6 @@ fn test_parse_state_process_char() {
     assert!(state.in_string); // Still in string because quote was escaped
 }

-#[test]
-fn test_token_config() {
-    let config = TokenConfig {
-        start_tokens: vec!["<start>".to_string(), "[".to_string()],
-        end_tokens: vec!["</end>".to_string(), "]".to_string()],
-        separator: ", ".to_string(),
-    };
-    let pairs: Vec<_> = config.iter_pairs().collect();
-    assert_eq!(pairs.len(), 2);
-    assert_eq!(pairs[0], ("<start>", "</end>"));
-    assert_eq!(pairs[1], ("[", "]"));
-}

 #[test]
 fn test_parser_registry() {
     let registry = ParserRegistry::new();
```
```diff
@@ -280,46 +265,7 @@ async fn test_json_parser_with_parameters() {
     assert!(tools[0].function.arguments.contains("add"));
 }

-#[tokio::test]
-async fn test_json_parser_with_tokens() {
-    let parser = JsonParser::with_config(TokenConfig {
-        start_tokens: vec!["[TOOL_CALLS] [".to_string()],
-        end_tokens: vec!["]".to_string()],
-        separator: ", ".to_string(),
-    });
-    let input = r#"[TOOL_CALLS] [{"name": "search", "arguments": {"query": "rust programming"}}]"#;
-    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
-    assert_eq!(tools.len(), 1);
-    assert_eq!(tools[0].function.name, "search");
-}
-
-#[tokio::test]
-async fn test_multiline_json_with_tokens() {
-    let parser = JsonParser::with_config(TokenConfig {
-        start_tokens: vec!["<tool>".to_string()],
-        end_tokens: vec!["</tool>".to_string()],
-        separator: ", ".to_string(),
-    });
-    // Pretty-printed multi-line JSON
-    let input = r#"<tool>{
-  "name": "get_weather",
-  "arguments": {
-    "location": "San Francisco",
-    "units": "celsius",
-    "include_forecast": true
-  }
-}</tool>"#;
-    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
-    assert_eq!(tools.len(), 1);
-    assert_eq!(tools[0].function.name, "get_weather");
-    assert!(tools[0].function.arguments.contains("San Francisco"));
-    assert!(tools[0].function.arguments.contains("celsius"));
-    assert!(tools[0].function.arguments.contains("true"));
-}
+// Tests removed - TokenConfig no longer supported in JsonParser

 #[tokio::test]
 async fn test_multiline_json_array() {
```
```diff
@@ -361,29 +307,6 @@ fn test_json_parser_format_detection() {
     // Should not detect non-tool formats
     assert!(!parser.detect_format("plain text"));
     assert!(!parser.detect_format(r#"{"key": "value"}"#));
     assert!(!parser.detect_format(r#"{"data": {"nested": true}}"#));
 }

-#[tokio::test]
-async fn test_json_parser_streaming() {
-    let parser = JsonParser::new();
-    let mut state = ParseState::new();
-    let full_json = r#"{"name": "get_weather", "arguments": {"location": "San Francisco"}}"#;
-    let result = parser.parse_incremental(full_json, &mut state).await.unwrap();
-    match result {
-        StreamResult::ToolComplete(tool) => {
-            assert_eq!(tool.function.name, "get_weather");
-            assert!(tool.function.arguments.contains("San Francisco"));
-        }
-        _ => panic!("Expected ToolComplete for complete JSON"),
-    }
-}

 #[tokio::test]
```
```diff
@@ -469,37 +392,7 @@ mod failure_cases {
     assert_eq!(tools[0].function.arguments, "null");
 }

-#[tokio::test]
-async fn test_broken_wrapper_tokens() {
-    let parser = JsonParser::with_config(TokenConfig {
-        start_tokens: vec!["<tool>".to_string()],
-        end_tokens: vec!["</tool>".to_string()],
-        separator: ", ".to_string(),
-    });
-    // Missing end token
-    let input = r#"<tool>{"name": "test", "arguments": {}}"#;
-    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
-    assert_eq!(tools.len(), 0, "Should fail to parse without complete wrapper");
-    // Missing start token - parser looks for complete wrapper, so this won't parse
-    let input = r#"{"name": "test", "arguments": {}}</tool>"#;
-    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
-    assert_eq!(tools.len(), 0, "Should not parse JSON with incomplete wrapper");
-    // Mismatched tokens
-    let input = r#"<tool>{"name": "test", "arguments": {}}</wrong>"#;
-    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
-    assert_eq!(tools.len(), 0, "Should fail with mismatched tokens");
-}
+// Test removed - wrapper token functionality moved to specific parsers

 #[tokio::test]
 async fn test_invalid_json_structures() {
```
```diff
@@ -653,34 +546,6 @@ mod edge_cases {
     assert!(tools[0].function.arguments.contains("null"));
 }

-#[tokio::test]
-async fn test_multiple_token_pairs_with_conflicts() {
-    let parser = JsonParser::with_config(TokenConfig {
-        start_tokens: vec!["<<".to_string(), "<tool>".to_string()],
-        end_tokens: vec![">>".to_string(), "</tool>".to_string()],
-        separator: ", ".to_string(),
-    });
-    // First pattern
-    let input = r#"<<{"name": "test1", "arguments": {}}>>"#;
-    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
-    assert_eq!(tools.len(), 1);
-    assert_eq!(tools[0].function.name, "test1");
-    // Second pattern
-    let input = r#"<tool>{"name": "test2", "arguments": {}}</tool>"#;
-    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
-    assert_eq!(tools.len(), 1);
-    assert_eq!(tools[0].function.name, "test2");
-    // Nested patterns (should use first match)
-    let input = r#"<<tool>{"name": "test3", "arguments": {}}</tool>>"#;
-    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
-    // This is tricky - depends on regex behavior
-    // The parser should handle this gracefully
-    assert!(tools.len() <= 1, "Should not parse multiple times");
-}

 #[tokio::test]
 async fn test_streaming_with_partial_chunks() {
     let parser = JsonParser::new();
```
sgl-router/tests/tool_parser_fallback.rs (view file @ 4eeaff74)

```diff
@@ -261,12 +261,12 @@ async fn test_almost_valid_tool_calls() {
     let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
     // Some JSON parsers might accept trailing commas
     if tools.is_empty() {
-        assert_eq!(normal_text, r#"{"name": "test", "arguments": ,}"#);
+        assert_eq!(normal_text, r#"{"name": "test", "arguments": {},}"#);
     }
     // Wrong quote types
     let input = r#"{'name': 'test', 'arguments': {}}"#;
     let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
     assert_eq!(tools.len(), 0); // Standard JSON requires double quotes
-    assert_eq!(normal_text, r#"{'name': 'test', 'arguments': }"#);
+    assert_eq!(normal_text, r#"{'name': 'test', 'arguments': {}}"#);
 }
```
sgl-router/tests/tool_parser_json.rs (view file @ 4eeaff74)

```diff
@@ -3,7 +3,7 @@
 //! Tests for the JSON parser which handles OpenAI, Claude, and generic JSON formats
 use serde_json::json;
-use sglang_router_rs::tool_parser::{JsonParser, TokenConfig, ToolParser};
+use sglang_router_rs::tool_parser::{JsonParser, ToolParser};

 #[tokio::test]
 async fn test_simple_json_tool_call() {
@@ -158,34 +158,4 @@ async fn test_json_format_detection() {
     assert!(parser.detect_format(r#"{"name": "test", "arguments": {}}"#));
     assert!(parser.detect_format(r#"[{"name": "test"}]"#));
     assert!(!parser.detect_format("plain text"));
     assert!(!parser.detect_format(r#"{"key": "value"}"#)); // No name field
 }
```
```diff
-#[tokio::test]
-async fn test_parse_with_wrapper_tokens() {
-    let parser = JsonParser::with_config(TokenConfig {
-        start_tokens: vec!["<tool>".to_string()],
-        end_tokens: vec!["</tool>".to_string()],
-        separator: ", ".to_string(),
-    });
-    let input = r#"<tool>{"name": "test", "arguments": {}}</tool>"#;
-    let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
-    assert_eq!(tool_calls.len(), 1);
-    assert_eq!(tool_calls[0].function.name, "test");
-    assert_eq!(normal_text, ""); // Wrapper tokens with no extra text
-}
-
-#[tokio::test]
-async fn test_parse_with_start_token_invalid_json() {
-    let parser = JsonParser::with_config(TokenConfig {
-        start_tokens: vec!["<|python_tag|>".to_string()],
-        end_tokens: vec!["".to_string()],
-        separator: ";".to_string(),
-    });
-    let input = r#"Hello world <|python_tag|>this is not valid json at all"#;
-    let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
-    assert_eq!(tool_calls.len(), 0);
-    assert_eq!(normal_text, input); // Should return entire original text when JSON parsing fails
-}
```
sgl-router/tests/tool_parser_llama.rs (view file @ 4eeaff74)

```diff
@@ -7,20 +7,44 @@ use sglang_router_rs::tool_parser::{LlamaParser, ToolParser};
 #[tokio::test]
 async fn test_llama_python_tag_format() {
     let parser = LlamaParser::new();
-    let input = r#"<|python_tag|>{"name": "search", "arguments": {"query": "weather"}}"#;
-    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
+    let input =
+        r#"Here are some results: <|python_tag|>{"name": "search", "parameters": {"query": "weather"}}"#;
+    let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
     assert_eq!(tools.len(), 1);
     assert_eq!(tools[0].function.name, "search");
+    assert_eq!(normal_text, "Here are some results: ");
     let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
     assert_eq!(args["query"], "weather");
 }

+#[tokio::test]
+async fn test_llama_with_semicolon_separation() {
+    let parser = LlamaParser::new();
+    let input = r#"<|python_tag|>{"name": "tool1", "parameters": {}};{"name": "tool2", "parameters": {"y": 2}}"#;
+    let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
+    assert_eq!(tools.len(), 2);
+    assert_eq!(tools[0].function.name, "tool1");
+    assert_eq!(tools[1].function.name, "tool2");
+    assert_eq!(normal_text, "");
+}
+
+#[tokio::test]
+async fn test_llama_no_tool_calls() {
+    let parser = LlamaParser::new();
+    let input = "This is just plain text with no tool calls";
+    let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
+    assert_eq!(tools.len(), 0);
+    assert_eq!(normal_text, input);
+}

 #[tokio::test]
 async fn test_llama_plain_json_fallback() {
     let parser = LlamaParser::new();
-    let input = r#"{"name": "calculate", "arguments": {"x": 5, "y": 10}}"#;
+    let input = r#"{"name": "calculate", "parameters": {"x": 5, "y": 10}}"#;
     let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
     assert_eq!(tools.len(), 1);
```
```diff
@@ -34,7 +58,7 @@ async fn test_llama_plain_json_fallback() {
 #[tokio::test]
 async fn test_llama_with_text_before() {
     let parser = LlamaParser::new();
-    let input = r#"Let me help you with that. <|python_tag|>{"name": "get_time", "arguments": {"timezone": "UTC"}}"#;
+    let input = r#"Let me help you with that. <|python_tag|>{"name": "get_time", "parameters": {"timezone": "UTC"}}"#;
     let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
     assert_eq!(tools.len(), 1);
@@ -50,7 +74,7 @@ async fn test_llama_with_nested_json() {
     let parser = LlamaParser::new();
     let input = r#"<|python_tag|>{
   "name": "update_settings",
-  "arguments": {
+  "parameters": {
     "preferences": {
       "theme": "dark",
       "language": "en"
@@ -73,13 +97,13 @@ async fn test_llama_empty_arguments() {
     let parser = LlamaParser::new();
     // With python_tag
-    let input = r#"<|python_tag|>{"name": "ping", "arguments": {}}"#;
+    let input = r#"<|python_tag|>{"name": "ping", "parameters": {}}"#;
     let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
     assert_eq!(tools.len(), 1);
     assert_eq!(tools[0].function.name, "ping");
     // Plain JSON
-    let input = r#"{"name": "ping", "arguments": {}}"#;
+    let input = r#"{"name": "ping", "parameters": {}}"#;
     let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
     assert_eq!(tools.len(), 1);
     assert_eq!(tools[0].function.name, "ping");
@@ -90,7 +114,7 @@ async fn test_llama_format_detection() {
     let parser = LlamaParser::new();
     assert!(parser.detect_format(r#"<|python_tag|>{"name": "test"}"#));
-    assert!(parser.detect_format(r#"{"name": "test", "arguments": {}}"#));
+    assert!(parser.detect_format(r#"{"name": "test", "parameters": {}}"#));
     assert!(!parser.detect_format("plain text"));
     assert!(!parser.detect_format(r#"{"key": "value"}"#)); // No name field
 }
```
```diff
@@ -112,7 +136,7 @@ async fn test_llama_real_world_output() {
     // Actual output from Llama 3.2 model - simplified for testing
     let input = r#"I'll search for that information for you.
-<|python_tag|>{"name": "web_search", "arguments": {"query": "Llama 3.2 model capabilities", "num_results": 5, "search_type": "recent"}}"#;
+<|python_tag|>{"name": "web_search", "parameters": {"query": "Llama 3.2 model capabilities", "num_results": 5, "search_type": "recent"}}"#;
     let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
     assert_eq!(tools.len(), 1);
@@ -120,7 +144,7 @@ async fn test_llama_real_world_output() {
     let formatted_input = r#"<|python_tag|>{
   "name": "get_current_time",
-  "arguments": {
+  "parameters": {
     "timezone": "America/New_York",
     "format": "ISO8601"
   }
@@ -131,22 +155,10 @@ async fn test_llama_real_world_output() {
     assert_eq!(tools2[0].function.name, "get_current_time");
 }

-#[tokio::test]
-async fn test_llama_json_array_format() {
-    let parser = LlamaParser::new();
-    // Plain JSON array (should work as fallback)
-    let input = r#"[{"name": "func1", "arguments": {}}, {"name": "func2", "arguments": {}}]"#;
-    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
-    // Current implementation might handle this through JSON fallback
-    assert!(!tools.is_empty());
-}

 #[tokio::test]
 async fn test_single_json() {
     let parser = LlamaParser::new();
-    let text = r#"{"name": "get_weather", "arguments": {"city": "Paris"}}"#;
+    let text = r#"{"name": "get_weather", "parameters": {"city": "Paris"}}"#;
     let (_normal_text, tools) = parser.parse_complete(text).await.unwrap();
     assert_eq!(tools.len(), 1);
@@ -159,7 +171,7 @@ async fn test_single_json() {
 #[tokio::test]
 async fn test_multiple_json_with_separator() {
     let parser = LlamaParser::new();
-    let text = r#"<|python_tag|>{"name": "get_weather", "arguments": {"city": "Paris"}};{"name": "get_tourist_attractions", "arguments": {"city": "Paris"}}"#;
+    let text = r#"<|python_tag|>{"name": "get_weather", "parameters": {"city": "Paris"}};{"name": "get_tourist_attractions", "parameters": {"city": "Paris"}}"#;
     let (_normal_text, tools) = parser.parse_complete(text).await.unwrap();
     // Note: Current implementation may only parse the first one due to semicolon handling
```
```diff
@@ -167,31 +179,24 @@ async fn test_multiple_json_with_separator() {
     assert_eq!(tools[0].function.name, "get_weather");
 }

-#[tokio::test]
-async fn test_multiple_json_with_separator_customized() {
-    let parser = LlamaParser::new();
-    let text = r#"<|python_tag|>{"name": "get_weather", "arguments": {}}<|python_tag|>{"name": "get_tourist_attractions", "arguments": {}}"#;
-    let (_normal_text, tools) = parser.parse_complete(text).await.unwrap();
-    // Current implementation may handle this differently
-    assert!(!tools.is_empty());
-    assert_eq!(tools[0].function.name, "get_weather");
-}

 #[tokio::test]
 async fn test_json_with_trailing_text() {
     let parser = LlamaParser::new();
-    let text = r#"{"name": "get_weather", "arguments": {}} Some follow-up text"#;
-    let (_normal_text, tools) = parser.parse_complete(text).await.unwrap();
-    assert_eq!(tools.len(), 1);
-    assert_eq!(tools[0].function.name, "get_weather");
+    // Valid JSON with trailing text - LlamaParser doesn't support this mixed format
+    let text = r#"{"name": "get_weather", "parameters": {}} Some follow-up text"#;
+    let (normal_text, tools) = parser.parse_complete(text).await.unwrap();
+    // LlamaParser expects pure JSON or <|python_tag|> format, not JSON with trailing text
+    // So this returns as normal text
+    assert_eq!(tools.len(), 0);
+    assert_eq!(normal_text, text);
 }

 #[tokio::test]
 async fn test_invalid_then_valid_json() {
     let parser = LlamaParser::new();
-    let text = r#"{"name": "get_weather", "arguments": {{"name": "get_weather", "arguments": {}}"#;
+    let text = r#"{"name": "get_weather", "parameters": {{"name": "get_weather", "parameters": {}}"#;
     let (_normal_text, tools) = parser.parse_complete(text).await.unwrap();
     // Should parse at least one valid JSON
```
```diff
@@ -212,7 +217,7 @@ async fn test_plain_text_only() {
 #[tokio::test]
 async fn test_with_python_tag_prefix() {
     let parser = LlamaParser::new();
-    let text = r#"Some intro. <|python_tag|>{"name": "get_weather", "arguments": {}}"#;
+    let text = r#"Some intro. <|python_tag|>{"name": "get_weather", "parameters": {}}"#;
     let (_normal_text, tools) = parser.parse_complete(text).await.unwrap();
     assert_eq!(tools.len(), 1);
@@ -227,7 +232,7 @@ async fn test_llama_streaming_simple() {
     let mut state = sglang_router_rs::tool_parser::ParseState::new();
     // Send complete JSON at once
-    let full_json = r#"<|python_tag|>{"name": "search", "arguments": {"query": "weather"}}"#;
+    let full_json = r#"<|python_tag|>{"name": "search", "parameters": {"query": "weather"}}"#;
     let result = parser.parse_incremental(full_json, &mut state)
@@ -252,7 +257,7 @@ async fn test_llama_streaming_partial() {
         r#"<|python"#,
         r#"_tag|>{"name": "#,
         r#""calculate", "#,
-        r#""arguments": {"x": 10}"#,
+        r#""parameters": {"x": 10}"#,
         r#"}"#,
     ];
@@ -278,7 +283,7 @@ async fn test_llama_streaming_plain_json() {
     let chunks = vec![
         r#"{"name": "#,
         r#""search", "#,
-        r#""arguments": "#,
+        r#""parameters": "#,
         r#"{"query": "#,
         r#""test"}}"#,
     ];
@@ -305,7 +310,7 @@ async fn test_llama_streaming_with_text_before() {
         r#"Let me help you. "#,
         r#"<|python_tag|>"#,
         r#"{"name": "get_time","#,
-        r#" "arguments": {"#,
+        r#" "parameters": {"#,
         r#""timezone": "UTC"}}"#,
     ];
@@ -328,7 +333,7 @@ async fn test_llama_streaming_multiple_tools() {
     let mut state = sglang_router_rs::tool_parser::ParseState::new();
     let text =
-        r#"<|python_tag|>{"name": "func1", "arguments": {}};{"name": "func2", "arguments": {}}"#;
+        r#"<|python_tag|>{"name": "func1", "parameters": {}};{"name": "func2", "parameters": {}}"#;
     let result = parser.parse_incremental(text, &mut state).await.unwrap();
@@ -337,7 +342,7 @@ async fn test_llama_streaming_multiple_tools() {
         sglang_router_rs::tool_parser::StreamResult::ToolComplete(tool) => {
             assert_eq!(tool.function.name, "func1");
         }
-        _ => panic!("Expected first tool to be complete"),
+        _ => panic!("Expected first tool to be complete, got: {:?}", result),
     }
     // Process remaining buffer to get second tool
@@ -356,7 +361,7 @@ async fn test_llama_streaming_multiple_tools_chunked() {
     let mut state = sglang_router_rs::tool_parser::ParseState::new();
     // First chunk - incomplete first JSON
-    let chunk1 = r#"<|python_tag|>{"name": "get_weather", "arguments""#;
+    let chunk1 = r#"<|python_tag|>{"name": "get_weather", "parameters""#;
     let result1 = parser.parse_incremental(chunk1, &mut state).await.unwrap();
     // Should be incomplete or have tool name
@@ -383,32 +388,15 @@ async fn test_llama_streaming_multiple_tools_chunked() {
             let args: serde_json::Value = serde_json::from_str(&tool.function.arguments).unwrap();
             assert_eq!(args["city"], "Paris");
         }
-        _ => panic!("Expected first tool to be complete after separator"),
+        _ => panic!("Expected first tool complete, got: {:?}", result2),
     }
     // Third chunk - complete second JSON
-    let chunk3 = r#""get_time", "arguments": {"timezone": "UTC"}}"#;
+    let chunk3 = r#""get_time", "parameters": {"timezone": "UTC"}}"#;
     let result3 = parser.parse_incremental(chunk3, &mut state).await.unwrap();
     // Should get second tool complete
     match result3 {
         sglang_router_rs::tool_parser::StreamResult::ToolComplete(tool) => {
             assert_eq!(tool.function.name, "get_time");
             let args: serde_json::Value = serde_json::from_str(&tool.function.arguments).unwrap();
             assert_eq!(args["timezone"], "UTC");
         }
-        _ => {
-            // If not complete yet, try one more empty chunk
-            let result4 = parser.parse_incremental("", &mut state).await.unwrap();
-            match result4 {
-                sglang_router_rs::tool_parser::StreamResult::ToolComplete(tool) => {
-                    assert_eq!(tool.function.name, "get_time");
-                    let args: serde_json::Value =
-                        serde_json::from_str(&tool.function.arguments).unwrap();
-                    assert_eq!(args["timezone"], "UTC");
-                }
-                _ => panic!("Expected second tool to be complete"),
-            }
-        }
+        _ => panic!("Expected tool to be complete, got: {:?}", result3),
     }
 }
```
sgl-router/tests/tool_parser_pythonic.rs (view file @ 4eeaff74)

```diff
@@ -122,10 +122,9 @@ async fn test_pythonic_empty_arguments() {
 async fn test_pythonic_format_detection() {
     let parser = PythonicParser::new();
-    assert!(parser.detect_format("[function_name("));
+    assert!(!parser.detect_format("[function_name(")); // Incomplete
     assert!(parser.detect_format("[get_weather(city=\"NYC\")]"));
     assert!(!parser.detect_format("Just plain text"));
     assert!(!parser.detect_format("[1, 2, 3]")); // Plain list
     assert!(!parser.detect_format("{\"name\": \"test\"}")); // JSON
 }
```
sgl-router/tests/tool_parser_wrapper_tokens.rs — deleted (100644 → 0, view file @ a17e70f5)

The entire file is removed; its former contents:

````rust
//! Wrapper Token Tests
//!
//! Tests for JSON parser with custom wrapper tokens

use sglang_router_rs::tool_parser::{JsonParser, TokenConfig, ToolParser};

#[tokio::test]
async fn test_json_with_xml_style_wrapper() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["<tool>".to_string()],
        end_tokens: vec!["</tool>".to_string()],
        separator: ", ".to_string(),
    });
    let input = r#"Some text before <tool>{"name": "test", "arguments": {"x": 1}}</tool> and after"#;
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "test");
    let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
    assert_eq!(args["x"], 1);
}

#[tokio::test]
async fn test_json_with_multiple_wrapper_pairs() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["<tool>".to_string(), "<<TOOL>>".to_string()],
        end_tokens: vec!["</tool>".to_string(), "<</TOOL>>".to_string()],
        separator: ", ".to_string(),
    });
    let input1 = r#"<tool>{"name": "tool1", "arguments": {}}</tool>"#;
    let (_normal_text, tools1) = parser.parse_complete(input1).await.unwrap();
    assert_eq!(tools1.len(), 1);
    assert_eq!(tools1[0].function.name, "tool1");
    let input2 = r#"<<TOOL>>{"name": "tool2", "arguments": {}}<</TOOL>>"#;
    let (_normal_text, tools2) = parser.parse_complete(input2).await.unwrap();
    assert_eq!(tools2.len(), 1);
    assert_eq!(tools2[0].function.name, "tool2");
}

#[tokio::test]
async fn test_json_with_only_start_token() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec![">>>FUNCTION:".to_string()],
        end_tokens: vec!["".to_string()], // Empty end token
        separator: ", ".to_string(),
    });
    let input = r#"Some preamble >>>FUNCTION:{"name": "execute", "arguments": {"cmd": "ls"}}"#;
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "execute");
}

#[tokio::test]
async fn test_json_with_custom_separator() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["[FUNC]".to_string()],
        end_tokens: vec!["[/FUNC]".to_string()],
        separator: " | ".to_string(), // Custom separator
    });
    // Though we're not testing multiple tools here, the separator is configured
    let input = r#"[FUNC]{"name": "test", "arguments": {}}[/FUNC]"#;
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "test");
}

#[tokio::test]
async fn test_json_with_nested_wrapper_tokens_in_content() {
    // Known limitation: When wrapper tokens appear inside JSON strings,
    // the simple regex-based extraction may fail. This would require
    // a more sophisticated parser that understands JSON string escaping.
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["<call>".to_string()],
        end_tokens: vec!["</call>".to_string()],
        separator: ", ".to_string(),
    });
    let input = r#"<call>{"name": "echo", "arguments": {"text": "Use <call> and </call> tags"}}</call>"#;
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    // This is a known limitation - the parser may fail when end tokens appear in content
    // For now, we accept this behavior
    if tools.is_empty() {
        // Parser failed due to nested tokens - this is expected
        assert_eq!(tools.len(), 0, "Known limitation: nested wrapper tokens in content");
    } else {
        // If it does parse, verify it's correct
        assert_eq!(tools[0].function.name, "echo");
        let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
        assert_eq!(args["text"], "Use <call> and </call> tags");
    }
}

#[tokio::test]
async fn test_json_extraction_without_wrapper_tokens() {
    // Default parser without wrapper tokens should extract JSON from text
    let parser = JsonParser::new();
    let input = r#"
Here is some text before the JSON.
{"name": "search", "arguments": {"query": "test"}}
And here is some text after.
"#;
    let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(
        normal_text,
        "\nHere is some text before the JSON.\n\nAnd here is some text after.\n"
    );
    assert_eq!(tools[0].function.name, "search");
}

#[tokio::test]
async fn test_json_with_multiline_wrapper_content() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["```json\n".to_string()],
        end_tokens: vec!["\n```".to_string()],
        separator: ", ".to_string(),
    });
    let input = r#"Here's the function call:
```json
{
  "name": "format_code",
  "arguments": {
    "language": "rust",
    "code": "fn main() {}"
  }
}
```
Done!"#;
    let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(normal_text, "");
    assert_eq!(tools[0].function.name, "format_code");
}

#[tokio::test]
async fn test_json_with_special_chars_in_tokens() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["{{FUNC[[".to_string()],
        end_tokens: vec!["]]FUNC}}".to_string()],
        separator: ", ".to_string(),
    });
    let input = r#"{{FUNC[[{"name": "test", "arguments": {"special": "[]{}"}}]]FUNC}}"#;
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "test");
    let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
    assert_eq!(args["special"], "[]{}");
}

#[tokio::test]
async fn test_json_multiple_tools_with_wrapper() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["<fn>".to_string()],
        end_tokens: vec!["</fn>".to_string()],
        separator: ", ".to_string(),
    });
    // Multiple wrapped JSON objects
    let input = r#"
<fn>{"name": "tool1", "arguments": {}}</fn>
Some text between.
<fn>{"name": "tool2", "arguments": {"x": 1}}</fn>
"#;
    // Current implementation might handle this as separate calls
    // Let's test that at least the first one is parsed
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert!(!tools.is_empty(), "Should parse at least one tool");
    assert_eq!(tools[0].function.name, "tool1");
}

#[tokio::test]
async fn test_json_wrapper_with_array() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["<tools>".to_string()],
        end_tokens: vec!["</tools>".to_string()],
        separator: ", ".to_string(),
    });
    let input = r#"<tools>[
        {"name": "func1", "arguments": {}},
        {"name": "func2", "arguments": {"param": "value"}}
    ]</tools>"#;
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 2);
    assert_eq!(tools[0].function.name, "func1");
    assert_eq!(tools[1].function.name, "func2");
}

#[tokio::test]
async fn test_json_incomplete_wrapper_tokens() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["<tool>".to_string()],
        end_tokens: vec!["</tool>".to_string()],
        separator: ", ".to_string(),
    });
    // Missing end token
    let input = r#"<tool>{"name": "test", "arguments": {}}"#;
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 0, "Should not parse without closing token");
    // Missing start token
    let input = r#"{"name": "test", "arguments": {}}</tool>"#;
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 0, "Should not parse without opening token");
}

#[tokio::test]
async fn test_json_empty_wrapper_tokens() {
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec![],
        end_tokens: vec![],
        separator: ", ".to_string(),
    });
    let input = r#"{"name": "test", "arguments": {"key": "value"}}"#;
    let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function.name, "test");
}
````