Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
7a49742b
Unverified
Commit
7a49742b
authored
Mar 16, 2026
by
Ben Browning
Committed by
GitHub
Mar 16, 2026
Browse files
[CI/Build] Add common tool call parser test suite (#27599)
Signed-off-by:
Ben Browning
<
bbrownin@redhat.com
>
parent
3e6a1e16
Changes
16
Show whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
1201 additions
and
5 deletions
+1201
-5
tests/entrypoints/openai/tool_parsers/test_gigachat3_tool_parser.py
...ypoints/openai/tool_parsers/test_gigachat3_tool_parser.py
+1
-1
tests/entrypoints/openai/tool_parsers/test_hunyuan_a13b_tool_parser.py
...ints/openai/tool_parsers/test_hunyuan_a13b_tool_parser.py
+1
-1
tests/entrypoints/openai/tool_parsers/test_llama4_pythonic_tool_parser.py
...s/openai/tool_parsers/test_llama4_pythonic_tool_parser.py
+1
-1
tests/entrypoints/openai/tool_parsers/test_olmo3_tool_parser.py
...entrypoints/openai/tool_parsers/test_olmo3_tool_parser.py
+1
-1
tests/entrypoints/openai/tool_parsers/test_pythonic_tool_parser.py
...rypoints/openai/tool_parsers/test_pythonic_tool_parser.py
+1
-1
tests/tool_parsers/common_tests.py
tests/tool_parsers/common_tests.py
+378
-0
tests/tool_parsers/conftest.py
tests/tool_parsers/conftest.py
+12
-0
tests/tool_parsers/test_deepseekv3_tool_parser.py
tests/tool_parsers/test_deepseekv3_tool_parser.py
+92
-0
tests/tool_parsers/test_granite_20b_fc_tool_parser.py
tests/tool_parsers/test_granite_20b_fc_tool_parser.py
+76
-0
tests/tool_parsers/test_granite_tool_parser.py
tests/tool_parsers/test_granite_tool_parser.py
+118
-0
tests/tool_parsers/test_internlm2_tool_parser.py
tests/tool_parsers/test_internlm2_tool_parser.py
+122
-0
tests/tool_parsers/test_longcat_tool_parser.py
tests/tool_parsers/test_longcat_tool_parser.py
+101
-0
tests/tool_parsers/test_phi4mini_tool_parser.py
tests/tool_parsers/test_phi4mini_tool_parser.py
+110
-0
tests/tool_parsers/test_qwen3xml_tool_parser.py
tests/tool_parsers/test_qwen3xml_tool_parser.py
+75
-0
tests/tool_parsers/test_step3_tool_parser.py
tests/tool_parsers/test_step3_tool_parser.py
+112
-0
tests/tool_parsers/utils.py
tests/tool_parsers/utils.py
+0
-0
No files found.
tests/entrypoints/openai/tool_parsers/test_gigachat3_tool_parser.py
View file @
7a49742b
...
@@ -5,7 +5,7 @@ import json
...
@@ -5,7 +5,7 @@ import json
import
pytest
import
pytest
from
tests.
entrypoints.openai.
tool_parsers.utils
import
(
from
tests.tool_parsers.utils
import
(
run_tool_extraction
,
run_tool_extraction
,
run_tool_extraction_streaming
,
run_tool_extraction_streaming
,
)
)
...
...
tests/entrypoints/openai/tool_parsers/test_hunyuan_a13b_tool_parser.py
View file @
7a49742b
...
@@ -7,7 +7,7 @@ from unittest.mock import MagicMock
...
@@ -7,7 +7,7 @@ from unittest.mock import MagicMock
import
pytest
import
pytest
from
tests.
entrypoints.openai.
tool_parsers.utils
import
(
from
tests.tool_parsers.utils
import
(
run_tool_extraction
,
run_tool_extraction
,
run_tool_extraction_streaming
,
run_tool_extraction_streaming
,
)
)
...
...
tests/entrypoints/openai/tool_parsers/test_llama4_pythonic_tool_parser.py
View file @
7a49742b
...
@@ -5,7 +5,7 @@ from unittest.mock import MagicMock, patch
...
@@ -5,7 +5,7 @@ from unittest.mock import MagicMock, patch
import
pytest
import
pytest
from
tests.
entrypoints.openai.
tool_parsers.utils
import
(
from
tests.tool_parsers.utils
import
(
run_tool_extraction
,
run_tool_extraction
,
run_tool_extraction_streaming
,
run_tool_extraction_streaming
,
)
)
...
...
tests/entrypoints/openai/tool_parsers/test_olmo3_tool_parser.py
View file @
7a49742b
...
@@ -5,7 +5,7 @@ from unittest.mock import MagicMock, patch
...
@@ -5,7 +5,7 @@ from unittest.mock import MagicMock, patch
import
pytest
import
pytest
from
tests.
entrypoints.openai.
tool_parsers.utils
import
(
from
tests.tool_parsers.utils
import
(
run_tool_extraction
,
run_tool_extraction
,
run_tool_extraction_streaming
,
run_tool_extraction_streaming
,
)
)
...
...
tests/entrypoints/openai/tool_parsers/test_pythonic_tool_parser.py
View file @
7a49742b
...
@@ -5,7 +5,7 @@ from unittest.mock import MagicMock, patch
...
@@ -5,7 +5,7 @@ from unittest.mock import MagicMock, patch
import
pytest
import
pytest
from
tests.
entrypoints.openai.
tool_parsers.utils
import
(
from
tests.tool_parsers.utils
import
(
run_tool_extraction
,
run_tool_extraction
,
run_tool_extraction_streaming
,
run_tool_extraction_streaming
,
)
)
...
...
tests/tool_parsers/common_tests.py
0 → 100644
View file @
7a49742b
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
json
from
dataclasses
import
dataclass
,
field
from
types
import
NoneType
from
typing
import
Any
import
pytest
from
tests.tool_parsers.utils
import
run_tool_extraction
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tool_parsers
import
ToolParserManager
@
dataclass
class
ToolParserTestConfig
:
"""Configuration for a tool parser's common tests.
This dataclass contains all the test data and expected results needed
to run the common test suite for a parser. Each parser test file
creates one instance of this config with parser-specific values.
Attributes:
parser_name: Name used with ToolParserManager (e.g., "mistral")
Test data (model outputs):
no_tool_calls_output: Plain text without any tool syntax
single_tool_call_output: One tool call with simple arguments
parallel_tool_calls_output: Multiple tool calls in one response
various_data_types_output: Tool with various data types
empty_arguments_output: Tool call with no parameters
surrounding_text_output: Tool call mixed with regular text
escaped_strings_output: Tool call with escaped chars
malformed_input_outputs: List of invalid inputs
Expected results:
single_tool_call_expected_name: Expected function name
single_tool_call_expected_args: Expected arguments dict
parallel_tool_calls_count: Number of tools in parallel test
parallel_tool_calls_names: Function names in order
single_tool_call_expected_content: Content field when tool called
parallel_tool_calls_expected_content: Content for parallel test
xfail markers:
xfail_streaming: Mapping test name to xfail reason (streaming only)
xfail_nonstreaming: Mapping test name to xfail reason (non-streaming)
Special flags:
allow_empty_or_json_empty_args: True if "" or "{}" both valid for empty args
supports_typed_arguments: True if the parser supports typed function arguments
"""
# Parser identification
parser_name
:
str
# Test data - model outputs for each common test
no_tool_calls_output
:
str
single_tool_call_output
:
str
parallel_tool_calls_output
:
str
various_data_types_output
:
str
empty_arguments_output
:
str
surrounding_text_output
:
str
escaped_strings_output
:
str
malformed_input_outputs
:
list
[
str
]
# Expected results for specific tests (optional overrides)
single_tool_call_expected_name
:
str
=
"get_weather"
single_tool_call_expected_args
:
dict
[
str
,
Any
]
=
field
(
default_factory
=
lambda
:
{
"city"
:
"Tokyo"
}
)
parallel_tool_calls_count
:
int
=
2
parallel_tool_calls_names
:
list
[
str
]
=
field
(
default_factory
=
lambda
:
[
"get_weather"
,
"get_time"
]
)
# xfail configuration - maps test name to xfail reason
xfail_streaming
:
dict
[
str
,
str
]
=
field
(
default_factory
=
dict
)
xfail_nonstreaming
:
dict
[
str
,
str
]
=
field
(
default_factory
=
dict
)
# Content expectations (some parsers strip content, others don't)
single_tool_call_expected_content
:
str
|
None
=
None
parallel_tool_calls_expected_content
:
str
|
None
=
None
# Special assertions for edge cases
allow_empty_or_json_empty_args
:
bool
=
True
# "{}" or "" for empty args
supports_typed_arguments
:
bool
=
True
class ToolParserTests:
    """Mixin providing a shared test suite for tool parsers.

    To use it in a parser test file:

    1. Inherit from this class.
    2. Override the ``test_config`` fixture to return a
       ``ToolParserTestConfig`` for the parser under test.
    3. Optionally override ``tokenizer`` and add parser-specific tests.

    Example:
        class TestMistralToolParser(ToolParserTests):
            @pytest.fixture
            def test_config(self) -> ToolParserTestConfig:
                return ToolParserTestConfig(
                    parser_name="mistral",
                    no_tool_calls_output="Plain text...",
                    # ... other config ...
                )

            # Parser-specific tests
            def test_mistral_specific_feature(self, tool_parser):
                # Custom test logic
                pass

    Every shared test first calls ``apply_xfail_mark`` so a parser can
    declare known failures per mode through ``xfail_streaming`` /
    ``xfail_nonstreaming`` in its config.
    """

    # ------------------------------------------------------------------
    # Fixtures
    # ------------------------------------------------------------------

    @pytest.fixture
    def test_config(self) -> ToolParserTestConfig:
        """Override this to provide parser-specific configuration."""
        raise NotImplementedError(
            "Subclass must provide test_config fixture returning ToolParserTestConfig"
        )

    @pytest.fixture
    def tokenizer(self, default_tokenizer: TokenizerLike) -> TokenizerLike:
        """Override this to provide parser-specific tokenizer."""
        return default_tokenizer

    @pytest.fixture
    def tool_parser(
        self, test_config: ToolParserTestConfig, tokenizer: TokenizerLike
    ):
        """Instantiate the parser class registered under ``parser_name``."""
        parser_cls = ToolParserManager.get_tool_parser(test_config.parser_name)
        return parser_cls(tokenizer)

    @pytest.fixture(params=[True, False])
    def streaming(self, request: pytest.FixtureRequest) -> bool:
        """Run each dependent test once with streaming on and once off."""
        return request.param

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def apply_xfail_mark(
        self,
        request: pytest.FixtureRequest,
        test_config: ToolParserTestConfig,
        test_name: str,
        streaming: bool,
    ) -> None:
        """Mark the running test as a strict xfail if the config says so.

        Args:
            request: The pytest request of the running test; the marker is
                added to ``request.node``.
            test_config: Config whose ``xfail_streaming`` or
                ``xfail_nonstreaming`` mapping is consulted.
            test_name: Key looked up in the selected mapping.
            streaming: Selects ``xfail_streaming`` when True, otherwise
                ``xfail_nonstreaming``.
        """
        reasons = (
            test_config.xfail_streaming
            if streaming
            else test_config.xfail_nonstreaming
        )
        reason = reasons.get(test_name)
        if reason is not None:
            # strict=True: an unexpected pass is reported as a failure, so
            # stale xfail entries get noticed and removed.
            request.node.add_marker(pytest.mark.xfail(reason=reason, strict=True))

    # ------------------------------------------------------------------
    # Shared tests
    # ------------------------------------------------------------------

    def test_no_tool_calls(
        self,
        request: pytest.FixtureRequest,
        tool_parser: Any,
        test_config: ToolParserTestConfig,
        streaming: bool,
    ):
        """Verify parser handles plain text without tool syntax."""
        self.apply_xfail_mark(request, test_config, "test_no_tool_calls", streaming)

        content, tool_calls = run_tool_extraction(
            tool_parser, test_config.no_tool_calls_output, streaming=streaming
        )

        assert content == test_config.no_tool_calls_output, (
            f"Expected content to match input, got {content}"
        )
        assert len(tool_calls) == 0, f"Expected no tool calls, got {len(tool_calls)}"

    def test_single_tool_call_simple_args(
        self,
        request: pytest.FixtureRequest,
        tool_parser: Any,
        test_config: ToolParserTestConfig,
        streaming: bool,
    ):
        """Verify parser extracts one tool with simple arguments."""
        self.apply_xfail_mark(
            request, test_config, "test_single_tool_call_simple_args", streaming
        )

        content, tool_calls = run_tool_extraction(
            tool_parser, test_config.single_tool_call_output, streaming=streaming
        )

        # Content check is opt-in: some parsers strip content, others don't.
        if test_config.single_tool_call_expected_content is not None:
            assert content == test_config.single_tool_call_expected_content

        assert len(tool_calls) == 1, f"Expected 1 tool call, got {len(tool_calls)}"
        assert tool_calls[0].type == "function"
        assert tool_calls[0].function.name == test_config.single_tool_call_expected_name

        args = json.loads(tool_calls[0].function.arguments)
        # Only the configured keys are compared; extra keys are tolerated.
        for key, value in test_config.single_tool_call_expected_args.items():
            assert args.get(key) == value, (
                f"Expected {key}={value}, got {args.get(key)}"
            )

    def test_parallel_tool_calls(
        self,
        request: pytest.FixtureRequest,
        tool_parser: Any,
        test_config: ToolParserTestConfig,
        streaming: bool,
    ):
        """Verify parser handles multiple tools in one response."""
        self.apply_xfail_mark(
            request, test_config, "test_parallel_tool_calls", streaming
        )

        content, tool_calls = run_tool_extraction(
            tool_parser,
            test_config.parallel_tool_calls_output,
            streaming=streaming,
        )

        # Content check is opt-in, mirroring the single-call test. Without
        # it the documented parallel_tool_calls_expected_content field
        # would never be consulted.
        if test_config.parallel_tool_calls_expected_content is not None:
            assert content == test_config.parallel_tool_calls_expected_content

        assert len(tool_calls) == test_config.parallel_tool_calls_count, (
            f"Expected {test_config.parallel_tool_calls_count} "
            f"tool calls, got {len(tool_calls)}"
        )

        # Verify tool names match expected. Guard first so a config listing
        # more names than extracted calls fails with a readable message
        # instead of an IndexError.
        expected_names = test_config.parallel_tool_calls_names
        assert len(expected_names) <= len(tool_calls), (
            f"Config lists {len(expected_names)} expected tool names but only "
            f"{len(tool_calls)} tool calls were extracted"
        )
        for tool_call, expected_name in zip(tool_calls, expected_names):
            assert tool_call.type == "function"
            assert tool_call.function.name == expected_name

        # Verify unique IDs
        ids = [tc.id for tc in tool_calls]
        assert len(ids) == len(set(ids)), "Tool call IDs should be unique"

    def test_various_data_types(
        self,
        request: pytest.FixtureRequest,
        tool_parser: Any,
        test_config: ToolParserTestConfig,
        streaming: bool,
    ):
        """Verify parser handles all JSON types in arguments."""
        self.apply_xfail_mark(
            request, test_config, "test_various_data_types", streaming
        )

        _, tool_calls = run_tool_extraction(
            tool_parser,
            test_config.various_data_types_output,
            streaming=streaming,
        )
        assert len(tool_calls) == 1, f"Expected 1 tool call, got {len(tool_calls)}"

        args = json.loads(tool_calls[0].function.arguments)
        # Verify all expected fields present
        required_fields_types = {
            "string_field": str,
            "int_field": int,
            "float_field": float,
            "bool_field": bool,
            "null_field": NoneType,
            "array_field": list,
            "object_field": dict,
        }
        for required_field, expected_type in required_fields_types.items():
            assert required_field in args, (
                f"Expected field '{required_field}' in arguments"
            )
            if test_config.supports_typed_arguments:
                # Exact type identity (not isinstance) so a bool is not
                # accepted where an int is expected.
                found_type = type(args[required_field])
                assert found_type is expected_type, (
                    f"Expected field '{required_field}' to have type {expected_type}, "
                    f"got {found_type}"
                )

    def test_empty_arguments(
        self,
        request: pytest.FixtureRequest,
        tool_parser: Any,
        test_config: ToolParserTestConfig,
        streaming: bool,
    ):
        """Verify parser handles parameterless tool calls."""
        self.apply_xfail_mark(request, test_config, "test_empty_arguments", streaming)

        _, tool_calls = run_tool_extraction(
            tool_parser, test_config.empty_arguments_output, streaming=streaming
        )

        assert len(tool_calls) == 1, f"Expected 1 tool call, got {len(tool_calls)}"
        # Compared as the raw serialized string, not as parsed JSON.
        args = tool_calls[0].function.arguments
        if test_config.allow_empty_or_json_empty_args:
            assert args in ["{}", ""], f"Expected empty args, got {args}"
        else:
            assert args == "{}", f"Expected {{}}, got {args}"

    def test_surrounding_text(
        self,
        request: pytest.FixtureRequest,
        tool_parser: Any,
        test_config: ToolParserTestConfig,
        streaming: bool,
    ):
        """Verify parser extracts tools from mixed content."""
        self.apply_xfail_mark(request, test_config, "test_surrounding_text", streaming)

        _, tool_calls = run_tool_extraction(
            tool_parser, test_config.surrounding_text_output, streaming=streaming
        )

        assert len(tool_calls) >= 1, (
            f"Expected at least 1 tool call, got {len(tool_calls)}"
        )

    def test_escaped_strings(
        self,
        request: pytest.FixtureRequest,
        tool_parser: Any,
        test_config: ToolParserTestConfig,
        streaming: bool,
    ):
        """Verify parser handles escaped characters in arguments."""
        self.apply_xfail_mark(request, test_config, "test_escaped_strings", streaming)

        _, tool_calls = run_tool_extraction(
            tool_parser, test_config.escaped_strings_output, streaming=streaming
        )

        assert len(tool_calls) == 1, f"Expected 1 tool call, got {len(tool_calls)}"
        args = json.loads(tool_calls[0].function.arguments)
        # At minimum, verify we can parse and have expected fields
        # Exact escaping behavior varies by parser
        assert len(args) > 0, "Expected some arguments with escaped strings"

    def test_malformed_input(
        self,
        request: pytest.FixtureRequest,
        tool_parser: Any,
        test_config: ToolParserTestConfig,
        streaming: bool,
    ):
        """Verify parser gracefully handles invalid syntax."""
        self.apply_xfail_mark(request, test_config, "test_malformed_input", streaming)

        for malformed_input in test_config.malformed_input_outputs:
            # Should not raise. The returned content / tool calls are
            # deliberately ignored because exact behavior varies by parser.
            run_tool_extraction(tool_parser, malformed_input, streaming=streaming)

    def test_streaming_reconstruction(
        self,
        request: pytest.FixtureRequest,
        tool_parser: Any,
        test_config: ToolParserTestConfig,
    ):
        """Verify streaming produces same result as non-streaming."""
        # This test always exercises streaming, so only the streaming xfail
        # mapping is consulted.
        self.apply_xfail_mark(
            request, test_config, "test_streaming_reconstruction", True
        )

        test_output = test_config.single_tool_call_output

        # Non-streaming result
        content_non, tools_non = run_tool_extraction(
            tool_parser, test_output, streaming=False
        )

        # Streaming result
        content_stream, tools_stream = run_tool_extraction(
            tool_parser, test_output, streaming=True
        )

        # Compare results
        assert content_non == content_stream, "Content should match between modes"
        assert len(tools_non) == len(tools_stream), "Tool count should match"
        if len(tools_non) > 0:
            assert tools_non[0].function.name == tools_stream[0].function.name
            assert tools_non[0].function.arguments == tools_stream[0].function.arguments
tests/tool_parsers/conftest.py
0 → 100644
View file @
7a49742b
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
pytest
from
transformers
import
AutoTokenizer
from
vllm.tokenizers
import
TokenizerLike
@pytest.fixture(scope="module")
def default_tokenizer() -> TokenizerLike:
    """Provide the "gpt2" tokenizer, created once per test module.

    Parser tests use this as the fallback tokenizer unless they supply
    their own.
    """
    gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
    return gpt2_tokenizer
tests/tool_parsers/test_deepseekv3_tool_parser.py
0 → 100644
View file @
7a49742b
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
pytest
from
tests.tool_parsers.common_tests
import
(
ToolParserTestConfig
,
ToolParserTests
,
)
from
vllm.tokenizers
import
TokenizerLike
,
get_tokenizer
# Runs the shared ToolParserTests suite against the "deepseek_v3" parser.
class
TestDeepSeekV3ToolParser
(
ToolParserTests
):
# Class-scoped tokenizer fixture: takes no default_tokenizer argument and
# instead returns get_tokenizer("deepseek-ai/DeepSeek-V3").
@
pytest
.
fixture
(
scope
=
"class"
)
def
tokenizer
(
self
)
->
TokenizerLike
:
return
get_tokenizer
(
"deepseek-ai/DeepSeek-V3"
)
# Supplies the model outputs and expectations used by the shared tests.
@
pytest
.
fixture
def
test_config
(
self
)
->
ToolParserTestConfig
:
return
ToolParserTestConfig
(
parser_name
=
"deepseek_v3"
,
# Test data
no_tool_calls_output
=
(
"How can I help you today? I can check weather for you."
),
single_tool_call_output
=
"""<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_weather
```json
{"city": "Tokyo", "unit": "celsius"}
```<|tool▁call▁end|><|tool▁calls▁end|>"""
,
parallel_tool_calls_output
=
"""<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_weather
```json
{"city": "Tokyo", "unit": "celsius"}
```<|tool▁call▁end|><|tool▁call▁begin|>function<|tool▁sep|>search_hotels
```json
{"location": "Tokyo", "check_in": "2025-01-15"}
```<|tool▁call▁end|><|tool▁calls▁end|>"""
,
various_data_types_output
=
(
"""<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function
```json
"""
"""{"string_field": "hello", "int_field": 42, "float_field": 3.14, """
""""bool_field": true, "null_field": null, """
""""array_field": ["a", "b", "c"], """
""""object_field": {"nested": "value"}, """
""""empty_array": [], "empty_object": {}}
```<|tool▁call▁end|><|tool▁calls▁end|>"""
),
empty_arguments_output
=
"""<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_current_time
```json
{}
```<|tool▁call▁end|><|tool▁calls▁end|>"""
,
surrounding_text_output
=
(
"""Let me check the weather for you."""
"""<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_weather
```json
{"city": "Paris"}
```<|tool▁call▁end|><|tool▁calls▁end|>"""
),
escaped_strings_output
=
(
"""<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>send_message
```json
"""
"""{"text": "He said
\\
"hello
\\
"", "path": "C:
\\\\
Users
\\\\
file", """
""""newline": "line1
\\
nline2"}
```<|tool▁call▁end|><|tool▁calls▁end|>"""
),
# Two malformed samples: the first leaves the JSON object unclosed, the
# second omits the <|tool▁call▁begin|> / <|tool▁call▁end|> tokens.
malformed_input_outputs
=
[
"""<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_weather
```json
{"city": "Tokyo"
```<|tool▁call▁end|><|tool▁calls▁end|>"""
,
"""<|tool▁calls▁begin|>function<|tool▁sep|>get_weather
```json
{"city": "Tokyo"}
```<|tool▁calls▁end|>"""
,
],
# Expected results
single_tool_call_expected_name
=
"get_weather"
,
single_tool_call_expected_args
=
{
"city"
:
"Tokyo"
,
"unit"
:
"celsius"
},
# None disables the content comparison in the shared single-call test.
single_tool_call_expected_content
=
None
,
parallel_tool_calls_count
=
2
,
parallel_tool_calls_names
=
[
"get_weather"
,
"search_hotels"
],
# xfail markers
# Keys are shared test names; values are the xfail reasons.
xfail_streaming
=
{},
xfail_nonstreaming
=
{
"test_malformed_input"
:
(
"Parser sets tools_called=True even when tool_calls is "
"empty (detects start token but fails to parse)"
),
},
)
tests/tool_parsers/test_granite_20b_fc_tool_parser.py
0 → 100644
View file @
7a49742b
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
pytest
from
tests.tool_parsers.common_tests
import
(
ToolParserTestConfig
,
ToolParserTests
,
)
# Runs the shared ToolParserTests suite against the "granite-20b-fc" parser.
# No tokenizer override is defined here, so the inherited fixture is used.
class
TestGranite20bFcToolParser
(
ToolParserTests
):
# Supplies the model outputs and expectations used by the shared tests.
@
pytest
.
fixture
def
test_config
(
self
)
->
ToolParserTestConfig
:
return
ToolParserTestConfig
(
parser_name
=
"granite-20b-fc"
,
# Test data
no_tool_calls_output
=
"This is a regular response without any tool calls."
,
single_tool_call_output
=
(
'<function_call> {"name": "get_weather", '
'"arguments": {"city": "Tokyo"}}'
),
# Two <function_call> entries separated by a newline escape.
parallel_tool_calls_output
=
(
'<function_call> {"name": "get_weather", '
'"arguments": {"city": "Tokyo"}}
\n
'
'<function_call> {"name": "get_time", '
'"arguments": {"timezone": "Asia/Tokyo"}}'
),
various_data_types_output
=
"""<function_call> {
"name": "test_function",
"arguments": {
"string_field": "hello",
"int_field": 42,
"float_field": 3.14,
"bool_field": true,
"null_field": null,
"array_field": ["a", "b", "c"],
"object_field": {"nested": "value"},
"empty_array": [],
"empty_object": {}
}
}"""
,
empty_arguments_output
=
(
'<function_call> {"name": "refresh", "arguments": {}}'
),
surrounding_text_output
=
"""Let me check the weather for you.
<function_call> {"name": "get_weather", "arguments": {"city": "Tokyo"}}"""
,
escaped_strings_output
=
"""<function_call> {
"name": "test_function",
"arguments": {
"quoted": "He said
\\
"hello
\\
"",
"path": "C:
\\\\
Users
\\\\
file.txt",
"newline": "line1
\\
nline2",
"unicode": "emoji: 🎉"
}
}"""
,
# Malformed samples, in order: unclosed arguments object; a JSON array
# after the marker; no <function_call> marker; a non-string name with no
# arguments key.
malformed_input_outputs
=
[
'<function_call> {"name": "func", "arguments": {'
,
'<function_call> [{"name": "func", "arguments": {}}]'
,
'{"name": "func", "arguments": {}}'
,
'<function_call> {"name": 123}'
,
],
# Expected results
single_tool_call_expected_name
=
"get_weather"
,
single_tool_call_expected_args
=
{
"city"
:
"Tokyo"
},
# None disables the content comparison in the shared single-call test.
single_tool_call_expected_content
=
None
,
parallel_tool_calls_count
=
2
,
parallel_tool_calls_names
=
[
"get_weather"
,
"get_time"
],
# xfail markers
# Keys are shared test names; values are the xfail reasons.
xfail_streaming
=
{
"test_surrounding_text"
:
(
"Granite 20B FC streaming requires <function_call> at start"
),
},
xfail_nonstreaming
=
{},
)
tests/tool_parsers/test_granite_tool_parser.py
0 → 100644
View file @
7a49742b
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
pytest
from
tests.tool_parsers.common_tests
import
(
ToolParserTestConfig
,
ToolParserTests
,
)
from
tests.tool_parsers.utils
import
run_tool_extraction
# Runs the shared ToolParserTests suite against the "granite" parser and adds
# two Granite-specific tests for the "<|tool_call|>" and "<tool_call>" prefixes.
class
TestGraniteToolParser
(
ToolParserTests
):
# Supplies the model outputs and expectations used by the shared tests.
@
pytest
.
fixture
def
test_config
(
self
)
->
ToolParserTestConfig
:
return
ToolParserTestConfig
(
parser_name
=
"granite"
,
# Test data
no_tool_calls_output
=
"This is a regular response without any tool calls."
,
single_tool_call_output
=
(
'<|tool_call|> [{"name": "get_weather", '
'"arguments": {"city": "Tokyo"}}]'
),
parallel_tool_calls_output
=
"""<|tool_call|> [
{"name": "get_weather", "arguments": {"city": "Tokyo"}},
{"name": "get_time", "arguments": {"timezone": "Asia/Tokyo"}}
]"""
,
# This sample uses the "<tool_call>" string prefix, whereas the samples
# above use the "<|tool_call|>" token prefix.
various_data_types_output
=
"""<tool_call> [{
"name": "test_function",
"arguments": {
"string_field": "hello",
"int_field": 42,
"float_field": 3.14,
"bool_field": true,
"null_field": null,
"array_field": ["a", "b", "c"],
"object_field": {"nested": "value"},
"empty_array": [],
"empty_object": {}
}
}]"""
,
empty_arguments_output
=
(
'<|tool_call|> [{"name": "refresh", "arguments": {}}]'
),
surrounding_text_output
=
"""Let me check the weather for you.
<|tool_call|> [{"name": "get_weather", "arguments": {"city": "Tokyo"}}]
I'll get that information."""
,
# This sample also uses the "<tool_call>" string prefix.
escaped_strings_output
=
"""<tool_call> [{
"name": "test_function",
"arguments": {
"quoted": "He said
\\
"hello
\\
"",
"path": "C:
\\\\
Users
\\\\
file.txt",
"newline": "line1
\\
nline2",
"unicode": "emoji: 🎉"
}
}]"""
,
malformed_input_outputs
=
[
'<|tool_call|> [{"name": "func", "arguments": {'
,
'<|tool_call|> {"name": "func", "arguments": {}}'
,
# Not an array
'[{"name": "func", "arguments": "not a dict"}]'
,
'Some text [{"name": "func"}]'
,
# JSON but not tool call format
],
# Expected results
single_tool_call_expected_name
=
"get_weather"
,
single_tool_call_expected_args
=
{
"city"
:
"Tokyo"
},
# Granite strips content when tool calls present
# (None disables the content comparison in the shared single-call test.)
single_tool_call_expected_content
=
None
,
parallel_tool_calls_count
=
2
,
parallel_tool_calls_names
=
[
"get_weather"
,
"get_time"
],
# xfail markers
# Keys are shared test names; values are the xfail reasons.
xfail_streaming
=
{
"test_malformed_input"
:
(
"Streaming mode incorrectly creates tool call from malformed JSON"
),
"test_surrounding_text"
:
(
"Parser doesn't handle surrounding text correctly in streaming"
),
"test_streaming_reconstruction"
:
(
"Streaming mode doesn't strip <|tool_call|> marker from content"
),
},
xfail_nonstreaming
=
{
"test_surrounding_text"
:
(
"Parser doesn't handle surrounding text correctly in non-streaming"
),
},
)
# Granite-Specific Tests
# NOTE(review): "streaming" is parametrized directly on the two tests below,
# while the inherited ToolParserTests mixin already defines a "streaming"
# fixture with params=[True, False] — confirm the explicit parametrize is
# still needed.
@
pytest
.
mark
.
parametrize
(
"streaming"
,
[
True
,
False
])
def
test_granite_token_prefix_format
(
self
,
tool_parser
,
streaming
):
"""Verify parser handles Granite 3.0 <|tool_call|> token format."""
single_tool_call_token
=
(
'<|tool_call|> [{"name": "get_weather", "arguments": {"city": "Tokyo"}}]'
)
# content is unpacked but not asserted on; only tool_calls is checked.
content
,
tool_calls
=
run_tool_extraction
(
tool_parser
,
single_tool_call_token
,
streaming
=
streaming
)
assert
len
(
tool_calls
)
==
1
,
(
f
"Expected 1 tool call from token format, got
{
len
(
tool_calls
)
}
"
)
assert
tool_calls
[
0
].
function
.
name
==
"get_weather"
@
pytest
.
mark
.
parametrize
(
"streaming"
,
[
True
,
False
])
def
test_granite_string_prefix_format
(
self
,
tool_parser
,
streaming
):
"""Verify parser handles Granite 3.1 <tool_call> string format."""
single_tool_call_string
=
(
'<tool_call> [{"name": "get_weather", "arguments": {"city": "Tokyo"}}]'
)
# content is unpacked but not asserted on; only tool_calls is checked.
content
,
tool_calls
=
run_tool_extraction
(
tool_parser
,
single_tool_call_string
,
streaming
=
streaming
)
assert
len
(
tool_calls
)
==
1
,
(
f
"Expected 1 tool call from string format, got
{
len
(
tool_calls
)
}
"
)
assert
tool_calls
[
0
].
function
.
name
==
"get_weather"
tests/tool_parsers/test_internlm2_tool_parser.py
0 → 100644
View file @
7a49742b
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
unittest.mock
import
MagicMock
import
pytest
from
tests.tool_parsers.common_tests
import
(
ToolParserTestConfig
,
ToolParserTests
,
)
from
vllm.tokenizers
import
TokenizerLike
# Runs the shared ToolParserTests suite against the "internlm" parser.
class
TestInternLM2ToolParser
(
ToolParserTests
):
# Overrides the tokenizer fixture: reads the default tokenizer's vocab, adds
# three InternLM2 marker tokens to that dict, and replaces get_vocab with a
# MagicMock that returns the extended dict.
# NOTE(review): get_vocab is replaced on the default_tokenizer object itself
# (no copy is made), and conftest declares default_tokenizer with
# scope="module" — so the patch presumably stays in place for later tests in
# the same module; confirm this is intended.
@
pytest
.
fixture
def
tokenizer
(
self
,
default_tokenizer
:
TokenizerLike
)
->
TokenizerLike
:
"""Add some internlm2 specific tokens to the default vocab."""
tokenizer_vocab
=
default_tokenizer
.
get_vocab
()
default_tokenizer
.
get_vocab
=
MagicMock
()
tokenizer_vocab
.
update
(
{
"<|action_start|>"
:
92540
,
"<|plugin|>"
:
92541
,
"<|action_end|>"
:
92542
,
}
)
default_tokenizer
.
get_vocab
.
return_value
=
tokenizer_vocab
return
default_tokenizer
# Supplies the model outputs and expectations used by the shared tests.
@
pytest
.
fixture
def
test_config
(
self
)
->
ToolParserTestConfig
:
return
ToolParserTestConfig
(
parser_name
=
"internlm"
,
# Test data
no_tool_calls_output
=
"This is a regular response without any tool calls."
,
single_tool_call_output
=
(
'<|action_start|><|plugin|>{"name": "get_weather", '
'"parameters": {"city": "Tokyo"}}<|action_end|>'
),
# InternLM2 doesn't support parallel calls
# (so this sample holds a single call and the expected count below is 1)
parallel_tool_calls_output
=
(
'<|action_start|><|plugin|>{"name": "get_weather", '
'"parameters": {"city": "Tokyo"}}<|action_end|>'
),
various_data_types_output
=
"""<|action_start|><|plugin|>{
"name": "test_function",
"parameters": {
"string_field": "hello",
"int_field": 42,
"float_field": 3.14,
"bool_field": true,
"null_field": null,
"array_field": ["a", "b", "c"],
"object_field": {"nested": "value"},
"empty_array": [],
"empty_object": {}
}
}<|action_end|>"""
,
empty_arguments_output
=
(
'<|action_start|><|plugin|>{"name": "refresh", '
'"parameters": {}}<|action_end|>'
),
surrounding_text_output
=
(
"Let me check the weather for you. "
'<|action_start|><|plugin|>{"name": "get_weather", '
'"parameters": {"city": "Tokyo"}}<|action_end|>'
),
escaped_strings_output
=
"""<|action_start|><|plugin|>{
"name": "test_function",
"parameters": {
"quoted": "He said
\\
"hello
\\
"",
"path": "C:
\\\\
Users
\\\\
file.txt",
"newline": "line1
\\
nline2",
"unicode": "emoji: 🎉"
}
}<|action_end|>"""
,
# Malformed samples, in order: unclosed parameters object; parameters given
# as a string; non-JSON payload; markers with no payload; missing
# <|plugin|> marker.
malformed_input_outputs
=
[
'<|action_start|><|plugin|>{"name": "func", "parameters": {'
,
(
'<|action_start|><|plugin|>{"name": "func", '
'"parameters": "not a dict"}<|action_end|>'
),
"<|action_start|><|plugin|>not json<|action_end|>"
,
"<|action_start|><|plugin|>"
,
'<|action_start|>{"name": "func"}'
,
],
# Expected results
single_tool_call_expected_name
=
"get_weather"
,
single_tool_call_expected_args
=
{
"city"
:
"Tokyo"
},
# None disables the content comparison in the shared single-call test.
single_tool_call_expected_content
=
None
,
parallel_tool_calls_count
=
1
,
# InternLM2 only supports single tool calls
parallel_tool_calls_names
=
[
"get_weather"
],
# Parser-specific settings
allow_empty_or_json_empty_args
=
True
,
# xfail markers
# Keys are shared test names; values are the xfail reasons.
xfail_streaming
=
{
"test_single_tool_call_simple_args"
:
(
"InternLM2 streaming not fully implemented"
),
"test_parallel_tool_calls"
:
(
"InternLM2 streaming not fully implemented"
),
"test_various_data_types"
:
(
"InternLM2 streaming not fully implemented"
),
"test_empty_arguments"
:
(
"InternLM2 streaming not fully implemented"
),
"test_surrounding_text"
:
(
"InternLM2 streaming not fully implemented"
),
"test_escaped_strings"
:
(
"InternLM2 streaming not fully implemented"
),
"test_streaming_reconstruction"
:
(
"InternLM2 streaming parser returns '<|action_start|' as "
"content instead of None - streaming/non-streaming inconsistency"
),
},
xfail_nonstreaming
=
{
"test_malformed_input"
:
(
"InternLM2 parser raises JSONDecodeError on malformed JSON "
"instead of gracefully handling it"
),
},
)
tests/tool_parsers/test_longcat_tool_parser.py
0 → 100644
View file @
7a49742b
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
unittest.mock
import
MagicMock
import
pytest
from
tests.tool_parsers.common_tests
import
(
ToolParserTestConfig
,
ToolParserTests
,
)
from
vllm.tokenizers
import
TokenizerLike
class TestLongCatToolParser(ToolParserTests):
    """Run the common tool-parser tests against the ``longcat`` parser."""

    @pytest.fixture
    def tokenizer(self, default_tokenizer: TokenizerLike) -> TokenizerLike:
        """Return the default tokenizer with the LongCat tags in its vocab.

        ``get_vocab`` is replaced by a ``MagicMock`` whose return value is
        the original vocab extended with the opening and closing
        tool-call tags.
        """
        vocab = default_tokenizer.get_vocab()
        vocab.update(
            {
                "<longcat_tool_call>": 32000,
                "</longcat_tool_call>": 32001,
            }
        )
        default_tokenizer.get_vocab = MagicMock(return_value=vocab)
        return default_tokenizer

    @pytest.fixture
    def test_config(self) -> ToolParserTestConfig:
        """Assemble the LongCat sample outputs and expected results."""
        # Reusable single calls; the parallel and surrounding-text samples
        # are composed from these.
        weather_call = (
            '<longcat_tool_call>{"name": "get_weather", '
            '"arguments": {"city": "Tokyo"}}</longcat_tool_call>'
        )
        time_call = (
            '<longcat_tool_call>{"name": "get_time", '
            '"arguments": {"timezone": "Asia/Tokyo"}}</longcat_tool_call>'
        )

        typed_call = """<longcat_tool_call>{
  "name": "test_function",
  "arguments": {
    "string_field": "hello",
    "int_field": 42,
    "float_field": 3.14,
    "bool_field": true,
    "null_field": null,
    "array_field": ["a", "b", "c"],
    "object_field": {"nested": "value"},
    "empty_array": [],
    "empty_object": {}
  }
}</longcat_tool_call>"""

        # Backslashes are doubled so the model output itself carries JSON
        # escape sequences (\" , \\ and \n).
        escaped_call = """<longcat_tool_call>{
  "name": "test_function",
  "arguments": {
    "quoted": "He said \\"hello\\"",
    "path": "C:\\\\Users\\\\file.txt",
    "newline": "line1\\nline2",
    "unicode": "emoji: 🎉"
  }
}</longcat_tool_call>"""

        malformed = [
            '<longcat_tool_call>{"name": "func", "arguments": {',
            (
                '<longcat_tool_call>{"name": "func", '
                '"arguments": "not a dict"}</longcat_tool_call>'
            ),
            "Some text with <longcat_tool_call>invalid json",
        ]

        return ToolParserTestConfig(
            parser_name="longcat",
            # Sample model outputs
            no_tool_calls_output=(
                "This is a regular response without any tool calls."
            ),
            single_tool_call_output=weather_call,
            parallel_tool_calls_output=weather_call + "\n" + time_call,
            various_data_types_output=typed_call,
            empty_arguments_output=(
                '<longcat_tool_call>{"name": "refresh", "arguments": {}}'
                "</longcat_tool_call>"
            ),
            surrounding_text_output=(
                "Let me check the weather for you.\n"
                + weather_call
                + "\n"
                + "Here is the result."
            ),
            escaped_strings_output=escaped_call,
            malformed_input_outputs=malformed,
            # Expectations
            single_tool_call_expected_name="get_weather",
            single_tool_call_expected_args={"city": "Tokyo"},
            single_tool_call_expected_content=None,
            parallel_tool_calls_count=2,
            parallel_tool_calls_names=["get_weather", "get_time"],
            # Expected failures
            xfail_streaming={
                "test_malformed_input": "Streaming has complex buffering behavior",
            },
            xfail_nonstreaming={},
            # Parser-specific settings
            allow_empty_or_json_empty_args=True,
        )
tests/tool_parsers/test_phi4mini_tool_parser.py
0 → 100644
View file @
7a49742b
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
unittest.mock
import
MagicMock
import
pytest
from
tests.tool_parsers.common_tests
import
(
ToolParserTestConfig
,
ToolParserTests
,
)
from
vllm.tokenizers
import
TokenizerLike
class TestPhi4MiniToolParser(ToolParserTests):
    """Run the common tool-parser tests against the ``phi4_mini_json`` parser."""

    @pytest.fixture
    def tokenizer(self, default_tokenizer: TokenizerLike) -> TokenizerLike:
        """Return the default tokenizer with ``functools`` added to its vocab.

        ``get_vocab`` is replaced by a ``MagicMock`` whose return value is
        the original vocab plus the ``functools`` entry.
        """
        vocab = default_tokenizer.get_vocab()
        vocab.update({"functools": 32000})
        default_tokenizer.get_vocab = MagicMock(return_value=vocab)
        return default_tokenizer

    @pytest.fixture
    def test_config(self) -> ToolParserTestConfig:
        """Assemble the Phi-4 Mini sample outputs and expected results."""
        weather_call = (
            'functools[{"name": "get_weather", "arguments": {"city": "Tokyo"}}]'
        )

        two_calls = """functools[
  {"name": "get_weather", "arguments": {"city": "Tokyo"}},
  {"name": "get_time", "arguments": {"timezone": "Asia/Tokyo"}}
]"""

        typed_call = """functools[{
  "name": "test_function",
  "arguments": {
    "string_field": "hello",
    "int_field": 42,
    "float_field": 3.14,
    "bool_field": true,
    "null_field": null,
    "array_field": ["a", "b", "c"],
    "object_field": {"nested": "value"},
    "empty_array": [],
    "empty_object": {}
  }
}]"""

        # Backslashes are doubled so the model output itself carries JSON
        # escape sequences (\" , \\ and \n).
        escaped_call = """functools[{
  "name": "test_function",
  "arguments": {
    "quoted": "He said \\"hello\\"",
    "path": "C:\\\\Users\\\\file.txt",
    "newline": "line1\\nline2",
    "unicode": "emoji: 🎉"
  }
}]"""

        malformed = [
            'functools[{"name": "func", "arguments": {',
            'functools[{"name": "func", "arguments": "not a dict"}]',
            'functools{"name": "func"}',  # Missing brackets
            'functools[{"name": "func"}]',  # Missing arguments/parameters
            "functools[] This is just text",  # Empty functools
            "functools[ This is just text ]",  # functools with invalid JSON
        ]

        # Every streaming test listed here is expected to fail for the
        # same stated reason.
        streaming_xfails = dict.fromkeys(
            (
                "test_no_tool_calls",
                "test_single_tool_call_simple_args",
                "test_parallel_tool_calls",
                "test_various_data_types",
                "test_empty_arguments",
                "test_surrounding_text",
                "test_escaped_strings",
                "test_streaming_reconstruction",
            ),
            "Phi4 Mini streaming not implemented",
        )

        return ToolParserTestConfig(
            parser_name="phi4_mini_json",
            # Sample model outputs
            no_tool_calls_output=(
                "This is a regular response without any tool calls."
            ),
            single_tool_call_output=weather_call,
            parallel_tool_calls_output=two_calls,
            various_data_types_output=typed_call,
            empty_arguments_output='functools[{"name": "refresh", "arguments": {}}]',
            surrounding_text_output=(
                "Let me check the weather for you.\n"
                + weather_call
                + "\nWould you like to know more?"
            ),
            escaped_strings_output=escaped_call,
            malformed_input_outputs=malformed,
            # Expectations
            single_tool_call_expected_name="get_weather",
            single_tool_call_expected_args={"city": "Tokyo"},
            # Phi-4 Mini strips content when tool calls are present
            single_tool_call_expected_content=None,
            parallel_tool_calls_count=2,
            parallel_tool_calls_names=["get_weather", "get_time"],
            parallel_tool_calls_expected_content=None,
            # Expected failures
            xfail_streaming=streaming_xfails,
            xfail_nonstreaming={
                "test_various_data_types": (
                    "Phi4MiniJsonToolParser regex has nesting limitations "
                    "with nested objects"
                ),
                "test_malformed_input": (
                    "Phi4MiniJsonToolParser incorrectly sets "
                    "tools_called=True on empty array"
                ),
            },
        )
tests/tool_parsers/test_qwen3xml_tool_parser.py
0 → 100644
View file @
7a49742b
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
pytest
from
tests.tool_parsers.common_tests
import
(
ToolParserTestConfig
,
ToolParserTests
,
)
class TestQwen3xmlToolParser(ToolParserTests):
    """Run the common tool-parser tests against the ``qwen3_xml`` parser."""

    @pytest.fixture
    def test_config(self) -> ToolParserTestConfig:
        """Assemble the Qwen3 XML sample outputs and expected results."""

        def xml_call(function: str, *parameters: str) -> str:
            # One <tool_call> block: each tag on its own line, with no
            # trailing newline after the closing tag.
            return "\n".join(
                [
                    "<tool_call>",
                    f"<function={function}>",
                    *parameters,
                    "</function>",
                    "</tool_call>",
                ]
            )

        weather_call = xml_call(
            "get_weather", "<parameter=city>Tokyo</parameter>"
        )
        time_call = xml_call(
            "get_time", "<parameter=timezone>Asia/Tokyo</parameter>"
        )
        typed_call = xml_call(
            "test_function",
            "<parameter=string_field>hello</parameter>",
            "<parameter=int_field>42</parameter>",
            "<parameter=float_field>3.14</parameter>",
            "<parameter=bool_field>true</parameter>",
            "<parameter=null_field>null</parameter>",
            '<parameter=array_field>["a", "b", "c"]</parameter>',
            '<parameter=object_field>{"nested": "value"}</parameter>',
        )
        escaped_call = xml_call(
            "test_function",
            '<parameter=quoted>He said "hello"</parameter>',
            "<parameter=path>C:\\Users\\file.txt</parameter>",
            # The value itself contains a real newline character.
            "<parameter=newline>line1\nline2</parameter>",
        )

        # Six streaming tests share one reason; two more have their own.
        streaming_xfails = {
            **dict.fromkeys(
                (
                    "test_single_tool_call_simple_args",
                    "test_parallel_tool_calls",
                    "test_various_data_types",
                    "test_empty_arguments",
                    "test_surrounding_text",
                    "test_escaped_strings",
                ),
                "Qwen3XML streaming has systematic issues",
            ),
            "test_malformed_input": (
                "Qwen3XML parser is lenient with malformed input"
            ),
            "test_streaming_reconstruction": (
                "Qwen3XML streaming reconstruction has known issues"
            ),
        }

        return ToolParserTestConfig(
            parser_name="qwen3_xml",
            # Sample model outputs
            no_tool_calls_output=(
                "This is a regular response without any tool calls."
            ),
            single_tool_call_output=weather_call,
            # The two blocks are adjacent, with no separator between them.
            parallel_tool_calls_output=weather_call + time_call,
            various_data_types_output=typed_call,
            empty_arguments_output=xml_call("refresh"),
            surrounding_text_output=(
                "Let me check the weather for you.\n\n"
                + weather_call
                + "\n\n"
                + "I will get that information."
            ),
            escaped_strings_output=escaped_call,
            malformed_input_outputs=[
                "<tool_call><function=func>",
                "<tool_call><function=></function></tool_call>",
            ],
            # Expectations
            single_tool_call_expected_name="get_weather",
            single_tool_call_expected_args={"city": "Tokyo"},
            parallel_tool_calls_count=2,
            parallel_tool_calls_names=["get_weather", "get_time"],
            # Expected failures - Qwen3XML has systematic streaming issues
            xfail_streaming=streaming_xfails,
            supports_typed_arguments=False,
        )
tests/tool_parsers/test_step3_tool_parser.py
0 → 100644
View file @
7a49742b
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
pytest
from
tests.tool_parsers.common_tests
import
(
ToolParserTestConfig
,
ToolParserTests
,
)
from
vllm.tokenizers
import
TokenizerLike
,
get_tokenizer
class TestStep3ToolParser(ToolParserTests):
    """Run the common tool-parser tests against the ``step3`` parser."""

    @pytest.fixture(scope="class")
    def tokenizer(self) -> TokenizerLike:
        """Load the ``stepfun-ai/step3`` tokenizer (class-scoped fixture)."""
        return get_tokenizer("stepfun-ai/step3")

    @pytest.fixture
    def test_config(self) -> ToolParserTestConfig:
        """Assemble the Step3 sample outputs and expected results."""

        def parameter(name: str, value: str) -> str:
            # A single <steptml:parameter> element.
            return f'<steptml:parameter name="{name}">{value}</steptml:parameter>'

        def invoke(function: str, *parameters: str) -> str:
            # A <steptml:invoke> element wrapping the given parameters.
            return (
                f'<steptml:invoke name="{function}">'
                + "".join(parameters)
                + "</steptml:invoke>"
            )

        def tool_calls(*invocations: str) -> str:
            # Wrap each invocation in the per-call markers, join them with
            # the separator token, and enclose everything in the outer markers.
            wrapped = (
                "<|tool_call_begin|>" + call + "<|tool_call_end|>"
                for call in invocations
            )
            return (
                "<|tool_calls_begin|>"
                + "<|tool_sep|>".join(wrapped)
                + "<|tool_calls_end|>"
            )

        weather_invoke = invoke("get_weather", parameter("city", "Tokyo"))
        time_invoke = invoke("get_time", parameter("timezone", "Asia/Tokyo"))
        weather_call = tool_calls(weather_invoke)

        typed_call = tool_calls(
            invoke(
                "test_function",
                parameter("string_field", "hello"),
                parameter("int_field", "42"),
                parameter("float_field", "3.14"),
                parameter("bool_field", "true"),
                parameter("null_field", "null"),
                parameter("array_field", '["a", "b", "c"]'),
                parameter("object_field", '{"nested": "value"}'),
            )
        )
        escaped_call = tool_calls(
            invoke(
                "test_function",
                parameter("quoted", 'He said "hello"'),
                parameter("path", "C:\\Users\\file.txt"),
                # The value itself contains a real newline character.
                parameter("newline", "line1\nline2"),
            )
        )

        malformed = [
            # Opening markers and an invoke tag that is never closed.
            (
                "<|tool_calls_begin|><|tool_call_begin|>"
                '<steptml:invoke name="func">'
            ),
            # A complete call without the outer tool_calls markers.
            (
                '<|tool_call_begin|><steptml:invoke name="func">'
                "</steptml:invoke><|tool_call_end|>"
            ),
        ]

        # Every non-streaming test listed here is expected to fail for the
        # same stated reason.
        nonstreaming_xfails = dict.fromkeys(
            (
                "test_single_tool_call_simple_args",
                "test_parallel_tool_calls",
                "test_various_data_types",
                "test_empty_arguments",
                "test_surrounding_text",
                "test_escaped_strings",
            ),
            "Step3 parser non-streaming has bugs",
        )

        return ToolParserTestConfig(
            parser_name="step3",
            # Sample model outputs
            no_tool_calls_output=(
                "This is a regular response without any tool calls."
            ),
            single_tool_call_output=weather_call,
            parallel_tool_calls_output=tool_calls(weather_invoke, time_invoke),
            various_data_types_output=typed_call,
            empty_arguments_output=tool_calls(invoke("refresh")),
            surrounding_text_output=(
                "Let me check the weather for you.\n\n"
                + weather_call
                + "\n\n"
                + "I'll get that information."
            ),
            escaped_strings_output=escaped_call,
            malformed_input_outputs=malformed,
            # Expectations
            single_tool_call_expected_name="get_weather",
            single_tool_call_expected_args={"city": "Tokyo"},
            parallel_tool_calls_count=2,
            parallel_tool_calls_names=["get_weather", "get_time"],
            # Expected failures
            xfail_nonstreaming=nonstreaming_xfails,
            xfail_streaming={
                "test_parallel_tool_calls": (
                    "Step3 parser has significant bugs in both streaming "
                    "and non-streaming"
                ),
                "test_streaming_reconstruction": (
                    "Step3 parser non-streaming has bugs, so streaming "
                    "doesn't match non-streaming"
                ),
            },
            supports_typed_arguments=False,
        )
tests/
entrypoints/openai/
tool_parsers/utils.py
→
tests/tool_parsers/utils.py
View file @
7a49742b
File moved
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment