Unverified commit 7a49742b, authored by Ben Browning and committed by GitHub
Browse files

[CI/Build] Add common tool call parser test suite (#27599)


Signed-off-by: Ben Browning <bbrownin@redhat.com>
parent 3e6a1e16
......@@ -5,7 +5,7 @@ import json
import pytest
from tests.entrypoints.openai.tool_parsers.utils import (
from tests.tool_parsers.utils import (
run_tool_extraction,
run_tool_extraction_streaming,
)
......
......@@ -7,7 +7,7 @@ from unittest.mock import MagicMock
import pytest
from tests.entrypoints.openai.tool_parsers.utils import (
from tests.tool_parsers.utils import (
run_tool_extraction,
run_tool_extraction_streaming,
)
......
......@@ -5,7 +5,7 @@ from unittest.mock import MagicMock, patch
import pytest
from tests.entrypoints.openai.tool_parsers.utils import (
from tests.tool_parsers.utils import (
run_tool_extraction,
run_tool_extraction_streaming,
)
......
......@@ -5,7 +5,7 @@ from unittest.mock import MagicMock, patch
import pytest
from tests.entrypoints.openai.tool_parsers.utils import (
from tests.tool_parsers.utils import (
run_tool_extraction,
run_tool_extraction_streaming,
)
......
......@@ -5,7 +5,7 @@ from unittest.mock import MagicMock, patch
import pytest
from tests.entrypoints.openai.tool_parsers.utils import (
from tests.tool_parsers.utils import (
run_tool_extraction,
run_tool_extraction_streaming,
)
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json
from dataclasses import dataclass, field
from types import NoneType
from typing import Any
import pytest
from tests.tool_parsers.utils import run_tool_extraction
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers import ToolParserManager
@dataclass
class ToolParserTestConfig:
"""Configuration for a tool parser's common tests.
This dataclass contains all the test data and expected results needed
to run the common test suite for a parser. Each parser test file
creates one instance of this config with parser-specific values.
Attributes:
parser_name: Name used with ToolParserManager (e.g., "mistral")
Test data (model outputs):
no_tool_calls_output: Plain text without any tool syntax
single_tool_call_output: One tool call with simple arguments
parallel_tool_calls_output: Multiple tool calls in one response
various_data_types_output: Tool with various data types
empty_arguments_output: Tool call with no parameters
surrounding_text_output: Tool call mixed with regular text
escaped_strings_output: Tool call with escaped chars
malformed_input_outputs: List of invalid inputs
Expected results:
single_tool_call_expected_name: Expected function name
single_tool_call_expected_args: Expected arguments dict
parallel_tool_calls_count: Number of tools in parallel test
parallel_tool_calls_names: Function names in order
single_tool_call_expected_content: Content field when tool called
parallel_tool_calls_expected_content: Content for parallel test
xfail markers:
xfail_streaming: Mapping test name to xfail reason (streaming only)
xfail_nonstreaming: Mapping test name to xfail reason (non-streaming)
Special flags:
allow_empty_or_json_empty_args: True if "" or "{}" both valid for empty args
supports_typed_arguments: True if the parser supports typed function arguments
"""
# Parser identification
parser_name: str
# Test data - model outputs for each common test
no_tool_calls_output: str
single_tool_call_output: str
parallel_tool_calls_output: str
various_data_types_output: str
empty_arguments_output: str
surrounding_text_output: str
escaped_strings_output: str
malformed_input_outputs: list[str]
# Expected results for specific tests (optional overrides)
single_tool_call_expected_name: str = "get_weather"
single_tool_call_expected_args: dict[str, Any] = field(
default_factory=lambda: {"city": "Tokyo"}
)
parallel_tool_calls_count: int = 2
parallel_tool_calls_names: list[str] = field(
default_factory=lambda: ["get_weather", "get_time"]
)
# xfail configuration - maps test name to xfail reason
xfail_streaming: dict[str, str] = field(default_factory=dict)
xfail_nonstreaming: dict[str, str] = field(default_factory=dict)
# Content expectations (some parsers strip content, others don't)
single_tool_call_expected_content: str | None = None
parallel_tool_calls_expected_content: str | None = None
# Special assertions for edge cases
allow_empty_or_json_empty_args: bool = True # "{}" or "" for empty args
supports_typed_arguments: bool = True
class ToolParserTests:
    """Mixin class providing common test suite for tool parsers.

    Every test takes the parser from the ``tool_parser`` fixture, feeds it one
    sample output from the ``test_config`` fixture through
    ``run_tool_extraction`` and checks the returned ``(content, tool_calls)``
    pair. Tests that request the ``streaming`` fixture run once per mode.

    To use this mixin in a parser test file:
    1. Create a test_config fixture that returns a ToolParserTestConfig instance
    2. Inherit from this class
    3. Add parser-specific tests as additional methods

    Example:
        class TestMistralToolParser(ToolParserTests):
            @pytest.fixture
            def test_config(self) -> ToolParserTestConfig:
                return ToolParserTestConfig(
                    parser_name="mistral",
                    no_tool_calls_output="Plain text...",
                    # ... other config ...
                )

            # Parser-specific tests
            def test_mistral_specific_feature(self, tool_parser):
                # Custom test logic
                pass
    """

    @pytest.fixture
    def test_config(self) -> ToolParserTestConfig:
        """Override this to provide parser-specific configuration.

        Raises:
            NotImplementedError: Always, unless a subclass overrides it.
        """
        raise NotImplementedError(
            "Subclass must provide test_config fixture returning ToolParserTestConfig"
        )

    @pytest.fixture
    def tokenizer(self, default_tokenizer: TokenizerLike) -> TokenizerLike:
        """Override this to provide parser-specific tokenizer."""
        return default_tokenizer

    @pytest.fixture
    def tool_parser(self, test_config: ToolParserTestConfig, tokenizer: TokenizerLike):
        """Instantiate the parser registered under ``test_config.parser_name``."""
        parser_cls = ToolParserManager.get_tool_parser(test_config.parser_name)
        return parser_cls(tokenizer)

    @pytest.fixture(params=[True, False])
    def streaming(self, request: pytest.FixtureRequest) -> bool:
        """Parametrize a test over streaming (True) and non-streaming (False)."""
        return request.param

    def test_no_tool_calls(
        self,
        request: pytest.FixtureRequest,
        tool_parser: Any,
        test_config: ToolParserTestConfig,
        streaming: bool,
    ):
        """Verify parser handles plain text without tool syntax."""
        # Apply xfail markers if configured
        self.apply_xfail_mark(request, test_config, "test_no_tool_calls", streaming)

        content, tool_calls = run_tool_extraction(
            tool_parser, test_config.no_tool_calls_output, streaming=streaming
        )

        # Plain text must come back untouched and yield no tool calls.
        assert content == test_config.no_tool_calls_output, (
            f"Expected content to match input, got {content}"
        )
        assert len(tool_calls) == 0, f"Expected no tool calls, got {len(tool_calls)}"

    def test_single_tool_call_simple_args(
        self,
        request: pytest.FixtureRequest,
        tool_parser: Any,
        test_config: ToolParserTestConfig,
        streaming: bool,
    ):
        """Verify parser extracts one tool with simple arguments."""
        # Apply xfail markers if configured
        self.apply_xfail_mark(
            request, test_config, "test_single_tool_call_simple_args", streaming
        )

        content, tool_calls = run_tool_extraction(
            tool_parser, test_config.single_tool_call_output, streaming=streaming
        )

        # Content check is opt-in (some parsers strip content): it is only
        # enforced when the config states an expectation.
        if test_config.single_tool_call_expected_content is not None:
            assert content == test_config.single_tool_call_expected_content

        assert len(tool_calls) == 1, f"Expected 1 tool call, got {len(tool_calls)}"
        assert tool_calls[0].type == "function"
        assert tool_calls[0].function.name == test_config.single_tool_call_expected_name

        # Only the expected keys are compared; extra arguments are tolerated.
        args = json.loads(tool_calls[0].function.arguments)
        for key, value in test_config.single_tool_call_expected_args.items():
            assert args.get(key) == value, (
                f"Expected {key}={value}, got {args.get(key)}"
            )

    def test_parallel_tool_calls(
        self,
        request: pytest.FixtureRequest,
        tool_parser: Any,
        test_config: ToolParserTestConfig,
        streaming: bool,
    ):
        """Verify parser handles multiple tools in one response."""
        # Apply xfail markers if configured
        self.apply_xfail_mark(
            request, test_config, "test_parallel_tool_calls", streaming
        )

        content, tool_calls = run_tool_extraction(
            tool_parser,
            test_config.parallel_tool_calls_output,
            streaming=streaming,
        )

        # Content check is opt-in, mirroring the single-call test: it is only
        # enforced when the config states an expectation.
        if test_config.parallel_tool_calls_expected_content is not None:
            assert content == test_config.parallel_tool_calls_expected_content

        assert len(tool_calls) == test_config.parallel_tool_calls_count, (
            f"Expected {test_config.parallel_tool_calls_count} "
            f"tool calls, got {len(tool_calls)}"
        )

        # Verify tool names match expected
        for i, expected_name in enumerate(test_config.parallel_tool_calls_names):
            assert tool_calls[i].type == "function"
            assert tool_calls[i].function.name == expected_name

        # Verify unique IDs
        ids = [tc.id for tc in tool_calls]
        assert len(ids) == len(set(ids)), "Tool call IDs should be unique"

    def test_various_data_types(
        self,
        request: pytest.FixtureRequest,
        tool_parser: Any,
        test_config: ToolParserTestConfig,
        streaming: bool,
    ):
        """Verify parser handles all JSON types in arguments."""
        # Apply xfail markers if configured
        self.apply_xfail_mark(
            request, test_config, "test_various_data_types", streaming
        )

        _content, tool_calls = run_tool_extraction(
            tool_parser,
            test_config.various_data_types_output,
            streaming=streaming,
        )
        assert len(tool_calls) == 1, f"Expected 1 tool call, got {len(tool_calls)}"

        args = json.loads(tool_calls[0].function.arguments)
        # Verify all expected fields present
        required_fields_types = {
            "string_field": str,
            "int_field": int,
            "float_field": float,
            "bool_field": bool,
            "null_field": NoneType,
            "array_field": list,
            "object_field": dict,
        }
        for required_field, expected_type in required_fields_types.items():
            assert required_field in args, (
                f"Expected field '{required_field}' in arguments"
            )
            if test_config.supports_typed_arguments:
                # Exact type identity (not isinstance) so a bool value cannot
                # satisfy the int expectation.
                found_type = type(args[required_field])
                assert found_type is expected_type, (
                    f"Expected field '{required_field}' to have type {expected_type}, "
                    f"got {found_type}"
                )

    def test_empty_arguments(
        self,
        request: pytest.FixtureRequest,
        tool_parser: Any,
        test_config: ToolParserTestConfig,
        streaming: bool,
    ):
        """Verify parser handles parameterless tool calls."""
        # Apply xfail markers if configured
        self.apply_xfail_mark(request, test_config, "test_empty_arguments", streaming)

        _content, tool_calls = run_tool_extraction(
            tool_parser, test_config.empty_arguments_output, streaming=streaming
        )
        assert len(tool_calls) == 1, f"Expected 1 tool call, got {len(tool_calls)}"

        # The raw argument string is compared, not its parsed form.
        args = tool_calls[0].function.arguments
        if test_config.allow_empty_or_json_empty_args:
            assert args in ["{}", ""], f"Expected empty args, got {args}"
        else:
            assert args == "{}", f"Expected {{}}, got {args}"

    def test_surrounding_text(
        self,
        request: pytest.FixtureRequest,
        tool_parser: Any,
        test_config: ToolParserTestConfig,
        streaming: bool,
    ):
        """Verify parser extracts tools from mixed content."""
        # Apply xfail markers if configured
        self.apply_xfail_mark(request, test_config, "test_surrounding_text", streaming)

        _content, tool_calls = run_tool_extraction(
            tool_parser, test_config.surrounding_text_output, streaming=streaming
        )
        assert len(tool_calls) >= 1, (
            f"Expected at least 1 tool call, got {len(tool_calls)}"
        )

    def test_escaped_strings(
        self,
        request: pytest.FixtureRequest,
        tool_parser: Any,
        test_config: ToolParserTestConfig,
        streaming: bool,
    ):
        """Verify parser handles escaped characters in arguments."""
        # Apply xfail markers if configured
        self.apply_xfail_mark(request, test_config, "test_escaped_strings", streaming)

        _content, tool_calls = run_tool_extraction(
            tool_parser, test_config.escaped_strings_output, streaming=streaming
        )
        assert len(tool_calls) == 1, f"Expected 1 tool call, got {len(tool_calls)}"

        args = json.loads(tool_calls[0].function.arguments)
        # At minimum, verify we can parse and have expected fields
        # Exact escaping behavior varies by parser
        assert len(args) > 0, "Expected some arguments with escaped strings"

    def test_malformed_input(
        self,
        request: pytest.FixtureRequest,
        tool_parser: Any,
        test_config: ToolParserTestConfig,
        streaming: bool,
    ):
        """Verify parser gracefully handles invalid syntax."""
        # Apply xfail markers if configured
        self.apply_xfail_mark(request, test_config, "test_malformed_input", streaming)

        for malformed_input in test_config.malformed_input_outputs:
            # Should not raise exception. The result is deliberately not
            # inspected: the exact behavior on malformed input varies by parser.
            _content, _tool_calls = run_tool_extraction(
                tool_parser, malformed_input, streaming=streaming
            )

    def test_streaming_reconstruction(
        self,
        request: pytest.FixtureRequest,
        tool_parser: Any,
        test_config: ToolParserTestConfig,
    ):
        """Verify streaming produces same result as non-streaming."""
        # This test always exercises streaming, so only the streaming xfail
        # table is consulted.
        self.apply_xfail_mark(
            request, test_config, "test_streaming_reconstruction", True
        )

        test_output = test_config.single_tool_call_output

        # Non-streaming result
        content_non, tools_non = run_tool_extraction(
            tool_parser, test_output, streaming=False
        )

        # Streaming result
        content_stream, tools_stream = run_tool_extraction(
            tool_parser, test_output, streaming=True
        )

        # Compare results
        assert content_non == content_stream, "Content should match between modes"
        assert len(tools_non) == len(tools_stream), "Tool count should match"
        if len(tools_non) > 0:
            # Only the first tool call is compared in detail.
            assert tools_non[0].function.name == tools_stream[0].function.name
            assert tools_non[0].function.arguments == tools_stream[0].function.arguments

    def apply_xfail_mark(
        self,
        request: pytest.FixtureRequest,
        test_config: ToolParserTestConfig,
        test_name: str,
        streaming: bool,
    ) -> None:
        """Attach a strict xfail marker to the running test when configured.

        Args:
            request: Fixture request of the running test; the marker is added
                to ``request.node``.
            test_config: Config holding the ``xfail_streaming`` and
                ``xfail_nonstreaming`` tables.
            test_name: Key looked up in the selected table.
            streaming: Selects ``xfail_streaming`` when true, otherwise
                ``xfail_nonstreaming``.
        """
        if streaming:
            xfail_reasons = test_config.xfail_streaming
        else:
            xfail_reasons = test_config.xfail_nonstreaming

        reason = xfail_reasons.get(test_name)
        if reason is not None:
            request.node.add_marker(pytest.mark.xfail(reason=reason, strict=True))
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from transformers import AutoTokenizer
from vllm.tokenizers import TokenizerLike
@pytest.fixture(scope="module")
def default_tokenizer() -> TokenizerLike:
    """Provide the "gpt2" tokenizer, created once per test module."""
    gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
    return gpt2_tokenizer
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from tests.tool_parsers.common_tests import (
ToolParserTestConfig,
ToolParserTests,
)
from vllm.tokenizers import TokenizerLike, get_tokenizer
class TestDeepSeekV3ToolParser(ToolParserTests):
    """Runs the common tool parser suite against the "deepseek_v3" parser."""

    @pytest.fixture(scope="class")
    def tokenizer(self) -> TokenizerLike:
        """Provide the DeepSeek-V3 tokenizer, created once for this class."""
        deepseek_tokenizer = get_tokenizer("deepseek-ai/DeepSeek-V3")
        return deepseek_tokenizer

    @pytest.fixture
    def test_config(self) -> ToolParserTestConfig:
        """Build the DeepSeek-V3 sample outputs and expected results."""
        # NOTE: continuation lines of the multi-line literals below start at
        # column 0 on purpose; any leading whitespace would become part of the
        # sample model output.
        plain_reply = "How can I help you today? I can check weather for you."

        one_call = """<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_weather
```json
{"city": "Tokyo", "unit": "celsius"}
```<|tool▁call▁end|><|tool▁calls▁end|>"""

        two_calls = """<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_weather
```json
{"city": "Tokyo", "unit": "celsius"}
```<|tool▁call▁end|><|tool▁call▁begin|>function<|tool▁sep|>search_hotels
```json
{"location": "Tokyo", "check_in": "2025-01-15"}
```<|tool▁call▁end|><|tool▁calls▁end|>"""

        typed_args_call = (
            """<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function
```json
"""
            """{"string_field": "hello", "int_field": 42, "float_field": 3.14, """
            """"bool_field": true, "null_field": null, """
            """"array_field": ["a", "b", "c"], """
            """"object_field": {"nested": "value"}, """
            """"empty_array": [], "empty_object": {}}
```<|tool▁call▁end|><|tool▁calls▁end|>"""
        )

        no_args_call = """<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_current_time
```json
{}
```<|tool▁call▁end|><|tool▁calls▁end|>"""

        text_then_call = (
            """Let me check the weather for you."""
            """<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_weather
```json
{"city": "Paris"}
```<|tool▁call▁end|><|tool▁calls▁end|>"""
        )

        escaped_call = (
            """<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>send_message
```json
"""
            """{"text": "He said \\"hello\\"", "path": "C:\\\\Users\\\\file", """
            """"newline": "line1\\nline2"}
```<|tool▁call▁end|><|tool▁calls▁end|>"""
        )

        broken_calls = [
            # JSON body is missing its closing brace
            """<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_weather
```json
{"city": "Tokyo"
```<|tool▁call▁end|><|tool▁calls▁end|>""",
            # No per-call begin/end markers inside the calls block
            """<|tool▁calls▁begin|>function<|tool▁sep|>get_weather
```json
{"city": "Tokyo"}
```<|tool▁calls▁end|>""",
        ]

        nonstreaming_xfails = {
            "test_malformed_input": (
                "Parser sets tools_called=True even when tool_calls is "
                "empty (detects start token but fails to parse)"
            ),
        }

        return ToolParserTestConfig(
            parser_name="deepseek_v3",
            # Test data
            no_tool_calls_output=plain_reply,
            single_tool_call_output=one_call,
            parallel_tool_calls_output=two_calls,
            various_data_types_output=typed_args_call,
            empty_arguments_output=no_args_call,
            surrounding_text_output=text_then_call,
            escaped_strings_output=escaped_call,
            malformed_input_outputs=broken_calls,
            # Expected results
            single_tool_call_expected_name="get_weather",
            single_tool_call_expected_args={"city": "Tokyo", "unit": "celsius"},
            single_tool_call_expected_content=None,
            parallel_tool_calls_count=2,
            parallel_tool_calls_names=["get_weather", "search_hotels"],
            # xfail markers
            xfail_streaming={},
            xfail_nonstreaming=nonstreaming_xfails,
        )
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from tests.tool_parsers.common_tests import (
ToolParserTestConfig,
ToolParserTests,
)
class TestGranite20bFcToolParser(ToolParserTests):
    """Runs the common tool parser suite against the "granite-20b-fc" parser."""

    @pytest.fixture
    def test_config(self) -> ToolParserTestConfig:
        """Build the Granite 20B FC sample outputs and expected results."""
        # NOTE: continuation lines of the multi-line literals below start at
        # column 0 on purpose; any leading whitespace would become part of the
        # sample model output.
        plain_reply = "This is a regular response without any tool calls."

        one_call = (
            '<function_call> {"name": "get_weather", '
            '"arguments": {"city": "Tokyo"}}'
        )

        two_calls = (
            '<function_call> {"name": "get_weather", '
            '"arguments": {"city": "Tokyo"}}\n'
            '<function_call> {"name": "get_time", '
            '"arguments": {"timezone": "Asia/Tokyo"}}'
        )

        typed_args_call = """<function_call> {
"name": "test_function",
"arguments": {
"string_field": "hello",
"int_field": 42,
"float_field": 3.14,
"bool_field": true,
"null_field": null,
"array_field": ["a", "b", "c"],
"object_field": {"nested": "value"},
"empty_array": [],
"empty_object": {}
}
}"""

        no_args_call = '<function_call> {"name": "refresh", "arguments": {}}'

        text_then_call = """Let me check the weather for you.
<function_call> {"name": "get_weather", "arguments": {"city": "Tokyo"}}"""

        escaped_call = """<function_call> {
"name": "test_function",
"arguments": {
"quoted": "He said \\"hello\\"",
"path": "C:\\\\Users\\\\file.txt",
"newline": "line1\\nline2",
"unicode": "emoji: 🎉"
}
}"""

        broken_calls = [
            '<function_call> {"name": "func", "arguments": {',
            '<function_call> [{"name": "func", "arguments": {}}]',
            '{"name": "func", "arguments": {}}',
            '<function_call> {"name": 123}',
        ]

        streaming_xfails = {
            "test_surrounding_text": (
                "Granite 20B FC streaming requires <function_call> at start"
            ),
        }

        return ToolParserTestConfig(
            parser_name="granite-20b-fc",
            # Test data
            no_tool_calls_output=plain_reply,
            single_tool_call_output=one_call,
            parallel_tool_calls_output=two_calls,
            various_data_types_output=typed_args_call,
            empty_arguments_output=no_args_call,
            surrounding_text_output=text_then_call,
            escaped_strings_output=escaped_call,
            malformed_input_outputs=broken_calls,
            # Expected results
            single_tool_call_expected_name="get_weather",
            single_tool_call_expected_args={"city": "Tokyo"},
            single_tool_call_expected_content=None,
            parallel_tool_calls_count=2,
            parallel_tool_calls_names=["get_weather", "get_time"],
            # xfail markers
            xfail_streaming=streaming_xfails,
            xfail_nonstreaming={},
        )
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from tests.tool_parsers.common_tests import (
ToolParserTestConfig,
ToolParserTests,
)
from tests.tool_parsers.utils import run_tool_extraction
class TestGraniteToolParser(ToolParserTests):
    """Runs the common tool parser suite against the "granite" parser.

    Adds two Granite-specific tests covering the ``<|tool_call|>`` and
    ``<tool_call>`` prefixes.
    """

    @pytest.fixture
    def test_config(self) -> ToolParserTestConfig:
        """Build the Granite sample outputs and expected results."""
        # NOTE: continuation lines of the multi-line literals below start at
        # column 0 on purpose; any leading whitespace would become part of the
        # sample model output.
        plain_reply = "This is a regular response without any tool calls."

        one_call = (
            '<|tool_call|> [{"name": "get_weather", '
            '"arguments": {"city": "Tokyo"}}]'
        )

        two_calls = """<|tool_call|> [
{"name": "get_weather", "arguments": {"city": "Tokyo"}},
{"name": "get_time", "arguments": {"timezone": "Asia/Tokyo"}}
]"""

        typed_args_call = """<tool_call> [{
"name": "test_function",
"arguments": {
"string_field": "hello",
"int_field": 42,
"float_field": 3.14,
"bool_field": true,
"null_field": null,
"array_field": ["a", "b", "c"],
"object_field": {"nested": "value"},
"empty_array": [],
"empty_object": {}
}
}]"""

        no_args_call = '<|tool_call|> [{"name": "refresh", "arguments": {}}]'

        text_around_call = """Let me check the weather for you.
<|tool_call|> [{"name": "get_weather", "arguments": {"city": "Tokyo"}}]
I'll get that information."""

        escaped_call = """<tool_call> [{
"name": "test_function",
"arguments": {
"quoted": "He said \\"hello\\"",
"path": "C:\\\\Users\\\\file.txt",
"newline": "line1\\nline2",
"unicode": "emoji: 🎉"
}
}]"""

        broken_calls = [
            '<|tool_call|> [{"name": "func", "arguments": {',
            '<|tool_call|> {"name": "func", "arguments": {}}',  # Not an array
            '[{"name": "func", "arguments": "not a dict"}]',
            'Some text [{"name": "func"}]',  # JSON but not tool call format
        ]

        streaming_xfails = {
            "test_malformed_input": (
                "Streaming mode incorrectly creates tool call from malformed JSON"
            ),
            "test_surrounding_text": (
                "Parser doesn't handle surrounding text correctly in streaming"
            ),
            "test_streaming_reconstruction": (
                "Streaming mode doesn't strip <|tool_call|> marker from content"
            ),
        }
        nonstreaming_xfails = {
            "test_surrounding_text": (
                "Parser doesn't handle surrounding text correctly in non-streaming"
            ),
        }

        return ToolParserTestConfig(
            parser_name="granite",
            # Test data
            no_tool_calls_output=plain_reply,
            single_tool_call_output=one_call,
            parallel_tool_calls_output=two_calls,
            various_data_types_output=typed_args_call,
            empty_arguments_output=no_args_call,
            surrounding_text_output=text_around_call,
            escaped_strings_output=escaped_call,
            malformed_input_outputs=broken_calls,
            # Expected results
            single_tool_call_expected_name="get_weather",
            single_tool_call_expected_args={"city": "Tokyo"},
            # Granite strips content when tool calls present
            single_tool_call_expected_content=None,
            parallel_tool_calls_count=2,
            parallel_tool_calls_names=["get_weather", "get_time"],
            # xfail markers
            xfail_streaming=streaming_xfails,
            xfail_nonstreaming=nonstreaming_xfails,
        )

    # Granite-Specific Tests

    @pytest.mark.parametrize("streaming", [True, False])
    def test_granite_token_prefix_format(self, tool_parser, streaming):
        """Verify parser handles Granite 3.0 <|tool_call|> token format."""
        model_output = (
            '<|tool_call|> [{"name": "get_weather", "arguments": {"city": "Tokyo"}}]'
        )
        _, extracted_calls = run_tool_extraction(
            tool_parser, model_output, streaming=streaming
        )

        assert len(extracted_calls) == 1, (
            f"Expected 1 tool call from token format, got {len(extracted_calls)}"
        )
        first_call = extracted_calls[0]
        assert first_call.function.name == "get_weather"

    @pytest.mark.parametrize("streaming", [True, False])
    def test_granite_string_prefix_format(self, tool_parser, streaming):
        """Verify parser handles Granite 3.1 <tool_call> string format."""
        model_output = (
            '<tool_call> [{"name": "get_weather", "arguments": {"city": "Tokyo"}}]'
        )
        _, extracted_calls = run_tool_extraction(
            tool_parser, model_output, streaming=streaming
        )

        assert len(extracted_calls) == 1, (
            f"Expected 1 tool call from string format, got {len(extracted_calls)}"
        )
        first_call = extracted_calls[0]
        assert first_call.function.name == "get_weather"
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from unittest.mock import MagicMock
import pytest
from tests.tool_parsers.common_tests import (
ToolParserTestConfig,
ToolParserTests,
)
from vllm.tokenizers import TokenizerLike
class TestInternLM2ToolParser(ToolParserTests):
    """Runs the common tool parser suite against the "internlm" parser."""

    @pytest.fixture
    def tokenizer(self, default_tokenizer: TokenizerLike) -> TokenizerLike:
        """Add some internlm2 specific tokens to the default vocab."""
        extended_vocab = default_tokenizer.get_vocab()
        extended_vocab.update(
            {
                "<|action_start|>": 92540,
                "<|plugin|>": 92541,
                "<|action_end|>": 92542,
            }
        )
        # Replace get_vocab on the tokenizer object itself so that later
        # get_vocab() calls return the extended vocab.
        default_tokenizer.get_vocab = MagicMock(return_value=extended_vocab)
        return default_tokenizer

    @pytest.fixture
    def test_config(self) -> ToolParserTestConfig:
        """Build the InternLM2 sample outputs and expected results."""
        # NOTE: continuation lines of the multi-line literals below start at
        # column 0 on purpose; any leading whitespace would become part of the
        # sample model output.
        plain_reply = "This is a regular response without any tool calls."

        one_call = (
            '<|action_start|><|plugin|>{"name": "get_weather", '
            '"parameters": {"city": "Tokyo"}}<|action_end|>'
        )

        # InternLM2 doesn't support parallel calls
        parallel_sample = (
            '<|action_start|><|plugin|>{"name": "get_weather", '
            '"parameters": {"city": "Tokyo"}}<|action_end|>'
        )

        typed_args_call = """<|action_start|><|plugin|>{
"name": "test_function",
"parameters": {
"string_field": "hello",
"int_field": 42,
"float_field": 3.14,
"bool_field": true,
"null_field": null,
"array_field": ["a", "b", "c"],
"object_field": {"nested": "value"},
"empty_array": [],
"empty_object": {}
}
}<|action_end|>"""

        no_args_call = (
            '<|action_start|><|plugin|>{"name": "refresh", '
            '"parameters": {}}<|action_end|>'
        )

        text_then_call = (
            "Let me check the weather for you. "
            '<|action_start|><|plugin|>{"name": "get_weather", '
            '"parameters": {"city": "Tokyo"}}<|action_end|>'
        )

        escaped_call = """<|action_start|><|plugin|>{
"name": "test_function",
"parameters": {
"quoted": "He said \\"hello\\"",
"path": "C:\\\\Users\\\\file.txt",
"newline": "line1\\nline2",
"unicode": "emoji: 🎉"
}
}<|action_end|>"""

        broken_calls = [
            '<|action_start|><|plugin|>{"name": "func", "parameters": {',
            (
                '<|action_start|><|plugin|>{"name": "func", '
                '"parameters": "not a dict"}<|action_end|>'
            ),
            "<|action_start|><|plugin|>not json<|action_end|>",
            "<|action_start|><|plugin|>",
            '<|action_start|>{"name": "func"}',
        ]

        # Most streaming tests share one xfail reason.
        streaming_gap = "InternLM2 streaming not fully implemented"
        streaming_xfails = {
            "test_single_tool_call_simple_args": streaming_gap,
            "test_parallel_tool_calls": streaming_gap,
            "test_various_data_types": streaming_gap,
            "test_empty_arguments": streaming_gap,
            "test_surrounding_text": streaming_gap,
            "test_escaped_strings": streaming_gap,
            "test_streaming_reconstruction": (
                "InternLM2 streaming parser returns '<|action_start|' as "
                "content instead of None - streaming/non-streaming inconsistency"
            ),
        }
        nonstreaming_xfails = {
            "test_malformed_input": (
                "InternLM2 parser raises JSONDecodeError on malformed JSON "
                "instead of gracefully handling it"
            ),
        }

        return ToolParserTestConfig(
            parser_name="internlm",
            # Test data
            no_tool_calls_output=plain_reply,
            single_tool_call_output=one_call,
            parallel_tool_calls_output=parallel_sample,
            various_data_types_output=typed_args_call,
            empty_arguments_output=no_args_call,
            surrounding_text_output=text_then_call,
            escaped_strings_output=escaped_call,
            malformed_input_outputs=broken_calls,
            # Expected results
            single_tool_call_expected_name="get_weather",
            single_tool_call_expected_args={"city": "Tokyo"},
            single_tool_call_expected_content=None,
            parallel_tool_calls_count=1,  # InternLM2 only supports single tool calls
            parallel_tool_calls_names=["get_weather"],
            # Parser-specific settings
            allow_empty_or_json_empty_args=True,
            # xfail markers
            xfail_streaming=streaming_xfails,
            xfail_nonstreaming=nonstreaming_xfails,
        )
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from unittest.mock import MagicMock
import pytest
from tests.tool_parsers.common_tests import (
ToolParserTestConfig,
ToolParserTests,
)
from vllm.tokenizers import TokenizerLike
class TestLongCatToolParser(ToolParserTests):
    """Runs the common tool parser suite against the "longcat" parser."""

    @pytest.fixture
    def tokenizer(self, default_tokenizer: TokenizerLike) -> TokenizerLike:
        """Add some longcat specific tokens to the default vocab."""
        extended_vocab = default_tokenizer.get_vocab()
        extended_vocab.update(
            {
                "<longcat_tool_call>": 32000,
                "</longcat_tool_call>": 32001,
            }
        )
        # Replace get_vocab on the tokenizer object itself so that later
        # get_vocab() calls return the extended vocab.
        default_tokenizer.get_vocab = MagicMock(return_value=extended_vocab)
        return default_tokenizer

    @pytest.fixture
    def test_config(self) -> ToolParserTestConfig:
        """Build the LongCat sample outputs and expected results."""
        # NOTE: continuation lines of the multi-line literals below start at
        # column 0 on purpose; any leading whitespace would become part of the
        # sample model output.
        plain_reply = "This is a regular response without any tool calls."

        one_call = (
            '<longcat_tool_call>{"name": "get_weather", '
            '"arguments": {"city": "Tokyo"}}</longcat_tool_call>'
        )

        two_calls = (
            '<longcat_tool_call>{"name": "get_weather", '
            '"arguments": {"city": "Tokyo"}}</longcat_tool_call>\n'
            '<longcat_tool_call>{"name": "get_time", '
            '"arguments": {"timezone": "Asia/Tokyo"}}</longcat_tool_call>'
        )

        typed_args_call = """<longcat_tool_call>{
"name": "test_function",
"arguments": {
"string_field": "hello",
"int_field": 42,
"float_field": 3.14,
"bool_field": true,
"null_field": null,
"array_field": ["a", "b", "c"],
"object_field": {"nested": "value"},
"empty_array": [],
"empty_object": {}
}
}</longcat_tool_call>"""

        no_args_call = (
            '<longcat_tool_call>{"name": "refresh", "arguments": {}}'
            "</longcat_tool_call>"
        )

        text_around_call = (
            "Let me check the weather for you.\n"
            '<longcat_tool_call>{"name": "get_weather", '
            '"arguments": {"city": "Tokyo"}}</longcat_tool_call>\n'
            "Here is the result."
        )

        escaped_call = """<longcat_tool_call>{
"name": "test_function",
"arguments": {
"quoted": "He said \\"hello\\"",
"path": "C:\\\\Users\\\\file.txt",
"newline": "line1\\nline2",
"unicode": "emoji: 🎉"
}
}</longcat_tool_call>"""

        broken_calls = [
            '<longcat_tool_call>{"name": "func", "arguments": {',
            (
                '<longcat_tool_call>{"name": "func", '
                '"arguments": "not a dict"}</longcat_tool_call>'
            ),
            "Some text with <longcat_tool_call>invalid json",
        ]

        streaming_xfails = {
            "test_malformed_input": "Streaming has complex buffering behavior",
        }

        return ToolParserTestConfig(
            parser_name="longcat",
            # Test data
            no_tool_calls_output=plain_reply,
            single_tool_call_output=one_call,
            parallel_tool_calls_output=two_calls,
            various_data_types_output=typed_args_call,
            empty_arguments_output=no_args_call,
            surrounding_text_output=text_around_call,
            escaped_strings_output=escaped_call,
            malformed_input_outputs=broken_calls,
            # Expected results
            single_tool_call_expected_name="get_weather",
            single_tool_call_expected_args={"city": "Tokyo"},
            single_tool_call_expected_content=None,
            parallel_tool_calls_count=2,
            parallel_tool_calls_names=["get_weather", "get_time"],
            # xfail markers
            xfail_streaming=streaming_xfails,
            xfail_nonstreaming={},
            # Configuration
            allow_empty_or_json_empty_args=True,
        )
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from unittest.mock import MagicMock
import pytest
from tests.tool_parsers.common_tests import (
ToolParserTestConfig,
ToolParserTests,
)
from vllm.tokenizers import TokenizerLike
class TestPhi4MiniToolParser(ToolParserTests):
    """Runs the common tool parser suite against the "phi4_mini_json" parser."""

    @pytest.fixture
    def tokenizer(self, default_tokenizer: TokenizerLike) -> TokenizerLike:
        """Add some phi4mini specific tokens to the default vocab."""
        extended_vocab = default_tokenizer.get_vocab()
        extended_vocab.update({"functools": 32000})
        # Replace get_vocab on the tokenizer object itself so that later
        # get_vocab() calls return the extended vocab.
        default_tokenizer.get_vocab = MagicMock(return_value=extended_vocab)
        return default_tokenizer

    @pytest.fixture
    def test_config(self) -> ToolParserTestConfig:
        """Build the Phi-4 Mini sample outputs and expected results."""
        # NOTE: continuation lines of the multi-line literals below start at
        # column 0 on purpose; any leading whitespace would become part of the
        # sample model output.
        plain_reply = "This is a regular response without any tool calls."

        one_call = 'functools[{"name": "get_weather", "arguments": {"city": "Tokyo"}}]'

        two_calls = """functools[
{"name": "get_weather", "arguments": {"city": "Tokyo"}},
{"name": "get_time", "arguments": {"timezone": "Asia/Tokyo"}}
]"""

        typed_args_call = """functools[{
"name": "test_function",
"arguments": {
"string_field": "hello",
"int_field": 42,
"float_field": 3.14,
"bool_field": true,
"null_field": null,
"array_field": ["a", "b", "c"],
"object_field": {"nested": "value"},
"empty_array": [],
"empty_object": {}
}
}]"""

        no_args_call = 'functools[{"name": "refresh", "arguments": {}}]'

        text_around_call = """Let me check the weather for you.
functools[{"name": "get_weather", "arguments": {"city": "Tokyo"}}]
Would you like to know more?"""

        escaped_call = """functools[{
"name": "test_function",
"arguments": {
"quoted": "He said \\"hello\\"",
"path": "C:\\\\Users\\\\file.txt",
"newline": "line1\\nline2",
"unicode": "emoji: 🎉"
}
}]"""

        broken_calls = [
            'functools[{"name": "func", "arguments": {',
            'functools[{"name": "func", "arguments": "not a dict"}]',
            'functools{"name": "func"}',  # Missing brackets
            'functools[{"name": "func"}]',  # Missing arguments/parameters
            "functools[] This is just text",  # Empty functools
            "functools[ This is just text ]",  # functools with invalid JSON
        ]

        # Every streaming test shares one xfail reason.
        no_streaming = "Phi4 Mini streaming not implemented"
        streaming_xfails = {
            "test_no_tool_calls": no_streaming,
            "test_single_tool_call_simple_args": no_streaming,
            "test_parallel_tool_calls": no_streaming,
            "test_various_data_types": no_streaming,
            "test_empty_arguments": no_streaming,
            "test_surrounding_text": no_streaming,
            "test_escaped_strings": no_streaming,
            "test_streaming_reconstruction": no_streaming,
        }
        nonstreaming_xfails = {
            "test_various_data_types": (
                "Phi4MiniJsonToolParser regex has nesting limitations "
                "with nested objects"
            ),
            "test_malformed_input": (
                "Phi4MiniJsonToolParser incorrectly sets "
                "tools_called=True on empty array"
            ),
        }

        return ToolParserTestConfig(
            parser_name="phi4_mini_json",
            # Test data
            no_tool_calls_output=plain_reply,
            single_tool_call_output=one_call,
            parallel_tool_calls_output=two_calls,
            various_data_types_output=typed_args_call,
            empty_arguments_output=no_args_call,
            surrounding_text_output=text_around_call,
            escaped_strings_output=escaped_call,
            malformed_input_outputs=broken_calls,
            # Expected results
            single_tool_call_expected_name="get_weather",
            single_tool_call_expected_args={"city": "Tokyo"},
            # Phi-4 Mini strips content when tool calls present
            single_tool_call_expected_content=None,
            parallel_tool_calls_count=2,
            parallel_tool_calls_names=["get_weather", "get_time"],
            parallel_tool_calls_expected_content=None,
            # xfail markers
            xfail_streaming=streaming_xfails,
            xfail_nonstreaming=nonstreaming_xfails,
        )
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from tests.tool_parsers.common_tests import (
ToolParserTestConfig,
ToolParserTests,
)
class TestQwen3xmlToolParser(ToolParserTests):
    """Shared tool-parser test suite configured with ``parser_name="qwen3_xml"``."""

    @pytest.fixture
    def test_config(self) -> ToolParserTestConfig:
        """Return the sample model outputs and expectations for this parser."""

        def xml_call(function_name: str, *parameter_lines: str) -> str:
            # One <tool_call> block; pieces are newline-separated and the
            # result carries no trailing newline.
            return "\n".join(
                [
                    "<tool_call>",
                    f"<function={function_name}>",
                    *parameter_lines,
                    "</function>",
                    "</tool_call>",
                ]
            )

        # Fragments reused by the single / parallel / surrounding-text samples.
        weather_call = xml_call("get_weather", "<parameter=city>Tokyo</parameter>")
        time_call = xml_call(
            "get_time", "<parameter=timezone>Asia/Tokyo</parameter>"
        )

        typed_call = xml_call(
            "test_function",
            "<parameter=string_field>hello</parameter>",
            "<parameter=int_field>42</parameter>",
            "<parameter=float_field>3.14</parameter>",
            "<parameter=bool_field>true</parameter>",
            "<parameter=null_field>null</parameter>",
            '<parameter=array_field>["a", "b", "c"]</parameter>',
            '<parameter=object_field>{"nested": "value"}</parameter>',
        )
        escaped_call = xml_call(
            "test_function",
            '<parameter=quoted>He said "hello"</parameter>',
            "<parameter=path>C:\\Users\\file.txt</parameter>",
            "<parameter=newline>line1\nline2</parameter>",
        )

        # xfail markers - Qwen3XML has systematic streaming issues
        streaming_xfails = dict.fromkeys(
            (
                "test_single_tool_call_simple_args",
                "test_parallel_tool_calls",
                "test_various_data_types",
                "test_empty_arguments",
                "test_surrounding_text",
                "test_escaped_strings",
            ),
            "Qwen3XML streaming has systematic issues",
        )
        streaming_xfails["test_malformed_input"] = (
            "Qwen3XML parser is lenient with malformed input"
        )
        streaming_xfails["test_streaming_reconstruction"] = (
            "Qwen3XML streaming reconstruction has known issues"
        )

        return ToolParserTestConfig(
            parser_name="qwen3_xml",
            # Test data
            no_tool_calls_output=(
                "This is a regular response without any tool calls."
            ),
            single_tool_call_output=weather_call,
            parallel_tool_calls_output=weather_call + time_call,
            various_data_types_output=typed_call,
            empty_arguments_output=xml_call("refresh"),
            surrounding_text_output=(
                "Let me check the weather for you.\n\n"
                + weather_call
                + "\n\n"
                + "I will get that information."
            ),
            escaped_strings_output=escaped_call,
            malformed_input_outputs=[
                "<tool_call><function=func>",
                "<tool_call><function=></function></tool_call>",
            ],
            # Expected results
            single_tool_call_expected_name="get_weather",
            single_tool_call_expected_args={"city": "Tokyo"},
            parallel_tool_calls_count=2,
            parallel_tool_calls_names=["get_weather", "get_time"],
            xfail_streaming=streaming_xfails,
            supports_typed_arguments=False,
        )
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from tests.tool_parsers.common_tests import (
ToolParserTestConfig,
ToolParserTests,
)
from vllm.tokenizers import TokenizerLike, get_tokenizer
class TestStep3ToolParser(ToolParserTests):
    """Shared tool-parser test suite configured with ``parser_name="step3"``."""

    @pytest.fixture(scope="class")
    def tokenizer(self) -> TokenizerLike:
        """Return the tokenizer loaded via ``get_tokenizer("stepfun-ai/step3")``."""
        return get_tokenizer("stepfun-ai/step3")

    @pytest.fixture
    def test_config(self) -> ToolParserTestConfig:
        """Return the sample model outputs and expectations for this parser."""
        calls_begin = "<|tool_calls_begin|>"
        calls_end = "<|tool_calls_end|>"
        call_begin = "<|tool_call_begin|>"
        call_end = "<|tool_call_end|>"

        def param(name: str, value: str) -> str:
            # A single steptml parameter element.
            return f'<steptml:parameter name="{name}">{value}</steptml:parameter>'

        def invoke(name: str, *params: str) -> str:
            # A steptml invoke element wrapping zero or more parameters.
            return f'<steptml:invoke name="{name}">' + "".join(params) + (
                "</steptml:invoke>"
            )

        def wrap(*invocations: str) -> str:
            # Full tool-calls envelope; individual calls joined by <|tool_sep|>.
            framed = [call_begin + item + call_end for item in invocations]
            return calls_begin + "<|tool_sep|>".join(framed) + calls_end

        weather_invoke = invoke("get_weather", param("city", "Tokyo"))
        time_invoke = invoke("get_time", param("timezone", "Asia/Tokyo"))
        single_call = wrap(weather_invoke)

        typed_invoke = invoke(
            "test_function",
            param("string_field", "hello"),
            param("int_field", "42"),
            param("float_field", "3.14"),
            param("bool_field", "true"),
            param("null_field", "null"),
            param("array_field", '["a", "b", "c"]'),
            param("object_field", '{"nested": "value"}'),
        )
        escaped_invoke = invoke(
            "test_function",
            param("quoted", 'He said "hello"'),
            param("path", "C:\\Users\\file.txt"),
            param("newline", "line1\nline2"),
        )

        # xfail markers
        nonstreaming_xfails = dict.fromkeys(
            (
                "test_single_tool_call_simple_args",
                "test_parallel_tool_calls",
                "test_various_data_types",
                "test_empty_arguments",
                "test_surrounding_text",
                "test_escaped_strings",
            ),
            "Step3 parser non-streaming has bugs",
        )
        streaming_xfails = {
            "test_parallel_tool_calls": (
                "Step3 parser has significant bugs in both streaming "
                "and non-streaming"
            ),
            "test_streaming_reconstruction": (
                "Step3 parser non-streaming has bugs, so streaming "
                "doesn't match non-streaming"
            ),
        }

        return ToolParserTestConfig(
            parser_name="step3",
            # Test data
            no_tool_calls_output=(
                "This is a regular response without any tool calls."
            ),
            single_tool_call_output=single_call,
            parallel_tool_calls_output=wrap(weather_invoke, time_invoke),
            various_data_types_output=wrap(typed_invoke),
            empty_arguments_output=wrap(invoke("refresh")),
            surrounding_text_output=(
                "Let me check the weather for you.\n\n"
                + single_call
                + "\n\n"
                + "I'll get that information."
            ),
            escaped_strings_output=wrap(escaped_invoke),
            malformed_input_outputs=[
                # Envelope and invoke opened but never closed.
                calls_begin + call_begin + '<steptml:invoke name="func">',
                # Complete call that lacks the outer tool_calls envelope.
                call_begin + invoke("func") + call_end,
            ],
            # Expected results
            single_tool_call_expected_name="get_weather",
            single_tool_call_expected_args={"city": "Tokyo"},
            parallel_tool_calls_count=2,
            parallel_tool_calls_names=["get_weather", "get_time"],
            xfail_nonstreaming=nonstreaming_xfails,
            xfail_streaming=streaming_xfails,
            supports_typed_arguments=False,
        )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment