Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
754b00ed
Unverified
Commit
754b00ed
authored
Jun 26, 2025
by
Michael Goin
Committed by
GitHub
Jun 26, 2025
Browse files
[Bugfix] Fix Mistral tool-parser regex for nested JSON (#20093)
Signed-off-by:
mgoin
<
mgoin64@gmail.com
>
parent
296ce95d
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
53 additions
and
2 deletions
+53
-2
tests/models/language/generation/test_mistral.py
tests/models/language/generation/test_mistral.py
+51
-0
vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
+2
-2
No files found.
tests/models/language/generation/test_mistral.py
View file @
754b00ed
...
...
@@ -10,6 +10,7 @@ import pytest
from
vllm.entrypoints.openai.tool_parsers.mistral_tool_parser
import
(
MistralToolCall
,
MistralToolParser
)
from
vllm.sampling_params
import
GuidedDecodingParams
,
SamplingParams
from
vllm.transformers_utils.tokenizer
import
MistralTokenizer
from
...utils
import
check_logprobs_close
...
...
@@ -318,3 +319,53 @@ def test_mistral_guided_decoding(
schema
=
SAMPLE_JSON_SCHEMA
)
except
jsonschema
.
exceptions
.
ValidationError
:
pytest
.
fail
(
"Generated response is not valid with JSON schema"
)
def
test_mistral_function_call_nested_json
():
"""Ensure that the function-name regex captures the entire outer-most
JSON block, including nested braces."""
# Create a minimal stub tokenizer that provides the few attributes the
# parser accesses (`version` and `get_vocab`).
class
_StubMistralTokenizer
(
MistralTokenizer
):
version
=
11
# Satisfy the version check
def
__init__
(
self
):
pass
@
staticmethod
def
get_vocab
():
# Provide the special TOOL_CALLS token expected by the parser.
return
{
"[TOOL_CALLS]"
:
0
}
tokenizer
=
_StubMistralTokenizer
()
parser
=
MistralToolParser
(
tokenizer
)
# Craft a model output featuring nested JSON inside the arguments.
args_dict
=
{
"city"
:
"Dallas"
,
"state"
:
"TX"
,
"unit"
:
"fahrenheit"
,
"sub_dict"
:
{
"foo"
:
"bar"
,
"inner"
:
{
"x"
:
1
,
"y"
:
2
}
},
}
model_output
=
(
f
"
{
parser
.
bot_token
}
get_current_weather
{
json
.
dumps
(
args_dict
)
}
"
)
parsed
=
parser
.
extract_tool_calls
(
model_output
,
None
)
# Assertions: the tool call is detected and the full nested JSON is parsed
# without truncation.
assert
parsed
.
tools_called
assert
MistralToolCall
.
is_valid_id
(
parsed
.
tool_calls
[
0
].
id
)
assert
parsed
.
tool_calls
[
0
].
function
.
name
==
"get_current_weather"
assert
json
.
loads
(
parsed
.
tool_calls
[
0
].
function
.
arguments
)
==
args_dict
# No additional content outside the tool call should be returned.
assert
parsed
.
content
is
None
vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
View file @
754b00ed
...
...
@@ -77,8 +77,8 @@ class MistralToolParser(ToolParser):
self
.
bot_token_id
=
self
.
vocab
.
get
(
self
.
bot_token
)
self
.
tool_call_regex
=
re
.
compile
(
r
"\[{.*}\]"
,
re
.
DOTALL
)
if
_is_fn_name_regex_support
(
self
.
model_tokenizer
):
self
.
fn_name_regex
=
re
.
compile
(
r
'([a-zA-Z0-9_-]+)(\{.*?\})'
,
re
.
DOTALL
)
self
.
fn_name_regex
=
re
.
compile
(
r
'([a-zA-Z0-9_-]+)(\{[\s\S]*?\})(?=\s*$|,|\s)'
,
re
.
DOTALL
)
else
:
self
.
fn_name_regex
=
None
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment