Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f20f9f06
Unverified
Commit
f20f9f06
authored
Jun 05, 2025
by
Patrick von Platen
Committed by
GitHub
Jun 05, 2025
Browse files
[mistral_common] Add v11 tokenizer (#19193)
Signed-off-by:
Patrick von Platen
<
patrick.v.platen@gmail.com
>
parent
9bc8bb07
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
32 additions
and
4 deletions
+32
-4
vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
+30
-4
vllm/transformers_utils/tokenizers/mistral.py
vllm/transformers_utils/tokenizers/mistral.py
+2
-0
No files found.
vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
View file @
f20f9f06
...
...
@@ -44,11 +44,17 @@ class MistralToolCall(ToolCall):
return
id
.
isalnum
()
and
len
(
id
)
==
9
def _is_fn_name_regex_support(model_tokenizer: AnyTokenizer) -> bool:
    """Return True when the tokenizer is a Mistral tokenizer of version >= 11.

    Args:
        model_tokenizer: The tokenizer instance to inspect.

    Returns:
        False for any tokenizer that is not a ``MistralTokenizer``;
        otherwise the result of comparing its ``version`` against 11.
    """
    # Only MistralTokenizer is checked for a ``version`` attribute, so bail
    # out before touching it on any other tokenizer type.
    if not isinstance(model_tokenizer, MistralTokenizer):
        return False
    return model_tokenizer.version >= 11
@
ToolParserManager
.
register_module
(
"mistral"
)
class
MistralToolParser
(
ToolParser
):
"""
Tool call parser for Mistral 7B Instruct v0.3, intended for use with the
examples/tool_chat_template_mistral.jinja template.
Tool call parser for Mistral 7B Instruct v0.3, intended for use with
- [`mistral_common`](https://github.com/mistralai/mistral-common/)
- the examples/tool_chat_template_mistral.jinja template.
Used when --enable-auto-tool-choice --tool-call-parser mistral are all set
"""
...
...
@@ -70,6 +76,12 @@ class MistralToolParser(ToolParser):
self
.
bot_token
=
"[TOOL_CALLS]"
self
.
bot_token_id
=
self
.
vocab
.
get
(
self
.
bot_token
)
self
.
tool_call_regex
=
re
.
compile
(
r
"\[{.*}\]"
,
re
.
DOTALL
)
if
_is_fn_name_regex_support
(
self
.
model_tokenizer
):
self
.
fn_name_regex
=
re
.
compile
(
r
'([a-zA-Z0-9_-]+)(\{.*?\})'
,
re
.
DOTALL
)
else
:
self
.
fn_name_regex
=
None
if
self
.
bot_token_id
is
None
:
raise
RuntimeError
(
"Mistral Tool Parser could not locate the tool call token in "
...
...
@@ -109,10 +121,24 @@ class MistralToolParser(ToolParser):
tool_content
=
model_output
.
replace
(
self
.
bot_token
,
""
).
strip
()
try
:
# we first try to directly load the json as parsing very nested
# jsons is difficult
try
:
if
self
.
fn_name_regex
:
matches
=
self
.
fn_name_regex
.
findall
(
tool_content
)
function_call_arr
=
[]
for
match
in
matches
:
fn_name
=
match
[
0
]
args
=
match
[
1
]
# fn_name is encoded outside serialized json dump
# only arguments are serialized
function_call_arr
.
append
({
"name"
:
fn_name
,
"arguments"
:
json
.
loads
(
args
)
})
else
:
function_call_arr
=
json
.
loads
(
tool_content
)
except
json
.
JSONDecodeError
:
# use a regex to find the part corresponding to the tool call.
...
...
vllm/transformers_utils/tokenizers/mistral.py
View file @
f20f9f06
...
...
@@ -187,6 +187,8 @@ class MistralTokenizer(TokenizerBase):
def
__init__
(
self
,
tokenizer
:
"PublicMistralTokenizer"
)
->
None
:
self
.
mistral
=
tokenizer
self
.
instruct
=
tokenizer
.
instruct_tokenizer
_mistral_version_str
=
self
.
instruct
.
tokenizer
.
version
.
value
self
.
version
:
int
=
int
(
_mistral_version_str
.
split
(
"v"
)[
-
1
])
tokenizer_
=
tokenizer
.
instruct_tokenizer
.
tokenizer
from
mistral_common.tokens.tokenizers.tekken
import
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment