Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
6909a762
Unverified
Commit 6909a762, authored Mar 30, 2025 by Julien Denize; committed by GitHub Mar 29, 2025
Browse files
[Bugfix] Fix Mistral guided generation using xgrammar (#15704)
Signed-off-by: Julien Denize <julien.denize@mistral.ai>
parent
04553371
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing 2 changed files with 34 additions and 17 deletions
+34
-17
tests/v1/entrypoints/llm/test_struct_output_generate.py
tests/v1/entrypoints/llm/test_struct_output_generate.py
+23
-10
vllm/v1/structured_output/backend_xgrammar.py
vllm/v1/structured_output/backend_xgrammar.py
+11
-7
No files found.
tests/v1/entrypoints/llm/test_struct_output_generate.py
View file @
6909a762
...
@@ -15,11 +15,20 @@ from vllm.entrypoints.llm import LLM
...
@@ -15,11 +15,20 @@ from vllm.entrypoints.llm import LLM
from
vllm.outputs
import
RequestOutput
from
vllm.outputs
import
RequestOutput
from
vllm.sampling_params
import
GuidedDecodingParams
,
SamplingParams
from
vllm.sampling_params
import
GuidedDecodingParams
,
SamplingParams
GUIDED_DECODING_BACKENDS_V1
=
[
PARAMS_MODELS_BACKENDS_TOKENIZER_MODE
=
[
"xgrammar:disable-any-whitespace"
,
"guidance:disable-any-whitespace"
(
"mistralai/Ministral-8B-Instruct-2410"
,
"xgrammar:disable-any-whitespace"
,
"auto"
),
(
"mistralai/Ministral-8B-Instruct-2410"
,
"guidance:disable-any-whitespace"
,
"auto"
),
(
"mistralai/Ministral-8B-Instruct-2410"
,
"xgrammar:disable-any-whitespace"
,
"mistral"
),
(
"Qwen/Qwen2.5-1.5B-Instruct"
,
"xgrammar:disable-any-whitespace"
,
"auto"
),
(
"Qwen/Qwen2.5-1.5B-Instruct"
,
"guidance:disable-any-whitespace"
,
"auto"
),
]
]
MODELS_TO_TEST
=
[
"Qwen/Qwen2.5-1.5B-Instruct"
,
"mistralai/Ministral-8B-Instruct-2410"
PARAMS_MODELS_TOKENIZER_MODE
=
[
(
"mistralai/Ministral-8B-Instruct-2410"
,
"auto"
),
(
"Qwen/Qwen2.5-1.5B-Instruct"
,
"auto"
),
]
]
...
@@ -37,9 +46,8 @@ class CarDescription(BaseModel):
...
@@ -37,9 +46,8 @@ class CarDescription(BaseModel):
@
pytest
.
mark
.
skip_global_cleanup
@
pytest
.
mark
.
skip_global_cleanup
@
pytest
.
mark
.
parametrize
(
"guided_decoding_backend"
,
@
pytest
.
mark
.
parametrize
(
"model_name, guided_decoding_backend, tokenizer_mode"
,
GUIDED_DECODING_BACKENDS_V1
)
PARAMS_MODELS_BACKENDS_TOKENIZER_MODE
)
@
pytest
.
mark
.
parametrize
(
"model_name"
,
MODELS_TO_TEST
)
def
test_structured_output
(
def
test_structured_output
(
monkeypatch
:
pytest
.
MonkeyPatch
,
monkeypatch
:
pytest
.
MonkeyPatch
,
sample_json_schema
:
dict
[
str
,
Any
],
sample_json_schema
:
dict
[
str
,
Any
],
...
@@ -49,6 +57,7 @@ def test_structured_output(
...
@@ -49,6 +57,7 @@ def test_structured_output(
sample_regex
:
str
,
sample_regex
:
str
,
sample_guided_choice
:
str
,
sample_guided_choice
:
str
,
guided_decoding_backend
:
str
,
guided_decoding_backend
:
str
,
tokenizer_mode
:
str
,
model_name
:
str
,
model_name
:
str
,
):
):
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"1"
)
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"1"
)
...
@@ -58,7 +67,8 @@ def test_structured_output(
...
@@ -58,7 +67,8 @@ def test_structured_output(
llm
=
LLM
(
model
=
model_name
,
llm
=
LLM
(
model
=
model_name
,
enforce_eager
=
True
,
enforce_eager
=
True
,
max_model_len
=
1024
,
max_model_len
=
1024
,
guided_decoding_backend
=
guided_decoding_backend
)
guided_decoding_backend
=
guided_decoding_backend
,
tokenizer_mode
=
tokenizer_mode
)
#
#
# Test 1: Generate JSON output based on a provided schema
# Test 1: Generate JSON output based on a provided schema
...
@@ -324,17 +334,20 @@ def test_structured_output(
...
@@ -324,17 +334,20 @@ def test_structured_output(
@
pytest
.
mark
.
skip_global_cleanup
@
pytest
.
mark
.
skip_global_cleanup
@
pytest
.
mark
.
parametrize
(
"model_name"
,
MODELS_TO_TEST
)
@
pytest
.
mark
.
parametrize
(
"model_name, tokenizer_mode"
,
PARAMS_MODELS_TOKENIZER_MODE
)
def
test_structured_output_auto_mode
(
def
test_structured_output_auto_mode
(
monkeypatch
:
pytest
.
MonkeyPatch
,
monkeypatch
:
pytest
.
MonkeyPatch
,
unsupported_json_schema
:
dict
[
str
,
Any
],
unsupported_json_schema
:
dict
[
str
,
Any
],
model_name
:
str
,
model_name
:
str
,
tokenizer_mode
:
str
,
):
):
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"1"
)
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"1"
)
llm
=
LLM
(
model
=
model_name
,
llm
=
LLM
(
model
=
model_name
,
max_model_len
=
1024
,
max_model_len
=
1024
,
guided_decoding_backend
=
"auto"
)
guided_decoding_backend
=
"auto"
,
tokenizer_mode
=
tokenizer_mode
)
sampling_params
=
SamplingParams
(
sampling_params
=
SamplingParams
(
temperature
=
1.0
,
temperature
=
1.0
,
...
...
vllm/v1/structured_output/backend_xgrammar.py
View file @
6909a762
...
@@ -42,12 +42,15 @@ class XgrammarBackend(StructuredOutputBackend):
...
@@ -42,12 +42,15 @@ class XgrammarBackend(StructuredOutputBackend):
# NOTE: ideally, xgrammar should handle this accordingly.
# NOTE: ideally, xgrammar should handle this accordingly.
# refer to https://github.com/mlc-ai/xgrammar/blob/d77c0a0173ef14779c918e3be7966ba852f7910f/python/xgrammar/tokenizer_info.py#L98
# refer to https://github.com/mlc-ai/xgrammar/blob/d77c0a0173ef14779c918e3be7966ba852f7910f/python/xgrammar/tokenizer_info.py#L98
try
:
try
:
encoded_vocab
=
[
if
tokenizer
.
is_tekken
:
token
for
token
,
_
in
sorted
(
encoded_vocab
=
tokenizer
.
_vocab
tokenizer
.
get_vocab
().
items
(),
else
:
key
=
lambda
x
:
x
[
1
],
encoded_vocab
=
[
)
token
for
token
,
_
in
sorted
(
]
tokenizer
.
get_vocab
().
items
(),
key
=
lambda
x
:
x
[
1
],
)
]
stop_token_ids
=
None
stop_token_ids
=
None
if
hasattr
(
if
hasattr
(
tokenizer
,
tokenizer
,
...
@@ -62,7 +65,8 @@ class XgrammarBackend(StructuredOutputBackend):
...
@@ -62,7 +65,8 @@ class XgrammarBackend(StructuredOutputBackend):
tokenizer_info
=
xgr
.
TokenizerInfo
(
# type: ignore
tokenizer_info
=
xgr
.
TokenizerInfo
(
# type: ignore
encoded_vocab
=
encoded_vocab
,
encoded_vocab
=
encoded_vocab
,
# NOTE: https://github.com/mlc-ai/xgrammar/blob/5e141f6ff1ca02bc31f9e512e68b61f2a8ae88e5/tests/python/test_tokenizer_info.py#L43 # noqa: E501
# NOTE: https://github.com/mlc-ai/xgrammar/blob/5e141f6ff1ca02bc31f9e512e68b61f2a8ae88e5/tests/python/test_tokenizer_info.py#L43 # noqa: E501
vocab_type
=
xgr
.
VocabType
.
BYTE_FALLBACK
,
vocab_type
=
xgr
.
VocabType
.
RAW
if
tokenizer
.
is_tekken
else
xgr
.
VocabType
.
BYTE_FALLBACK
,
vocab_size
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
stop_token_ids
=
stop_token_ids
,
stop_token_ids
=
stop_token_ids
,
add_prefix_space
=
True
,
add_prefix_space
=
True
,
...
...
Write
Preview
Markdown
is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment.