Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
39956efb
Unverified
Commit
39956efb
authored
May 07, 2025
by
Qiong Zhou Huang
Committed by
GitHub
May 07, 2025
Browse files
[Bugfix] Fix bad words for Mistral models (#17753)
Signed-off-by:
Qiong Zhou Huang
<
qiong@phonic.co
>
parent
597051e5
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
9 additions
and
18 deletions
+9
-18
vllm/logits_process.py
vllm/logits_process.py
+7
-10
vllm/sampling_params.py
vllm/sampling_params.py
+2
-8
No files found.
vllm/logits_process.py
View file @
39956efb
...
...
@@ -4,11 +4,12 @@ from typing import Callable, Union
import
torch
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
,
MistralTokenizer
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
LogitsProcessor
=
Union
[
Callable
[[
list
[
int
],
torch
.
Tensor
],
torch
.
Tensor
],
Callable
[[
list
[
int
],
list
[
int
],
torch
.
Tensor
],
torch
.
Tensor
]]
LogitsProcessor
=
Union
[
Callable
[[
list
[
int
],
torch
.
Tensor
],
torch
.
Tensor
],
Callable
[[
list
[
int
],
list
[
int
],
torch
.
Tensor
],
torch
.
Tensor
],
]
"""LogitsProcessor is a function that takes a list
of previously generated tokens, the logits tensor
for the next token and, optionally, prompt tokens as a
...
...
@@ -29,10 +30,6 @@ def get_bad_words_logits_processors(
prefix
=
" "
if
add_prefix_space
else
""
prompt
=
prefix
+
bad_word
.
lstrip
()
if
isinstance
(
tokenizer
,
MistralTokenizer
):
# Mistral tokenizers should not add special tokens
prompt_token_ids
=
tokenizer
.
encode
(
text
=
prompt
)
else
:
prompt_token_ids
=
tokenizer
.
encode
(
text
=
prompt
,
add_special_tokens
=
False
)
...
...
vllm/sampling_params.py
View file @
39956efb
...
...
@@ -13,7 +13,6 @@ from typing_extensions import deprecated
from
vllm.logger
import
init_logger
from
vllm.logits_process
import
LogitsProcessor
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
from
vllm.transformers_utils.tokenizers.mistral
import
MistralTokenizer
logger
=
init_logger
(
__name__
)
...
...
@@ -491,13 +490,8 @@ class SamplingParams(
for
add_prefix_space
in
[
False
,
True
]:
prefix
=
" "
if
add_prefix_space
else
""
prompt
=
prefix
+
bad_word
.
lstrip
()
if
isinstance
(
tokenizer
,
MistralTokenizer
):
# Mistral tokenizers should not add special tokens
prompt_token_ids
=
tokenizer
.
encode
(
text
=
prompt
)
else
:
prompt_token_ids
=
tokenizer
.
encode
(
text
=
prompt
,
add_special_tokens
=
False
)
prompt_token_ids
=
tokenizer
.
encode
(
text
=
prompt
,
add_special_tokens
=
False
)
# If no space at the beginning
# or if prefix space produces a new word token
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment