Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
6110c39d
Unverified
Commit
6110c39d
authored
Mar 29, 2024
by
Roy
Committed by
GitHub
Mar 29, 2024
Browse files
[BugFix] Fix tokenizer out of vocab size (#3685)
parent
d8658c8c
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
14 additions
and
19 deletions
+14
-19
tests/tokenization/test_detokenize.py
tests/tokenization/test_detokenize.py
+7
-0
vllm/engine/llm_engine.py
vllm/engine/llm_engine.py
+0
-8
vllm/entrypoints/openai/serving_engine.py
vllm/entrypoints/openai/serving_engine.py
+0
-8
vllm/transformers_utils/tokenizer.py
vllm/transformers_utils/tokenizer.py
+7
-3
No files found.
tests/tokenization/test_detokenize.py
View file @
6110c39d
...
@@ -83,6 +83,13 @@ def test_decode_streaming(tokenizer_id, truth, with_prompt,
...
@@ -83,6 +83,13 @@ def test_decode_streaming(tokenizer_id, truth, with_prompt,
assert
decoded_text
==
generated
assert
decoded_text
==
generated
decoded_text
=
_run_incremental_decode
(
tokenizer
,
[
len
(
tokenizer
)],
skip_special_tokens
=
skip_special_tokens
,
starting_index
=
starting_index
)
assert
decoded_text
==
''
@
pytest
.
fixture
@
pytest
.
fixture
def
detokenizer
(
tokenizer_name
:
str
)
->
Detokenizer
:
def
detokenizer
(
tokenizer_name
:
str
)
->
Detokenizer
:
...
...
vllm/engine/llm_engine.py
View file @
6110c39d
...
@@ -222,14 +222,6 @@ class LLMEngine:
...
@@ -222,14 +222,6 @@ class LLMEngine:
self
.
tokenizer
:
BaseTokenizerGroup
=
get_tokenizer_group
(
self
.
tokenizer
:
BaseTokenizerGroup
=
get_tokenizer_group
(
self
.
parallel_config
.
tokenizer_pool_config
,
**
init_kwargs
)
self
.
parallel_config
.
tokenizer_pool_config
,
**
init_kwargs
)
if
len
(
self
.
get_tokenizer
())
!=
self
.
model_config
.
get_vocab_size
():
logger
.
warning
(
f
"The tokenizer's vocabulary size
{
len
(
self
.
get_tokenizer
())
}
"
f
" does not match the model's vocabulary size "
f
"
{
self
.
model_config
.
get_vocab_size
()
}
. This might "
f
"cause an error in decoding. Please change config.json "
"to match the tokenizer's vocabulary size."
)
def
_verify_args
(
self
)
->
None
:
def
_verify_args
(
self
)
->
None
:
self
.
model_config
.
verify_with_parallel_config
(
self
.
parallel_config
)
self
.
model_config
.
verify_with_parallel_config
(
self
.
parallel_config
)
self
.
cache_config
.
verify_with_parallel_config
(
self
.
parallel_config
)
self
.
cache_config
.
verify_with_parallel_config
(
self
.
parallel_config
)
...
...
vllm/entrypoints/openai/serving_engine.py
View file @
6110c39d
...
@@ -68,14 +68,6 @@ class OpenAIServing:
...
@@ -68,14 +68,6 @@ class OpenAIServing:
tokenizer_mode
=
engine_model_config
.
tokenizer_mode
,
tokenizer_mode
=
engine_model_config
.
tokenizer_mode
,
trust_remote_code
=
engine_model_config
.
trust_remote_code
)
trust_remote_code
=
engine_model_config
.
trust_remote_code
)
if
len
(
self
.
tokenizer
)
!=
engine_model_config
.
get_vocab_size
():
logger
.
warning
(
f
"The tokenizer's vocabulary size
{
len
(
self
.
tokenizer
)
}
"
f
" does not match the model's vocabulary size "
f
"
{
engine_model_config
.
get_vocab_size
()
}
. This might "
f
"cause an error in decoding. Please change config.json "
"to match the tokenizer's vocabulary size."
)
async
def
show_available_models
(
self
)
->
ModelList
:
async
def
show_available_models
(
self
)
->
ModelList
:
"""Show available models. Right now we only have one model."""
"""Show available models. Right now we only have one model."""
model_cards
=
[
model_cards
=
[
...
...
vllm/transformers_utils/tokenizer.py
View file @
6110c39d
...
@@ -232,9 +232,13 @@ def detokenize_incrementally(
...
@@ -232,9 +232,13 @@ def detokenize_incrementally(
all_input_ids
[:
-
1
],
all_input_ids
[:
-
1
],
skip_special_tokens
=
skip_special_tokens
)
skip_special_tokens
=
skip_special_tokens
)
# Put new_token_id in a list so skip_special_tokens is respected
# If the new token id is out of bounds, return an empty string.
new_tokens
=
tokenizer
.
convert_ids_to_tokens
(
if
new_token_id
>=
len
(
tokenizer
):
[
new_token_id
],
skip_special_tokens
=
skip_special_tokens
)
new_tokens
=
[
""
]
else
:
# Put new_token_id in a list so skip_special_tokens is respected
new_tokens
=
tokenizer
.
convert_ids_to_tokens
(
[
new_token_id
],
skip_special_tokens
=
skip_special_tokens
)
output_tokens
=
prev_tokens
+
new_tokens
output_tokens
=
prev_tokens
+
new_tokens
# If this is the first iteration, return all tokens.
# If this is the first iteration, return all tokens.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment