Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f0a28bf6
Unverified
Commit
f0a28bf6
authored
Dec 01, 2025
by
Cyrus Leung
Committed by
GitHub
Dec 01, 2025
Browse files
[Misc] Unify tokenizer registration (#29767)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
86e178f7
Changes
14
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
237 additions
and
183 deletions
+237
-183
tests/entrypoints/openai/test_tokenization.py
tests/entrypoints/openai/test_tokenization.py
+5
-5
tests/entrypoints/pooling/embed/test_online.py
tests/entrypoints/pooling/embed/test_online.py
+1
-1
tests/entrypoints/pooling/pooling/test_online.py
tests/entrypoints/pooling/pooling/test_online.py
+1
-5
tests/models/registry.py
tests/models/registry.py
+1
-1
tests/tokenizers_/test_registry.py
tests/tokenizers_/test_registry.py
+21
-4
tests/v1/entrypoints/llm/test_struct_output_generate.py
tests/v1/entrypoints/llm/test_struct_output_generate.py
+2
-7
vllm/config/model.py
vllm/config/model.py
+7
-15
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+1
-1
vllm/entrypoints/llm.py
vllm/entrypoints/llm.py
+1
-1
vllm/tokenizers/__init__.py
vllm/tokenizers/__init__.py
+8
-2
vllm/tokenizers/hf.py
vllm/tokenizers/hf.py
+2
-0
vllm/tokenizers/mistral.py
vllm/tokenizers/mistral.py
+2
-0
vllm/tokenizers/registry.py
vllm/tokenizers/registry.py
+184
-15
vllm/transformers_utils/tokenizer.py
vllm/transformers_utils/tokenizer.py
+1
-126
No files found.
tests/entrypoints/openai/test_tokenization.py
View file @
f0a28bf6
...
@@ -53,7 +53,7 @@ async def test_tokenize_completions(
...
@@ -53,7 +53,7 @@ async def test_tokenize_completions(
model_name
:
str
,
model_name
:
str
,
tokenizer_name
:
str
,
tokenizer_name
:
str
,
):
):
tokenizer
=
get_tokenizer
(
tokenizer_name
=
tokenizer_name
,
tokenizer_mode
=
"fast"
)
tokenizer
=
get_tokenizer
(
tokenizer_name
=
tokenizer_name
)
for
add_special
in
[
False
,
True
]:
for
add_special
in
[
False
,
True
]:
prompt
=
"vllm1 This is a test prompt."
prompt
=
"vllm1 This is a test prompt."
...
@@ -87,7 +87,7 @@ async def test_tokenize_chat(
...
@@ -87,7 +87,7 @@ async def test_tokenize_chat(
model_name
:
str
,
model_name
:
str
,
tokenizer_name
:
str
,
tokenizer_name
:
str
,
):
):
tokenizer
=
get_tokenizer
(
tokenizer_name
=
tokenizer_name
,
tokenizer_mode
=
"fast"
)
tokenizer
=
get_tokenizer
(
tokenizer_name
=
tokenizer_name
)
for
add_generation
in
[
False
,
True
]:
for
add_generation
in
[
False
,
True
]:
for
add_special
in
[
False
,
True
]:
for
add_special
in
[
False
,
True
]:
...
@@ -140,7 +140,7 @@ async def test_tokenize_chat_with_tools(
...
@@ -140,7 +140,7 @@ async def test_tokenize_chat_with_tools(
model_name
:
str
,
model_name
:
str
,
tokenizer_name
:
str
,
tokenizer_name
:
str
,
):
):
tokenizer
=
get_tokenizer
(
tokenizer_name
=
tokenizer_name
,
tokenizer_mode
=
"fast"
)
tokenizer
=
get_tokenizer
(
tokenizer_name
=
tokenizer_name
)
for
add_generation
in
[
False
,
True
]:
for
add_generation
in
[
False
,
True
]:
for
add_special
in
[
False
,
True
]:
for
add_special
in
[
False
,
True
]:
...
@@ -210,7 +210,7 @@ async def test_tokenize_with_return_token_strs(
...
@@ -210,7 +210,7 @@ async def test_tokenize_with_return_token_strs(
model_name
:
str
,
model_name
:
str
,
tokenizer_name
:
str
,
tokenizer_name
:
str
,
):
):
tokenizer
=
get_tokenizer
(
tokenizer_name
=
tokenizer_name
,
tokenizer_mode
=
"fast"
)
tokenizer
=
get_tokenizer
(
tokenizer_name
=
tokenizer_name
)
prompt
=
"This is a token_strs test prompt! vllm1"
prompt
=
"This is a token_strs test prompt! vllm1"
response
=
requests
.
post
(
response
=
requests
.
post
(
...
@@ -240,7 +240,7 @@ async def test_detokenize(
...
@@ -240,7 +240,7 @@ async def test_detokenize(
model_name
:
str
,
model_name
:
str
,
tokenizer_name
:
str
,
tokenizer_name
:
str
,
):
):
tokenizer
=
get_tokenizer
(
tokenizer_name
=
tokenizer_name
,
tokenizer_mode
=
"fast"
)
tokenizer
=
get_tokenizer
(
tokenizer_name
=
tokenizer_name
)
prompt
=
"This is a test prompt. vllm1"
prompt
=
"This is a test prompt. vllm1"
tokens
=
tokenizer
.
encode
(
prompt
,
add_special_tokens
=
False
)
tokens
=
tokenizer
.
encode
(
prompt
,
add_special_tokens
=
False
)
...
...
tests/entrypoints/pooling/embed/test_online.py
View file @
f0a28bf6
...
@@ -197,7 +197,7 @@ async def test_conversation_embedding(
...
@@ -197,7 +197,7 @@ async def test_conversation_embedding(
chat_response
.
raise_for_status
()
chat_response
.
raise_for_status
()
chat_embeddings
=
EmbeddingResponse
.
model_validate
(
chat_response
.
json
())
chat_embeddings
=
EmbeddingResponse
.
model_validate
(
chat_response
.
json
())
tokenizer
=
get_tokenizer
(
tokenizer_name
=
model_name
,
tokenizer_mode
=
"fast"
)
tokenizer
=
get_tokenizer
(
tokenizer_name
=
model_name
)
prompt
=
tokenizer
.
apply_chat_template
(
prompt
=
tokenizer
.
apply_chat_template
(
messages
,
messages
,
chat_template
=
DUMMY_CHAT_TEMPLATE
,
chat_template
=
DUMMY_CHAT_TEMPLATE
,
...
...
tests/entrypoints/pooling/pooling/test_online.py
View file @
f0a28bf6
...
@@ -158,11 +158,7 @@ async def test_conversation_pooling(server: RemoteOpenAIServer, model_name: str)
...
@@ -158,11 +158,7 @@ async def test_conversation_pooling(server: RemoteOpenAIServer, model_name: str)
chat_response
.
raise_for_status
()
chat_response
.
raise_for_status
()
chat_poolings
=
PoolingResponse
.
model_validate
(
chat_response
.
json
())
chat_poolings
=
PoolingResponse
.
model_validate
(
chat_response
.
json
())
tokenizer
=
get_tokenizer
(
tokenizer
=
get_tokenizer
(
tokenizer_name
=
model_name
,
trust_remote_code
=
True
)
tokenizer_name
=
model_name
,
tokenizer_mode
=
"fast"
,
trust_remote_code
=
True
,
)
prompt
=
tokenizer
.
apply_chat_template
(
prompt
=
tokenizer
.
apply_chat_template
(
messages
,
messages
,
chat_template
=
DUMMY_CHAT_TEMPLATE
,
chat_template
=
DUMMY_CHAT_TEMPLATE
,
...
...
tests/models/registry.py
View file @
f0a28bf6
...
@@ -23,7 +23,7 @@ class _HfExamplesInfo:
...
@@ -23,7 +23,7 @@ class _HfExamplesInfo:
tokenizer
:
str
|
None
=
None
tokenizer
:
str
|
None
=
None
"""Set the tokenizer to load for this architecture."""
"""Set the tokenizer to load for this architecture."""
tokenizer_mode
:
TokenizerMode
=
"auto"
tokenizer_mode
:
TokenizerMode
|
str
=
"auto"
"""Set the tokenizer type for this architecture."""
"""Set the tokenizer type for this architecture."""
speculative_model
:
str
|
None
=
None
speculative_model
:
str
|
None
=
None
...
...
tests/tokenizers_/test_registry.py
View file @
f0a28bf6
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
pathlib
import
Path
from
vllm.tokenizers
import
TokenizerLike
,
TokenizerRegistry
from
vllm.tokenizers
import
TokenizerLike
,
TokenizerRegistry
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
class
TestTokenizer
(
TokenizerLike
):
class
TestTokenizer
(
TokenizerLike
):
@
classmethod
@
classmethod
def
from_pretrained
(
cls
,
*
args
,
**
kwargs
)
->
"TestTokenizer"
:
def
from_pretrained
(
return
TestTokenizer
()
# type: ignore
cls
,
path_or_repo_id
:
str
|
Path
,
*
args
,
trust_remote_code
:
bool
=
False
,
revision
:
str
|
None
=
None
,
download_dir
:
str
|
None
=
None
,
**
kwargs
,
)
->
"TestTokenizer"
:
return
TestTokenizer
(
path_or_repo_id
)
# type: ignore
def
__init__
(
self
,
path_or_repo_id
:
str
|
Path
)
->
None
:
super
().
__init__
()
self
.
path_or_repo_id
=
path_or_repo_id
@
property
@
property
def
bos_token_id
(
self
)
->
int
:
def
bos_token_id
(
self
)
->
int
:
...
@@ -29,14 +44,16 @@ class TestTokenizer(TokenizerLike):
...
@@ -29,14 +44,16 @@ class TestTokenizer(TokenizerLike):
def
test_customized_tokenizer
():
def
test_customized_tokenizer
():
TokenizerRegistry
.
register
(
"test_tokenizer"
,
__name__
,
TestTokenizer
.
__name__
)
TokenizerRegistry
.
register
(
"test_tokenizer"
,
__name__
,
TestTokenizer
.
__name__
)
tokenizer
=
TokenizerRegistry
.
get_tokenizer
(
"test_tokenizer"
)
tokenizer
=
TokenizerRegistry
.
get_tokenizer
(
"test_tokenizer"
,
"abc"
)
assert
isinstance
(
tokenizer
,
TestTokenizer
)
assert
isinstance
(
tokenizer
,
TestTokenizer
)
assert
tokenizer
.
path_or_repo_id
==
"abc"
assert
tokenizer
.
bos_token_id
==
0
assert
tokenizer
.
bos_token_id
==
0
assert
tokenizer
.
eos_token_id
==
1
assert
tokenizer
.
eos_token_id
==
1
assert
tokenizer
.
pad_token_id
==
2
assert
tokenizer
.
pad_token_id
==
2
tokenizer
=
get_tokenizer
(
"
test_tokenizer
"
,
tokenizer_mode
=
"
custom
"
)
tokenizer
=
get_tokenizer
(
"
abc
"
,
tokenizer_mode
=
"
test_tokenizer
"
)
assert
isinstance
(
tokenizer
,
TestTokenizer
)
assert
isinstance
(
tokenizer
,
TestTokenizer
)
assert
tokenizer
.
path_or_repo_id
==
"abc"
assert
tokenizer
.
bos_token_id
==
0
assert
tokenizer
.
bos_token_id
==
0
assert
tokenizer
.
eos_token_id
==
1
assert
tokenizer
.
eos_token_id
==
1
assert
tokenizer
.
pad_token_id
==
2
assert
tokenizer
.
pad_token_id
==
2
tests/v1/entrypoints/llm/test_struct_output_generate.py
View file @
f0a28bf6
...
@@ -4,7 +4,7 @@
...
@@ -4,7 +4,7 @@
import
json
import
json
from
enum
import
Enum
from
enum
import
Enum
from
typing
import
TYPE_CHECKING
,
Any
from
typing
import
Any
import
jsonschema
import
jsonschema
import
pytest
import
pytest
...
@@ -24,11 +24,6 @@ from vllm.sampling_params import (
...
@@ -24,11 +24,6 @@ from vllm.sampling_params import (
StructuredOutputsParams
,
StructuredOutputsParams
,
)
)
if
TYPE_CHECKING
:
from
vllm.config.model
import
TokenizerMode
else
:
TokenizerMode
=
str
NGRAM_SPEC_CONFIG
=
{
NGRAM_SPEC_CONFIG
=
{
"model"
:
"[ngram]"
,
"model"
:
"[ngram]"
,
"num_speculative_tokens"
:
5
,
"num_speculative_tokens"
:
5
,
...
@@ -627,7 +622,7 @@ Make the response as short as possible.
...
@@ -627,7 +622,7 @@ Make the response as short as possible.
)
)
def
test_structured_output_with_reasoning_matrices
(
def
test_structured_output_with_reasoning_matrices
(
backend
:
str
,
backend
:
str
,
tokenizer_mode
:
TokenizerMode
,
tokenizer_mode
:
str
,
reasoning_parser
:
str
,
reasoning_parser
:
str
,
model_name
:
str
,
model_name
:
str
,
speculative_config
:
dict
[
str
,
Any
]
|
None
,
speculative_config
:
dict
[
str
,
Any
]
|
None
,
...
...
vllm/config/model.py
View file @
f0a28bf6
...
@@ -86,7 +86,7 @@ TaskOption = Literal[
...
@@ -86,7 +86,7 @@ TaskOption = Literal[
"transcription"
,
"transcription"
,
"draft"
,
"draft"
,
]
]
TokenizerMode
=
Literal
[
"auto"
,
"hf"
,
"slow"
,
"mistral"
,
"custom"
]
TokenizerMode
=
Literal
[
"auto"
,
"hf"
,
"slow"
,
"mistral"
]
ModelDType
=
Literal
[
"auto"
,
"half"
,
"float16"
,
"bfloat16"
,
"float"
,
"float32"
]
ModelDType
=
Literal
[
"auto"
,
"half"
,
"float16"
,
"bfloat16"
,
"float"
,
"float32"
]
LogprobsMode
=
Literal
[
LogprobsMode
=
Literal
[
"raw_logits"
,
"raw_logprobs"
,
"processed_logits"
,
"processed_logprobs"
"raw_logits"
,
"raw_logprobs"
,
"processed_logits"
,
"processed_logprobs"
...
@@ -137,13 +137,13 @@ class ModelConfig:
...
@@ -137,13 +137,13 @@ class ModelConfig:
tokenizer
:
SkipValidation
[
str
]
=
None
# type: ignore
tokenizer
:
SkipValidation
[
str
]
=
None
# type: ignore
"""Name or path of the Hugging Face tokenizer to use. If unspecified, model
"""Name or path of the Hugging Face tokenizer to use. If unspecified, model
name or path will be used."""
name or path will be used."""
tokenizer_mode
:
TokenizerMode
=
"auto"
tokenizer_mode
:
TokenizerMode
|
str
=
"auto"
"""Tokenizer mode:
\n
"""Tokenizer mode:
\n
- "auto" will use "hf" tokenizer if Mistral's tokenizer is not available.
\n
- "auto" will use "hf" tokenizer if Mistral's tokenizer is not available.
\n
- "hf" will use the fast tokenizer if available.
\n
- "hf" will use the fast tokenizer if available.
\n
- "slow" will always use the slow tokenizer.
\n
- "slow" will always use the slow tokenizer.
\n
- "mistral" will always use the tokenizer from `mistral_common`.
\n
- "mistral" will always use the tokenizer from `mistral_common`.
\n
-
"custom" will use --tokenizer to select the preregistered tokenizer
."""
-
Other custom values can be supported via plugins
."""
trust_remote_code
:
bool
=
False
trust_remote_code
:
bool
=
False
"""Trust remote code (e.g., from HuggingFace) when downloading the model
"""Trust remote code (e.g., from HuggingFace) when downloading the model
and tokenizer."""
and tokenizer."""
...
@@ -708,9 +708,6 @@ class ModelConfig:
...
@@ -708,9 +708,6 @@ class ModelConfig:
# can be correctly capped to sliding window size
# can be correctly capped to sliding window size
self
.
hf_text_config
.
sliding_window
=
None
self
.
hf_text_config
.
sliding_window
=
None
if
not
self
.
skip_tokenizer_init
:
self
.
_verify_tokenizer_mode
()
# Avoid running try_verify_and_update_config multiple times
# Avoid running try_verify_and_update_config multiple times
self
.
config_updated
=
False
self
.
config_updated
=
False
...
@@ -718,6 +715,10 @@ class ModelConfig:
...
@@ -718,6 +715,10 @@ class ModelConfig:
self
.
_verify_cuda_graph
()
self
.
_verify_cuda_graph
()
self
.
_verify_bnb_config
()
self
.
_verify_bnb_config
()
@
field_validator
(
"tokenizer_mode"
,
mode
=
"after"
)
def
_lowercase_tokenizer_mode
(
cls
,
tokenizer_mode
:
str
)
->
str
:
return
tokenizer_mode
.
lower
()
@
field_validator
(
"quantization"
,
mode
=
"before"
)
@
field_validator
(
"quantization"
,
mode
=
"before"
)
@
classmethod
@
classmethod
def
validate_quantization_before
(
cls
,
value
:
Any
)
->
Any
:
def
validate_quantization_before
(
cls
,
value
:
Any
)
->
Any
:
...
@@ -829,15 +830,6 @@ class ModelConfig:
...
@@ -829,15 +830,6 @@ class ModelConfig:
model
,
_
=
split_remote_gguf
(
model
)
model
,
_
=
split_remote_gguf
(
model
)
return
get_sentence_transformer_tokenizer_config
(
model
,
self
.
revision
)
return
get_sentence_transformer_tokenizer_config
(
model
,
self
.
revision
)
def
_verify_tokenizer_mode
(
self
)
->
None
:
tokenizer_mode
=
cast
(
TokenizerMode
,
self
.
tokenizer_mode
.
lower
())
if
tokenizer_mode
not
in
get_args
(
TokenizerMode
):
raise
ValueError
(
f
"Unknown tokenizer mode:
{
self
.
tokenizer_mode
}
. Must be "
f
"one of
{
get_args
(
TokenizerMode
)
}
."
)
self
.
tokenizer_mode
=
tokenizer_mode
def
_get_default_runner_type
(
def
_get_default_runner_type
(
self
,
self
,
architectures
:
list
[
str
],
architectures
:
list
[
str
],
...
...
vllm/engine/arg_utils.py
View file @
f0a28bf6
...
@@ -360,7 +360,7 @@ class EngineArgs:
...
@@ -360,7 +360,7 @@ class EngineArgs:
task
:
TaskOption
|
None
=
ModelConfig
.
task
task
:
TaskOption
|
None
=
ModelConfig
.
task
skip_tokenizer_init
:
bool
=
ModelConfig
.
skip_tokenizer_init
skip_tokenizer_init
:
bool
=
ModelConfig
.
skip_tokenizer_init
enable_prompt_embeds
:
bool
=
ModelConfig
.
enable_prompt_embeds
enable_prompt_embeds
:
bool
=
ModelConfig
.
enable_prompt_embeds
tokenizer_mode
:
TokenizerMode
=
ModelConfig
.
tokenizer_mode
tokenizer_mode
:
TokenizerMode
|
str
=
ModelConfig
.
tokenizer_mode
trust_remote_code
:
bool
=
ModelConfig
.
trust_remote_code
trust_remote_code
:
bool
=
ModelConfig
.
trust_remote_code
allowed_local_media_path
:
str
=
ModelConfig
.
allowed_local_media_path
allowed_local_media_path
:
str
=
ModelConfig
.
allowed_local_media_path
allowed_media_domains
:
list
[
str
]
|
None
=
ModelConfig
.
allowed_media_domains
allowed_media_domains
:
list
[
str
]
|
None
=
ModelConfig
.
allowed_media_domains
...
...
vllm/entrypoints/llm.py
View file @
f0a28bf6
...
@@ -188,7 +188,7 @@ class LLM:
...
@@ -188,7 +188,7 @@ class LLM:
runner
:
RunnerOption
=
"auto"
,
runner
:
RunnerOption
=
"auto"
,
convert
:
ConvertOption
=
"auto"
,
convert
:
ConvertOption
=
"auto"
,
tokenizer
:
str
|
None
=
None
,
tokenizer
:
str
|
None
=
None
,
tokenizer_mode
:
TokenizerMode
=
"auto"
,
tokenizer_mode
:
TokenizerMode
|
str
=
"auto"
,
skip_tokenizer_init
:
bool
=
False
,
skip_tokenizer_init
:
bool
=
False
,
trust_remote_code
:
bool
=
False
,
trust_remote_code
:
bool
=
False
,
allowed_local_media_path
:
str
=
""
,
allowed_local_media_path
:
str
=
""
,
...
...
vllm/tokenizers/__init__.py
View file @
f0a28bf6
...
@@ -4,6 +4,12 @@
...
@@ -4,6 +4,12 @@
from
.hf
import
HfTokenizer
from
.hf
import
HfTokenizer
from
.mistral
import
MistralTokenizer
from
.mistral
import
MistralTokenizer
from
.protocol
import
TokenizerLike
from
.protocol
import
TokenizerLike
from
.registry
import
TokenizerRegistry
from
.registry
import
TokenizerRegistry
,
get_tokenizer
__all__
=
[
"TokenizerLike"
,
"HfTokenizer"
,
"MistralTokenizer"
,
"TokenizerRegistry"
]
__all__
=
[
"TokenizerLike"
,
"HfTokenizer"
,
"MistralTokenizer"
,
"TokenizerRegistry"
,
"get_tokenizer"
,
]
vllm/tokenizers/hf.py
View file @
f0a28bf6
...
@@ -10,6 +10,7 @@ from transformers import AutoTokenizer
...
@@ -10,6 +10,7 @@ from transformers import AutoTokenizer
from
vllm.transformers_utils.config
import
get_sentence_transformer_tokenizer_config
from
vllm.transformers_utils.config
import
get_sentence_transformer_tokenizer_config
from
.protocol
import
TokenizerLike
from
.protocol
import
TokenizerLike
from
.registry
import
TokenizerRegistry
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
from
transformers
import
PreTrainedTokenizer
,
PreTrainedTokenizerFast
from
transformers
import
PreTrainedTokenizer
,
PreTrainedTokenizerFast
...
@@ -67,6 +68,7 @@ def get_cached_tokenizer(
...
@@ -67,6 +68,7 @@ def get_cached_tokenizer(
return
cached_tokenizer
# type: ignore
return
cached_tokenizer
# type: ignore
@
TokenizerRegistry
.
register
(
"hf"
)
class
HfTokenizer
(
TokenizerLike
):
class
HfTokenizer
(
TokenizerLike
):
@
classmethod
@
classmethod
def
from_pretrained
(
def
from_pretrained
(
...
...
vllm/tokenizers/mistral.py
View file @
f0a28bf6
...
@@ -6,6 +6,7 @@ from typing import TYPE_CHECKING, Any, cast
...
@@ -6,6 +6,7 @@ from typing import TYPE_CHECKING, Any, cast
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
.protocol
import
TokenizerLike
from
.protocol
import
TokenizerLike
from
.registry
import
TokenizerRegistry
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
from
mistral_common.protocol.instruct.request
import
(
from
mistral_common.protocol.instruct.request
import
(
...
@@ -165,6 +166,7 @@ def _tekken_token_to_id(tokenizer: "Tekkenizer", t: str | bytes) -> int:
...
@@ -165,6 +166,7 @@ def _tekken_token_to_id(tokenizer: "Tekkenizer", t: str | bytes) -> int:
return
tokenizer
.
unk_id
return
tokenizer
.
unk_id
@
TokenizerRegistry
.
register
(
"mistral"
)
class
MistralTokenizer
(
TokenizerLike
):
class
MistralTokenizer
(
TokenizerLike
):
@
classmethod
@
classmethod
def
from_pretrained
(
def
from_pretrained
(
...
...
vllm/tokenizers/registry.py
View file @
f0a28bf6
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
importlib
import
importlib.util
from
collections.abc
import
Callable
from
pathlib
import
Path
from
typing
import
TypeVar
,
overload
import
huggingface_hub
import
vllm.envs
as
envs
from
vllm.logger
import
init_logger
from
vllm.transformers_utils.gguf_utils
import
get_gguf_file_path_from_hf
from
vllm.transformers_utils.repo_utils
import
list_filtered_repo_files
from
vllm.transformers_utils.utils
import
(
check_gguf_file
,
is_gguf
,
is_remote_gguf
,
split_remote_gguf
,
)
from
vllm.utils.import_utils
import
resolve_obj_by_qualname
from
.protocol
import
TokenizerLike
from
.protocol
import
TokenizerLike
logger
=
init_logger
(
__name__
)
_T
=
TypeVar
(
"_T"
,
bound
=
type
[
TokenizerLike
])
class
TokenizerRegistry
:
class
TokenizerRegistry
:
# Tokenizer name -> (tokenizer module, tokenizer class)
# Tokenizer name ->
tokenizer_cls or
(tokenizer module, tokenizer class)
REGISTRY
:
dict
[
str
,
tuple
[
str
,
str
]]
=
{}
REGISTRY
:
dict
[
str
,
type
[
TokenizerLike
]
|
tuple
[
str
,
str
]]
=
{}
# In-tree tokenizers
@
staticmethod
@
staticmethod
def
register
(
name
:
str
,
module
:
str
,
class_name
:
str
)
->
None
:
@
overload
TokenizerRegistry
.
REGISTRY
[
name
]
=
(
module
,
class_name
)
def
register
(
tokenizer_mode
:
str
)
->
Callable
[[
_T
],
_T
]:
...
# OOT tokenizers
@
staticmethod
@
staticmethod
def
get_tokenizer
(
@
overload
tokenizer_name
:
str
,
def
register
(
tokenizer_mode
:
str
,
module
:
str
,
class_name
:
str
)
->
None
:
...
@
staticmethod
def
register
(
tokenizer_mode
:
str
,
module
:
str
|
None
=
None
,
class_name
:
str
|
None
=
None
,
)
->
Callable
[[
_T
],
_T
]
|
None
:
# In-tree tokenizers
if
module
is
None
or
class_name
is
None
:
def
wrapper
(
tokenizer_cls
:
_T
)
->
_T
:
assert
tokenizer_mode
not
in
TokenizerRegistry
.
REGISTRY
TokenizerRegistry
.
REGISTRY
[
tokenizer_mode
]
=
tokenizer_cls
return
tokenizer_cls
return
wrapper
# OOT tokenizers
if
tokenizer_mode
in
TokenizerRegistry
.
REGISTRY
:
logger
.
warning
(
"%s.%s is already registered for tokenizer_mode=%r. "
"It is overwritten by the new one."
,
module
,
class_name
,
tokenizer_mode
,
)
TokenizerRegistry
.
REGISTRY
[
tokenizer_mode
]
=
(
module
,
class_name
)
return
None
@
staticmethod
def
get_tokenizer
(
tokenizer_mode
:
str
,
*
args
,
**
kwargs
)
->
"TokenizerLike"
:
if
tokenizer_mode
not
in
TokenizerRegistry
.
REGISTRY
:
raise
ValueError
(
f
"No tokenizer registered for
{
tokenizer_mode
=
!
r
}
."
)
item
=
TokenizerRegistry
.
REGISTRY
[
tokenizer_mode
]
if
isinstance
(
item
,
type
):
return
item
.
from_pretrained
(
*
args
,
**
kwargs
)
module
,
class_name
=
item
logger
.
debug_once
(
f
"Loading
{
class_name
}
for
{
tokenizer_mode
=
!
r
}
"
)
class_
=
resolve_obj_by_qualname
(
f
"
{
module
}
.
{
class_name
}
"
)
return
class_
.
from_pretrained
(
*
args
,
**
kwargs
)
def
get_tokenizer
(
tokenizer_name
:
str
|
Path
,
*
args
,
*
args
,
tokenizer_mode
:
str
=
"auto"
,
trust_remote_code
:
bool
=
False
,
revision
:
str
|
None
=
None
,
download_dir
:
str
|
None
=
None
,
**
kwargs
,
**
kwargs
,
)
->
"TokenizerLike"
:
)
->
TokenizerLike
:
tokenizer_cls
=
TokenizerRegistry
.
REGISTRY
.
get
(
tokenizer_name
)
"""Gets a tokenizer for the given model name via HuggingFace or ModelScope."""
if
tokenizer_cls
is
None
:
if
envs
.
VLLM_USE_MODELSCOPE
:
raise
ValueError
(
f
"Tokenizer
{
tokenizer_name
}
not found."
)
# download model from ModelScope hub,
# lazy import so that modelscope is not required for normal use.
from
modelscope.hub.snapshot_download
import
snapshot_download
tokenizer_module
=
importlib
.
import_module
(
tokenizer_cls
[
0
])
# avoid circular import
class_
=
getattr
(
tokenizer_module
,
tokenizer_cls
[
1
])
from
vllm.model_executor.model_loader.weight_utils
import
get_lock
return
class_
.
from_pretrained
(
*
args
,
**
kwargs
)
# Only set the tokenizer here, model will be downloaded on the workers.
if
not
Path
(
tokenizer_name
).
exists
():
# Use file lock to prevent multiple processes from
# downloading the same file at the same time.
with
get_lock
(
tokenizer_name
,
download_dir
):
tokenizer_path
=
snapshot_download
(
model_id
=
str
(
tokenizer_name
),
cache_dir
=
download_dir
,
revision
=
revision
,
local_files_only
=
huggingface_hub
.
constants
.
HF_HUB_OFFLINE
,
# Ignore weights - we only need the tokenizer.
ignore_file_pattern
=
[
".*.pt"
,
".*.safetensors"
,
".*.bin"
],
)
tokenizer_name
=
tokenizer_path
if
tokenizer_mode
==
"slow"
:
if
kwargs
.
get
(
"use_fast"
,
False
):
raise
ValueError
(
"Cannot use the fast tokenizer in slow tokenizer mode."
)
tokenizer_mode
=
"hf"
kwargs
[
"use_fast"
]
=
False
if
"truncation_side"
not
in
kwargs
:
kwargs
[
"truncation_side"
]
=
"left"
# Separate model folder from file path for GGUF models
if
is_gguf
(
tokenizer_name
):
if
check_gguf_file
(
tokenizer_name
):
kwargs
[
"gguf_file"
]
=
Path
(
tokenizer_name
).
name
tokenizer_name
=
Path
(
tokenizer_name
).
parent
elif
is_remote_gguf
(
tokenizer_name
):
tokenizer_name
,
quant_type
=
split_remote_gguf
(
tokenizer_name
)
# Get the HuggingFace Hub path for the GGUF file
gguf_file
=
get_gguf_file_path_from_hf
(
tokenizer_name
,
quant_type
,
revision
=
revision
,
)
kwargs
[
"gguf_file"
]
=
gguf_file
# Try to use official Mistral tokenizer if possible
if
tokenizer_mode
==
"auto"
and
importlib
.
util
.
find_spec
(
"mistral_common"
):
allow_patterns
=
[
"tekken.json"
,
"tokenizer.model.v*"
]
files_list
=
list_filtered_repo_files
(
model_name_or_path
=
str
(
tokenizer_name
),
allow_patterns
=
allow_patterns
,
revision
=
revision
,
)
if
len
(
files_list
)
>
0
:
tokenizer_mode
=
"mistral"
# Fallback to HF tokenizer
if
tokenizer_mode
==
"auto"
:
tokenizer_mode
=
"hf"
tokenizer_args
=
(
tokenizer_name
,
*
args
)
tokenizer_kwargs
=
dict
(
trust_remote_code
=
trust_remote_code
,
revision
=
revision
,
download_dir
=
download_dir
,
**
kwargs
,
)
if
tokenizer_mode
==
"custom"
:
logger
.
warning_once
(
"TokenizerRegistry now uses `tokenizer_mode` as the registry key "
"instead of `tokenizer_name`. "
"Please update the definition of `.from_pretrained` in "
"your custom tokenizer to accept `args=%s`, `kwargs=%s`. "
"Then, you can pass `tokenizer_mode=%r` instead of "
"`tokenizer_mode='custom'` when initializing vLLM."
,
tokenizer_args
,
str
(
tokenizer_kwargs
),
tokenizer_mode
,
)
tokenizer_mode
=
str
(
tokenizer_name
)
tokenizer
=
TokenizerRegistry
.
get_tokenizer
(
tokenizer_mode
,
*
tokenizer_args
,
**
tokenizer_kwargs
,
)
if
not
tokenizer
.
is_fast
:
logger
.
warning
(
"Using a slow tokenizer. This might cause a significant "
"slowdown. Consider using a fast tokenizer instead."
)
return
tokenizer
vllm/transformers_utils/tokenizer.py
View file @
f0a28bf6
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
importlib.util
import
os
import
warnings
import
warnings
from
functools
import
lru_cache
from
functools
import
lru_cache
from
pathlib
import
Path
from
typing
import
TYPE_CHECKING
,
Any
from
typing
import
TYPE_CHECKING
,
Any
import
huggingface_hub
from
typing_extensions
import
assert_never
from
typing_extensions
import
assert_never
from
vllm
import
envs
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
(
from
vllm.tokenizers
import
TokenizerLike
,
get_tokenizer
HfTokenizer
,
MistralTokenizer
,
TokenizerLike
,
TokenizerRegistry
,
)
from
.gguf_utils
import
get_gguf_file_path_from_hf
from
.repo_utils
import
list_filtered_repo_files
from
.utils
import
check_gguf_file
,
is_gguf
,
is_remote_gguf
,
split_remote_gguf
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
from
vllm.config
import
ModelConfig
from
vllm.config
import
ModelConfig
...
@@ -108,117 +94,6 @@ def encode_tokens(
...
@@ -108,117 +94,6 @@ def encode_tokens(
return
tokenizer
.
encode
(
text
,
**
kw_args
)
return
tokenizer
.
encode
(
text
,
**
kw_args
)
def
get_tokenizer
(
tokenizer_name
:
str
|
Path
,
*
args
,
tokenizer_mode
:
str
=
"auto"
,
trust_remote_code
:
bool
=
False
,
revision
:
str
|
None
=
None
,
download_dir
:
str
|
None
=
None
,
**
kwargs
,
)
->
TokenizerLike
:
"""Gets a tokenizer for the given model name via HuggingFace or ModelScope."""
if
envs
.
VLLM_USE_MODELSCOPE
:
# download model from ModelScope hub,
# lazy import so that modelscope is not required for normal use.
# pylint: disable=C.
from
modelscope.hub.snapshot_download
import
snapshot_download
# avoid circuit import
from
vllm.model_executor.model_loader.weight_utils
import
get_lock
# Only set the tokenizer here, model will be downloaded on the workers.
if
not
os
.
path
.
exists
(
tokenizer_name
):
# Use file lock to prevent multiple processes from
# downloading the same file at the same time.
with
get_lock
(
tokenizer_name
,
download_dir
):
tokenizer_path
=
snapshot_download
(
model_id
=
tokenizer_name
,
cache_dir
=
download_dir
,
revision
=
revision
,
local_files_only
=
huggingface_hub
.
constants
.
HF_HUB_OFFLINE
,
# Ignore weights - we only need the tokenizer.
ignore_file_pattern
=
[
".*.pt"
,
".*.safetensors"
,
".*.bin"
],
)
tokenizer_name
=
tokenizer_path
if
tokenizer_mode
==
"slow"
:
if
kwargs
.
get
(
"use_fast"
,
False
):
raise
ValueError
(
"Cannot use the fast tokenizer in slow tokenizer mode."
)
kwargs
[
"use_fast"
]
=
False
if
"truncation_side"
not
in
kwargs
:
kwargs
[
"truncation_side"
]
=
"left"
# Separate model folder from file path for GGUF models
if
is_gguf
(
tokenizer_name
):
if
check_gguf_file
(
tokenizer_name
):
kwargs
[
"gguf_file"
]
=
Path
(
tokenizer_name
).
name
tokenizer_name
=
Path
(
tokenizer_name
).
parent
elif
is_remote_gguf
(
tokenizer_name
):
tokenizer_name
,
quant_type
=
split_remote_gguf
(
tokenizer_name
)
# Get the HuggingFace Hub path for the GGUF file
gguf_file
=
get_gguf_file_path_from_hf
(
tokenizer_name
,
quant_type
,
revision
=
revision
,
)
kwargs
[
"gguf_file"
]
=
gguf_file
# if `tokenizer_mode` == "auto", check if tokenizer can be loaded via Mistral format
# first to use official Mistral tokenizer if possible.
mistral_common_installed
=
importlib
.
util
.
find_spec
(
"mistral_common"
)
is
not
None
if
tokenizer_mode
==
"auto"
and
mistral_common_installed
:
allow_patterns
=
[
"tekken.json"
,
"tokenizer.model.v*"
]
files_list
=
list_filtered_repo_files
(
model_name_or_path
=
str
(
tokenizer_name
),
allow_patterns
=
allow_patterns
,
revision
=
revision
,
)
if
len
(
files_list
)
>
0
:
tokenizer_mode
=
"mistral"
tokenizer
:
TokenizerLike
if
tokenizer_mode
==
"mistral"
:
logger
.
debug_once
(
f
"Loading MistralTokenizer from
{
tokenizer_name
}
"
)
tokenizer
=
MistralTokenizer
.
from_pretrained
(
tokenizer_name
,
*
args
,
trust_remote_code
=
trust_remote_code
,
revision
=
revision
,
download_dir
=
download_dir
,
**
kwargs
,
)
elif
tokenizer_mode
==
"custom"
:
logger
.
debug_once
(
f
"Loading CustomTokenizer from
{
tokenizer_name
}
"
)
tokenizer
=
TokenizerRegistry
.
get_tokenizer
(
str
(
tokenizer_name
),
*
args
,
trust_remote_code
=
trust_remote_code
,
revision
=
revision
,
download_dir
=
download_dir
,
**
kwargs
,
)
else
:
logger
.
debug_once
(
f
"Loading HfTokenizer from
{
tokenizer_name
}
"
)
tokenizer
=
HfTokenizer
.
from_pretrained
(
tokenizer_name
,
*
args
,
trust_remote_code
=
trust_remote_code
,
revision
=
revision
,
download_dir
=
download_dir
,
**
kwargs
,
)
if
not
tokenizer
.
is_fast
:
logger
.
warning
(
"Using a slow tokenizer. This might cause a significant "
"slowdown. Consider using a fast tokenizer instead."
)
return
tokenizer
cached_get_tokenizer
=
lru_cache
(
get_tokenizer
)
cached_get_tokenizer
=
lru_cache
(
get_tokenizer
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment