Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
653591d5
Unverified
Commit
653591d5
authored
Dec 02, 2025
by
Cyrus Leung
Committed by
GitHub
Dec 02, 2025
Browse files
[Chore] Move tokenizer initialization methods (#29793)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
e2fbfc95
Changes
51
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
104 additions
and
72 deletions
+104
-72
vllm/model_executor/models/nano_nemotron_vl.py
vllm/model_executor/models/nano_nemotron_vl.py
+2
-5
vllm/model_executor/models/pixtral.py
vllm/model_executor/models/pixtral.py
+1
-2
vllm/model_executor/models/voxtral.py
vllm/model_executor/models/voxtral.py
+1
-2
vllm/model_executor/models/whisper.py
vllm/model_executor/models/whisper.py
+3
-3
vllm/multimodal/registry.py
vllm/multimodal/registry.py
+1
-2
vllm/tokenizers/__init__.py
vllm/tokenizers/__init__.py
+10
-1
vllm/tokenizers/registry.py
vllm/tokenizers/registry.py
+37
-1
vllm/transformers_utils/tokenizer.py
vllm/transformers_utils/tokenizer.py
+43
-48
vllm/v1/engine/async_llm.py
vllm/v1/engine/async_llm.py
+2
-3
vllm/v1/engine/llm_engine.py
vllm/v1/engine/llm_engine.py
+2
-3
vllm/v1/structured_output/__init__.py
vllm/v1/structured_output/__init__.py
+2
-2
No files found.
vllm/model_executor/models/nano_nemotron_vl.py
View file @
653591d5
...
@@ -73,12 +73,9 @@ from vllm.multimodal.processing import (
...
@@ -73,12 +73,9 @@ from vllm.multimodal.processing import (
)
)
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
from
vllm.sequence
import
IntermediateTensors
from
vllm.sequence
import
IntermediateTensors
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tokenizers
import
TokenizerLike
,
cached_tokenizer_from_config
from
vllm.transformers_utils.configs.radio
import
RadioConfig
from
vllm.transformers_utils.configs.radio
import
RadioConfig
from
vllm.transformers_utils.tokenizer
import
(
from
vllm.transformers_utils.tokenizer
import
encode_tokens
cached_tokenizer_from_config
,
encode_tokens
,
)
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
.utils
import
_merge_multimodal_embeddings
from
.utils
import
_merge_multimodal_embeddings
...
...
vllm/model_executor/models/pixtral.py
View file @
653591d5
...
@@ -59,8 +59,7 @@ from vllm.multimodal.processing import (
...
@@ -59,8 +59,7 @@ from vllm.multimodal.processing import (
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
,
ProcessorInputs
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
,
ProcessorInputs
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
from
vllm.sequence
import
IntermediateTensors
from
vllm.sequence
import
IntermediateTensors
from
vllm.tokenizers
import
MistralTokenizer
from
vllm.tokenizers
import
MistralTokenizer
,
cached_tokenizer_from_config
from
vllm.transformers_utils.tokenizer
import
cached_tokenizer_from_config
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
.interfaces
import
MultiModalEmbeddings
,
SupportsMultiModal
,
SupportsPP
from
.interfaces
import
MultiModalEmbeddings
,
SupportsMultiModal
,
SupportsPP
...
...
vllm/model_executor/models/voxtral.py
View file @
653591d5
...
@@ -51,8 +51,7 @@ from vllm.multimodal.processing import (
...
@@ -51,8 +51,7 @@ from vllm.multimodal.processing import (
)
)
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
,
ProcessorInputs
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
,
ProcessorInputs
from
vllm.sequence
import
IntermediateTensors
from
vllm.sequence
import
IntermediateTensors
from
vllm.tokenizers
import
MistralTokenizer
from
vllm.tokenizers
import
MistralTokenizer
,
cached_tokenizer_from_config
from
vllm.transformers_utils.tokenizer
import
cached_tokenizer_from_config
from
.interfaces
import
SupportsLoRA
,
SupportsMultiModal
,
SupportsTranscription
from
.interfaces
import
SupportsLoRA
,
SupportsMultiModal
,
SupportsTranscription
from
.utils
import
init_vllm_registered_model
,
maybe_prefix
from
.utils
import
init_vllm_registered_model
,
maybe_prefix
...
...
vllm/model_executor/models/whisper.py
View file @
653591d5
...
@@ -48,7 +48,7 @@ from vllm.multimodal.processing import (
...
@@ -48,7 +48,7 @@ from vllm.multimodal.processing import (
PromptUpdate
,
PromptUpdate
,
)
)
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
from
vllm.transformers_utils.processor
import
cached_
get_
processor
from
vllm.transformers_utils.processor
import
cached_processor
_from_config
from
vllm.utils.jsontree
import
json_map_leaves
from
vllm.utils.jsontree
import
json_map_leaves
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
vllm.utils.torch_utils
import
set_default_torch_dtype
from
vllm.utils.torch_utils
import
set_default_torch_dtype
...
@@ -850,7 +850,7 @@ class WhisperForConditionalGeneration(
...
@@ -850,7 +850,7 @@ class WhisperForConditionalGeneration(
def
get_speech_to_text_config
(
def
get_speech_to_text_config
(
cls
,
model_config
:
ModelConfig
,
task_type
:
str
cls
,
model_config
:
ModelConfig
,
task_type
:
str
)
->
SpeechToTextConfig
:
)
->
SpeechToTextConfig
:
processor
=
cached_
get_
processor
(
model
_config
.
model
)
processor
=
cached_processor
_from
_config
(
model
_config
)
return
SpeechToTextConfig
(
return
SpeechToTextConfig
(
max_audio_clip_s
=
processor
.
feature_extractor
.
chunk_length
,
max_audio_clip_s
=
processor
.
feature_extractor
.
chunk_length
,
...
@@ -864,7 +864,7 @@ class WhisperForConditionalGeneration(
...
@@ -864,7 +864,7 @@ class WhisperForConditionalGeneration(
stt_config
:
SpeechToTextConfig
,
stt_config
:
SpeechToTextConfig
,
model_config
:
ModelConfig
,
model_config
:
ModelConfig
,
)
->
int
|
None
:
)
->
int
|
None
:
processor
=
cached_
get_
processor
(
model
_config
.
model
)
processor
=
cached_processor
_from
_config
(
model
_config
)
hop_length
=
processor
.
feature_extractor
.
hop_length
hop_length
=
processor
.
feature_extractor
.
hop_length
assert
hop_length
is
not
None
assert
hop_length
is
not
None
# NOTE(NickLucche) user can't pass encoder
# NOTE(NickLucche) user can't pass encoder
...
...
vllm/multimodal/registry.py
View file @
653591d5
...
@@ -6,8 +6,7 @@ from typing import TYPE_CHECKING, Generic, Protocol, TypeVar, cast
...
@@ -6,8 +6,7 @@ from typing import TYPE_CHECKING, Generic, Protocol, TypeVar, cast
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tokenizers
import
TokenizerLike
,
cached_tokenizer_from_config
from
vllm.transformers_utils.tokenizer
import
cached_tokenizer_from_config
from
.cache
import
BaseMultiModalProcessorCache
from
.cache
import
BaseMultiModalProcessorCache
from
.processing
import
(
from
.processing
import
(
...
...
vllm/tokenizers/__init__.py
View file @
653591d5
...
@@ -4,12 +4,21 @@
...
@@ -4,12 +4,21 @@
from
.hf
import
HfTokenizer
from
.hf
import
HfTokenizer
from
.mistral
import
MistralTokenizer
from
.mistral
import
MistralTokenizer
from
.protocol
import
TokenizerLike
from
.protocol
import
TokenizerLike
from
.registry
import
TokenizerRegistry
,
get_tokenizer
from
.registry
import
(
TokenizerRegistry
,
cached_get_tokenizer
,
cached_tokenizer_from_config
,
get_tokenizer
,
init_tokenizer_from_config
,
)
__all__
=
[
__all__
=
[
"TokenizerLike"
,
"TokenizerLike"
,
"HfTokenizer"
,
"HfTokenizer"
,
"MistralTokenizer"
,
"MistralTokenizer"
,
"TokenizerRegistry"
,
"TokenizerRegistry"
,
"cached_get_tokenizer"
,
"get_tokenizer"
,
"get_tokenizer"
,
"cached_tokenizer_from_config"
,
"init_tokenizer_from_config"
,
]
]
vllm/tokenizers/registry.py
View file @
653591d5
...
@@ -2,10 +2,12 @@
...
@@ -2,10 +2,12 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
importlib.util
import
importlib.util
from
collections.abc
import
Callable
from
collections.abc
import
Callable
from
functools
import
lru_cache
from
pathlib
import
Path
from
pathlib
import
Path
from
typing
import
TypeVar
,
overload
from
typing
import
TYPE_CHECKING
,
TypeVar
,
overload
import
huggingface_hub
import
huggingface_hub
from
typing_extensions
import
assert_never
import
vllm.envs
as
envs
import
vllm.envs
as
envs
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
...
@@ -21,6 +23,9 @@ from vllm.utils.import_utils import resolve_obj_by_qualname
...
@@ -21,6 +23,9 @@ from vllm.utils.import_utils import resolve_obj_by_qualname
from
.protocol
import
TokenizerLike
from
.protocol
import
TokenizerLike
if
TYPE_CHECKING
:
from
vllm.config
import
ModelConfig
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
_T
=
TypeVar
(
"_T"
,
bound
=
type
[
TokenizerLike
])
_T
=
TypeVar
(
"_T"
,
bound
=
type
[
TokenizerLike
])
...
@@ -195,3 +200,34 @@ def get_tokenizer(
...
@@ -195,3 +200,34 @@ def get_tokenizer(
)
)
return
tokenizer
return
tokenizer
cached_get_tokenizer
=
lru_cache
(
get_tokenizer
)
def
cached_tokenizer_from_config
(
model_config
:
"ModelConfig"
,
**
kwargs
):
return
cached_get_tokenizer
(
model_config
.
tokenizer
,
tokenizer_mode
=
model_config
.
tokenizer_mode
,
revision
=
model_config
.
tokenizer_revision
,
trust_remote_code
=
model_config
.
trust_remote_code
,
**
kwargs
,
)
def
init_tokenizer_from_config
(
model_config
:
"ModelConfig"
):
runner_type
=
model_config
.
runner_type
if
runner_type
==
"generate"
or
runner_type
==
"draft"
:
truncation_side
=
"left"
elif
runner_type
==
"pooling"
:
truncation_side
=
"right"
else
:
assert_never
(
runner_type
)
return
get_tokenizer
(
model_config
.
tokenizer
,
tokenizer_mode
=
model_config
.
tokenizer_mode
,
trust_remote_code
=
model_config
.
trust_remote_code
,
revision
=
model_config
.
tokenizer_revision
,
truncation_side
=
truncation_side
,
)
vllm/transformers_utils/tokenizer.py
View file @
653591d5
...
@@ -2,17 +2,10 @@
...
@@ -2,17 +2,10 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
warnings
import
warnings
from
functools
import
lru_cache
from
typing
import
Any
from
typing
import
TYPE_CHECKING
,
Any
from
typing_extensions
import
assert_never
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
TokenizerLike
,
get_tokenizer
from
vllm.tokenizers
import
TokenizerLike
if
TYPE_CHECKING
:
from
vllm.config
import
ModelConfig
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
@@ -28,18 +21,54 @@ def __getattr__(name: str):
...
@@ -28,18 +21,54 @@ def __getattr__(name: str):
)
)
return
TokenizerLike
return
TokenizerLike
if
name
==
"get_cached_tokenizer"
:
if
name
==
"get_tokenizer"
:
from
vllm.tokenizers.hf
import
get_cached_tokenizer
from
vllm.tokenizers
import
get_tokenizer
warnings
.
warn
(
"`vllm.transformers_utils.tokenizer.get_tokenizer` "
"has been moved to `vllm.tokenizers.get_tokenizer`. "
"The old name will be removed in v0.13."
,
DeprecationWarning
,
stacklevel
=
2
,
)
return
get_tokenizer
if
name
==
"cached_get_tokenizer"
:
from
vllm.tokenizers
import
cached_get_tokenizer
warnings
.
warn
(
"`vllm.transformers_utils.tokenizer.cached_get_tokenizer` "
"has been moved to `vllm.tokenizers.cached_get_tokenizer`. "
"The old name will be removed in v0.13."
,
DeprecationWarning
,
stacklevel
=
2
,
)
return
cached_get_tokenizer
if
name
==
"cached_tokenizer_from_config"
:
from
vllm.tokenizers
import
cached_tokenizer_from_config
warnings
.
warn
(
"`vllm.transformers_utils.tokenizer.cached_tokenizer_from_config` "
"has been moved to `vllm.tokenizers.cached_tokenizer_from_config`. "
"The old name will be removed in v0.13."
,
DeprecationWarning
,
stacklevel
=
2
,
)
return
cached_tokenizer_from_config
if
name
==
"init_tokenizer_from_configs"
:
from
vllm.tokenizers
import
init_tokenizer_from_config
warnings
.
warn
(
warnings
.
warn
(
"`vllm.transformers_utils.tokenizer.
get_cached_tokenizer
` "
"`vllm.transformers_utils.tokenizer.
init_tokenizer_from_configs
` "
"has been moved to `vllm.tokenizers.
hf.get_cached_tokenizer
`. "
"has been moved to `vllm.tokenizers.
init_tokenizer_from_config
`. "
"The old name will be removed in v0.13."
,
"The old name will be removed in v0.13."
,
DeprecationWarning
,
DeprecationWarning
,
stacklevel
=
2
,
stacklevel
=
2
,
)
)
return
get_cached_tokenizer
return
init_tokenizer_from_config
raise
AttributeError
(
f
"module
{
__name__
!
r
}
has no attribute
{
name
!
r
}
"
)
raise
AttributeError
(
f
"module
{
__name__
!
r
}
has no attribute
{
name
!
r
}
"
)
...
@@ -92,37 +121,3 @@ def encode_tokens(
...
@@ -92,37 +121,3 @@ def encode_tokens(
kw_args
[
"add_special_tokens"
]
=
add_special_tokens
kw_args
[
"add_special_tokens"
]
=
add_special_tokens
return
tokenizer
.
encode
(
text
,
**
kw_args
)
return
tokenizer
.
encode
(
text
,
**
kw_args
)
cached_get_tokenizer
=
lru_cache
(
get_tokenizer
)
def
cached_tokenizer_from_config
(
model_config
:
"ModelConfig"
,
**
kwargs
:
Any
,
):
return
cached_get_tokenizer
(
model_config
.
tokenizer
,
tokenizer_mode
=
model_config
.
tokenizer_mode
,
revision
=
model_config
.
tokenizer_revision
,
trust_remote_code
=
model_config
.
trust_remote_code
,
**
kwargs
,
)
def
init_tokenizer_from_configs
(
model_config
:
"ModelConfig"
):
runner_type
=
model_config
.
runner_type
if
runner_type
==
"generate"
or
runner_type
==
"draft"
:
truncation_side
=
"left"
elif
runner_type
==
"pooling"
:
truncation_side
=
"right"
else
:
assert_never
(
runner_type
)
return
get_tokenizer
(
model_config
.
tokenizer
,
tokenizer_mode
=
model_config
.
tokenizer_mode
,
trust_remote_code
=
model_config
.
trust_remote_code
,
revision
=
model_config
.
tokenizer_revision
,
truncation_side
=
truncation_side
,
)
vllm/v1/engine/async_llm.py
View file @
653591d5
...
@@ -26,10 +26,9 @@ from vllm.plugins.io_processors import get_io_processor
...
@@ -26,10 +26,9 @@ from vllm.plugins.io_processors import get_io_processor
from
vllm.pooling_params
import
PoolingParams
from
vllm.pooling_params
import
PoolingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.tasks
import
SupportedTask
from
vllm.tasks
import
SupportedTask
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tokenizers
import
TokenizerLike
,
init_tokenizer_from_config
from
vllm.tracing
import
init_tracer
from
vllm.tracing
import
init_tracer
from
vllm.transformers_utils.config
import
maybe_register_config_serialize_by_value
from
vllm.transformers_utils.config
import
maybe_register_config_serialize_by_value
from
vllm.transformers_utils.tokenizer
import
init_tokenizer_from_configs
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.utils.async_utils
import
cancel_task_threadsafe
from
vllm.utils.async_utils
import
cancel_task_threadsafe
from
vllm.utils.collection_utils
import
as_list
from
vllm.utils.collection_utils
import
as_list
...
@@ -112,7 +111,7 @@ class AsyncLLM(EngineClient):
...
@@ -112,7 +111,7 @@ class AsyncLLM(EngineClient):
if
self
.
model_config
.
skip_tokenizer_init
:
if
self
.
model_config
.
skip_tokenizer_init
:
tokenizer
=
None
tokenizer
=
None
else
:
else
:
tokenizer
=
init_tokenizer_from_config
s
(
self
.
model_config
)
tokenizer
=
init_tokenizer_from_config
(
self
.
model_config
)
self
.
input_processor
=
InputProcessor
(
self
.
vllm_config
,
tokenizer
)
self
.
input_processor
=
InputProcessor
(
self
.
vllm_config
,
tokenizer
)
self
.
io_processor
=
get_io_processor
(
self
.
io_processor
=
get_io_processor
(
...
...
vllm/v1/engine/llm_engine.py
View file @
653591d5
...
@@ -23,9 +23,8 @@ from vllm.plugins.io_processors import get_io_processor
...
@@ -23,9 +23,8 @@ from vllm.plugins.io_processors import get_io_processor
from
vllm.pooling_params
import
PoolingParams
from
vllm.pooling_params
import
PoolingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.tasks
import
SupportedTask
from
vllm.tasks
import
SupportedTask
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tokenizers
import
TokenizerLike
,
init_tokenizer_from_config
from
vllm.tracing
import
init_tracer
from
vllm.tracing
import
init_tracer
from
vllm.transformers_utils.tokenizer
import
init_tokenizer_from_configs
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.v1.engine
import
EngineCoreRequest
from
vllm.v1.engine
import
EngineCoreRequest
from
vllm.v1.engine.core_client
import
EngineCoreClient
from
vllm.v1.engine.core_client
import
EngineCoreClient
...
@@ -87,7 +86,7 @@ class LLMEngine:
...
@@ -87,7 +86,7 @@ class LLMEngine:
if
self
.
model_config
.
skip_tokenizer_init
:
if
self
.
model_config
.
skip_tokenizer_init
:
tokenizer
=
None
tokenizer
=
None
else
:
else
:
tokenizer
=
init_tokenizer_from_config
s
(
self
.
model_config
)
tokenizer
=
init_tokenizer_from_config
(
self
.
model_config
)
self
.
input_processor
=
InputProcessor
(
self
.
vllm_config
,
tokenizer
)
self
.
input_processor
=
InputProcessor
(
self
.
vllm_config
,
tokenizer
)
self
.
io_processor
=
get_io_processor
(
self
.
io_processor
=
get_io_processor
(
...
...
vllm/v1/structured_output/__init__.py
View file @
653591d5
...
@@ -7,7 +7,7 @@ from typing import TYPE_CHECKING
...
@@ -7,7 +7,7 @@ from typing import TYPE_CHECKING
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.reasoning
import
ReasoningParserManager
from
vllm.reasoning
import
ReasoningParserManager
from
vllm.
transformers_utils.
tokenizer
import
init_tokenizer_from_config
s
from
vllm.tokenizer
s
import
init_tokenizer_from_config
from
vllm.utils.import_utils
import
LazyLoader
from
vllm.utils.import_utils
import
LazyLoader
from
vllm.v1.structured_output.backend_guidance
import
GuidanceBackend
from
vllm.v1.structured_output.backend_guidance
import
GuidanceBackend
from
vllm.v1.structured_output.backend_types
import
(
from
vllm.v1.structured_output.backend_types
import
(
...
@@ -61,7 +61,7 @@ class StructuredOutputManager:
...
@@ -61,7 +61,7 @@ class StructuredOutputManager:
# of CPUs.
# of CPUs.
max_workers
=
max
(
1
,
(
multiprocessing
.
cpu_count
()
+
1
)
//
2
)
max_workers
=
max
(
1
,
(
multiprocessing
.
cpu_count
()
+
1
)
//
2
)
self
.
executor
=
ThreadPoolExecutor
(
max_workers
=
max_workers
)
self
.
executor
=
ThreadPoolExecutor
(
max_workers
=
max_workers
)
self
.
tokenizer
=
init_tokenizer_from_config
s
(
self
.
tokenizer
=
init_tokenizer_from_config
(
model_config
=
self
.
vllm_config
.
model_config
model_config
=
self
.
vllm_config
.
model_config
)
)
reasoning_parser
=
(
reasoning_parser
=
(
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment