Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
653591d5
Unverified
Commit
653591d5
authored
Dec 02, 2025
by
Cyrus Leung
Committed by
GitHub
Dec 02, 2025
Browse files
[Chore] Move tokenizer initialization methods (#29793)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
e2fbfc95
Changes
51
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
24 additions
and
29 deletions
+24
-29
tests/tool_use/test_deepseekv31_tool_parser.py
tests/tool_use/test_deepseekv31_tool_parser.py
+1
-1
tests/tool_use/test_ernie45_moe_tool_parser.py
tests/tool_use/test_ernie45_moe_tool_parser.py
+1
-2
tests/tool_use/test_glm4_moe_tool_parser.py
tests/tool_use/test_glm4_moe_tool_parser.py
+1
-1
tests/tool_use/test_jamba_tool_parser.py
tests/tool_use/test_jamba_tool_parser.py
+1
-2
tests/tool_use/test_kimi_k2_tool_parser.py
tests/tool_use/test_kimi_k2_tool_parser.py
+1
-1
tests/tool_use/test_minimax_tool_parser.py
tests/tool_use/test_minimax_tool_parser.py
+1
-1
tests/tool_use/test_openai_tool_parser.py
tests/tool_use/test_openai_tool_parser.py
+1
-1
tests/tool_use/test_qwen3coder_tool_parser.py
tests/tool_use/test_qwen3coder_tool_parser.py
+1
-2
tests/tool_use/test_seed_oss_tool_parser.py
tests/tool_use/test_seed_oss_tool_parser.py
+1
-2
tests/tool_use/test_xlam_tool_parser.py
tests/tool_use/test_xlam_tool_parser.py
+1
-2
tests/transformers_utils/test_config.py
tests/transformers_utils/test_config.py
+1
-1
tests/utils.py
tests/utils.py
+1
-1
tests/v1/entrypoints/openai/test_completion.py
tests/v1/entrypoints/openai/test_completion.py
+1
-1
tests/v1/tpu/test_perf.py
tests/v1/tpu/test_perf.py
+1
-1
vllm/benchmarks/serve.py
vllm/benchmarks/serve.py
+1
-1
vllm/model_executor/models/adapters.py
vllm/model_executor/models/adapters.py
+2
-2
vllm/model_executor/models/deepseek_ocr.py
vllm/model_executor/models/deepseek_ocr.py
+1
-1
vllm/model_executor/models/deepseek_vl2.py
vllm/model_executor/models/deepseek_vl2.py
+1
-1
vllm/model_executor/models/granite_speech.py
vllm/model_executor/models/granite_speech.py
+4
-4
vllm/model_executor/models/gritlm.py
vllm/model_executor/models/gritlm.py
+1
-1
No files found.
tests/tool_use/test_deepseekv31_tool_parser.py
View file @
653591d5
...
@@ -6,7 +6,7 @@ import pytest
...
@@ -6,7 +6,7 @@ import pytest
from
vllm.entrypoints.openai.tool_parsers.deepseekv31_tool_parser
import
(
from
vllm.entrypoints.openai.tool_parsers.deepseekv31_tool_parser
import
(
DeepSeekV31ToolParser
,
DeepSeekV31ToolParser
,
)
)
from
vllm.
transformers_utils.
tokenizer
import
get_tokenizer
from
vllm.tokenizer
s
import
get_tokenizer
MODEL
=
"deepseek-ai/DeepSeek-V3.1"
MODEL
=
"deepseek-ai/DeepSeek-V3.1"
...
...
tests/tool_use/test_ernie45_moe_tool_parser.py
View file @
653591d5
...
@@ -14,9 +14,8 @@ from vllm.entrypoints.openai.protocol import (
...
@@ -14,9 +14,8 @@ from vllm.entrypoints.openai.protocol import (
ToolCall
,
ToolCall
,
)
)
from
vllm.entrypoints.openai.tool_parsers.ernie45_tool_parser
import
Ernie45ToolParser
from
vllm.entrypoints.openai.tool_parsers.ernie45_tool_parser
import
Ernie45ToolParser
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tokenizers
import
TokenizerLike
,
get_tokenizer
from
vllm.tokenizers.detokenizer_utils
import
detokenize_incrementally
from
vllm.tokenizers.detokenizer_utils
import
detokenize_incrementally
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
# Use a common model that is likely to be available
# Use a common model that is likely to be available
MODEL
=
"baidu/ERNIE-4.5-21B-A3B-Thinking"
MODEL
=
"baidu/ERNIE-4.5-21B-A3B-Thinking"
...
...
tests/tool_use/test_glm4_moe_tool_parser.py
View file @
653591d5
...
@@ -10,7 +10,7 @@ from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
...
@@ -10,7 +10,7 @@ from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
from
vllm.entrypoints.openai.tool_parsers.glm4_moe_tool_parser
import
(
from
vllm.entrypoints.openai.tool_parsers.glm4_moe_tool_parser
import
(
Glm4MoeModelToolParser
,
Glm4MoeModelToolParser
,
)
)
from
vllm.
transformers_utils.
tokenizer
import
get_tokenizer
from
vllm.tokenizer
s
import
get_tokenizer
pytestmark
=
pytest
.
mark
.
cpu_test
pytestmark
=
pytest
.
mark
.
cpu_test
...
...
tests/tool_use/test_jamba_tool_parser.py
View file @
653591d5
...
@@ -10,9 +10,8 @@ from partial_json_parser.core.options import Allow
...
@@ -10,9 +10,8 @@ from partial_json_parser.core.options import Allow
from
vllm.entrypoints.openai.protocol
import
DeltaMessage
,
FunctionCall
,
ToolCall
from
vllm.entrypoints.openai.protocol
import
DeltaMessage
,
FunctionCall
,
ToolCall
from
vllm.entrypoints.openai.tool_parsers.jamba_tool_parser
import
JambaToolParser
from
vllm.entrypoints.openai.tool_parsers.jamba_tool_parser
import
JambaToolParser
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tokenizers
import
TokenizerLike
,
get_tokenizer
from
vllm.tokenizers.detokenizer_utils
import
detokenize_incrementally
from
vllm.tokenizers.detokenizer_utils
import
detokenize_incrementally
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
pytestmark
=
pytest
.
mark
.
cpu_test
pytestmark
=
pytest
.
mark
.
cpu_test
...
...
tests/tool_use/test_kimi_k2_tool_parser.py
View file @
653591d5
...
@@ -8,7 +8,7 @@ import pytest
...
@@ -8,7 +8,7 @@ import pytest
from
vllm.entrypoints.openai.protocol
import
FunctionCall
,
ToolCall
from
vllm.entrypoints.openai.protocol
import
FunctionCall
,
ToolCall
from
vllm.entrypoints.openai.tool_parsers.kimi_k2_tool_parser
import
KimiK2ToolParser
from
vllm.entrypoints.openai.tool_parsers.kimi_k2_tool_parser
import
KimiK2ToolParser
from
vllm.
transformers_utils.
tokenizer
import
get_tokenizer
from
vllm.tokenizer
s
import
get_tokenizer
pytestmark
=
pytest
.
mark
.
cpu_test
pytestmark
=
pytest
.
mark
.
cpu_test
...
...
tests/tool_use/test_minimax_tool_parser.py
View file @
653591d5
...
@@ -13,7 +13,7 @@ from vllm.entrypoints.openai.protocol import (
...
@@ -13,7 +13,7 @@ from vllm.entrypoints.openai.protocol import (
ToolCall
,
ToolCall
,
)
)
from
vllm.entrypoints.openai.tool_parsers.minimax_tool_parser
import
MinimaxToolParser
from
vllm.entrypoints.openai.tool_parsers.minimax_tool_parser
import
MinimaxToolParser
from
vllm.
transformers_utils.
tokenizer
import
get_tokenizer
from
vllm.tokenizer
s
import
get_tokenizer
pytestmark
=
pytest
.
mark
.
cpu_test
pytestmark
=
pytest
.
mark
.
cpu_test
...
...
tests/tool_use/test_openai_tool_parser.py
View file @
653591d5
...
@@ -16,7 +16,7 @@ from openai_harmony import (
...
@@ -16,7 +16,7 @@ from openai_harmony import (
from
vllm.entrypoints.openai.protocol
import
FunctionCall
,
ToolCall
from
vllm.entrypoints.openai.protocol
import
FunctionCall
,
ToolCall
from
vllm.entrypoints.openai.tool_parsers.openai_tool_parser
import
OpenAIToolParser
from
vllm.entrypoints.openai.tool_parsers.openai_tool_parser
import
OpenAIToolParser
from
vllm.
transformers_utils.
tokenizer
import
get_tokenizer
from
vllm.tokenizer
s
import
get_tokenizer
MODEL
=
"gpt2"
MODEL
=
"gpt2"
...
...
tests/tool_use/test_qwen3coder_tool_parser.py
View file @
653591d5
...
@@ -17,9 +17,8 @@ from vllm.entrypoints.openai.tool_parsers.qwen3coder_tool_parser import (
...
@@ -17,9 +17,8 @@ from vllm.entrypoints.openai.tool_parsers.qwen3coder_tool_parser import (
Qwen3CoderToolParser
,
Qwen3CoderToolParser
,
)
)
from
vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser
import
Qwen3XMLToolParser
from
vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser
import
Qwen3XMLToolParser
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tokenizers
import
TokenizerLike
,
get_tokenizer
from
vllm.tokenizers.detokenizer_utils
import
detokenize_incrementally
from
vllm.tokenizers.detokenizer_utils
import
detokenize_incrementally
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
pytestmark
=
pytest
.
mark
.
cpu_test
pytestmark
=
pytest
.
mark
.
cpu_test
...
...
tests/tool_use/test_seed_oss_tool_parser.py
View file @
653591d5
...
@@ -15,9 +15,8 @@ from vllm.entrypoints.openai.protocol import (
...
@@ -15,9 +15,8 @@ from vllm.entrypoints.openai.protocol import (
ToolCall
,
ToolCall
,
)
)
from
vllm.entrypoints.openai.tool_parsers.seed_oss_tool_parser
import
SeedOssToolParser
from
vllm.entrypoints.openai.tool_parsers.seed_oss_tool_parser
import
SeedOssToolParser
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tokenizers
import
TokenizerLike
,
get_tokenizer
from
vllm.tokenizers.detokenizer_utils
import
detokenize_incrementally
from
vllm.tokenizers.detokenizer_utils
import
detokenize_incrementally
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
pytestmark
=
pytest
.
mark
.
cpu_test
pytestmark
=
pytest
.
mark
.
cpu_test
...
...
tests/tool_use/test_xlam_tool_parser.py
View file @
653591d5
...
@@ -13,9 +13,8 @@ from vllm.entrypoints.openai.protocol import (
...
@@ -13,9 +13,8 @@ from vllm.entrypoints.openai.protocol import (
ToolCall
,
ToolCall
,
)
)
from
vllm.entrypoints.openai.tool_parsers.xlam_tool_parser
import
xLAMToolParser
from
vllm.entrypoints.openai.tool_parsers.xlam_tool_parser
import
xLAMToolParser
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tokenizers
import
TokenizerLike
,
get_tokenizer
from
vllm.tokenizers.detokenizer_utils
import
detokenize_incrementally
from
vllm.tokenizers.detokenizer_utils
import
detokenize_incrementally
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
pytestmark
=
pytest
.
mark
.
cpu_test
pytestmark
=
pytest
.
mark
.
cpu_test
...
...
tests/transformers_utils/test_config.py
View file @
653591d5
...
@@ -6,8 +6,8 @@ only get the `eos_token_id` from the tokenizer as defined by
...
@@ -6,8 +6,8 @@ only get the `eos_token_id` from the tokenizer as defined by
`vllm.LLMEngine._get_eos_token_id`.
`vllm.LLMEngine._get_eos_token_id`.
"""
"""
from
vllm.tokenizers
import
get_tokenizer
from
vllm.transformers_utils.config
import
try_get_generation_config
from
vllm.transformers_utils.config
import
try_get_generation_config
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
def
test_get_llama3_eos_token
():
def
test_get_llama3_eos_token
():
...
...
tests/utils.py
View file @
653591d5
...
@@ -44,7 +44,7 @@ from vllm.engine.arg_utils import AsyncEngineArgs
...
@@ -44,7 +44,7 @@ from vllm.engine.arg_utils import AsyncEngineArgs
from
vllm.entrypoints.cli.serve
import
ServeSubcommand
from
vllm.entrypoints.cli.serve
import
ServeSubcommand
from
vllm.model_executor.model_loader
import
get_model_loader
from
vllm.model_executor.model_loader
import
get_model_loader
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
from
vllm.
transformers_utils.
tokenizer
import
get_tokenizer
from
vllm.tokenizer
s
import
get_tokenizer
from
vllm.utils.argparse_utils
import
FlexibleArgumentParser
from
vllm.utils.argparse_utils
import
FlexibleArgumentParser
from
vllm.utils.mem_constants
import
GB_bytes
from
vllm.utils.mem_constants
import
GB_bytes
from
vllm.utils.network_utils
import
get_open_port
from
vllm.utils.network_utils
import
get_open_port
...
...
tests/v1/entrypoints/openai/test_completion.py
View file @
653591d5
...
@@ -9,7 +9,7 @@ import regex as re
...
@@ -9,7 +9,7 @@ import regex as re
from
openai
import
BadRequestError
from
openai
import
BadRequestError
from
tests.utils
import
RemoteOpenAIServer
from
tests.utils
import
RemoteOpenAIServer
from
vllm.
transformers_utils.
tokenizer
import
get_tokenizer
from
vllm.tokenizer
s
import
get_tokenizer
# any model with a chat template should work here
# any model with a chat template should work here
MODEL_NAME
=
"facebook/opt-125m"
MODEL_NAME
=
"facebook/opt-125m"
...
...
tests/v1/tpu/test_perf.py
View file @
653591d5
...
@@ -14,7 +14,7 @@ import pytest
...
@@ -14,7 +14,7 @@ import pytest
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.
transformers_utils.
tokenizer
import
get_tokenizer
from
vllm.tokenizer
s
import
get_tokenizer
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
from
tests.conftest
import
VllmRunner
from
tests.conftest
import
VllmRunner
...
...
vllm/benchmarks/serve.py
View file @
653591d5
...
@@ -47,7 +47,7 @@ from vllm.benchmarks.lib.endpoint_request_func import (
...
@@ -47,7 +47,7 @@ from vllm.benchmarks.lib.endpoint_request_func import (
)
)
from
vllm.benchmarks.lib.ready_checker
import
wait_for_endpoint
from
vllm.benchmarks.lib.ready_checker
import
wait_for_endpoint
from
vllm.benchmarks.lib.utils
import
convert_to_pytorch_benchmark_format
,
write_to_json
from
vllm.benchmarks.lib.utils
import
convert_to_pytorch_benchmark_format
,
write_to_json
from
vllm.
transformers_utils.
tokenizer
import
get_tokenizer
from
vllm.tokenizer
s
import
get_tokenizer
from
vllm.utils.gc_utils
import
freeze_gc_heap
from
vllm.utils.gc_utils
import
freeze_gc_heap
from
vllm.utils.network_utils
import
join_host_port
from
vllm.utils.network_utils
import
join_host_port
...
...
vllm/model_executor/models/adapters.py
View file @
653591d5
...
@@ -444,7 +444,7 @@ def load_weights_using_from_2_way_softmax(
...
@@ -444,7 +444,7 @@ def load_weights_using_from_2_way_softmax(
)
)
loaded_weights
=
pooling_model_cls
.
load_weights
(
model
,
weights
,
load_lm_head
=
True
)
loaded_weights
=
pooling_model_cls
.
load_weights
(
model
,
weights
,
load_lm_head
=
True
)
from
vllm.
transformers_utils.
tokenizer
import
get_tokenizer
from
vllm.tokenizer
s
import
get_tokenizer
tokenizer
=
get_tokenizer
(
tokenizer
=
get_tokenizer
(
model_config
.
tokenizer
,
model_config
.
tokenizer
,
...
@@ -498,7 +498,7 @@ def load_weights_no_post_processing(model, weights: Iterable[tuple[str, torch.Te
...
@@ -498,7 +498,7 @@ def load_weights_no_post_processing(model, weights: Iterable[tuple[str, torch.Te
# Skip ModelForSequenceClassification in MRO to avoid infinite recursion
# Skip ModelForSequenceClassification in MRO to avoid infinite recursion
loaded_weights
=
type
(
model
).
__mro__
[
1
].
load_weights
(
model
,
weights
)
loaded_weights
=
type
(
model
).
__mro__
[
1
].
load_weights
(
model
,
weights
)
from
vllm.
transformers_utils.
tokenizer
import
get_tokenizer
from
vllm.tokenizer
s
import
get_tokenizer
tokenizer
=
get_tokenizer
(
tokenizer
=
get_tokenizer
(
model_config
.
tokenizer
,
model_config
.
tokenizer
,
...
...
vllm/model_executor/models/deepseek_ocr.py
View file @
653591d5
...
@@ -45,6 +45,7 @@ from vllm.multimodal.processing import (
...
@@ -45,6 +45,7 @@ from vllm.multimodal.processing import (
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.sequence
import
IntermediateTensors
from
vllm.sequence
import
IntermediateTensors
from
vllm.tokenizers
import
cached_tokenizer_from_config
from
vllm.transformers_utils.configs.deepseek_vl2
import
DeepseekVLV2Config
from
vllm.transformers_utils.configs.deepseek_vl2
import
DeepseekVLV2Config
from
vllm.transformers_utils.processors.deepseek_ocr
import
(
from
vllm.transformers_utils.processors.deepseek_ocr
import
(
BASE_SIZE
,
BASE_SIZE
,
...
@@ -53,7 +54,6 @@ from vllm.transformers_utils.processors.deepseek_ocr import (
...
@@ -53,7 +54,6 @@ from vllm.transformers_utils.processors.deepseek_ocr import (
DeepseekOCRProcessor
,
DeepseekOCRProcessor
,
count_tiles
,
count_tiles
,
)
)
from
vllm.transformers_utils.tokenizer
import
cached_tokenizer_from_config
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
vllm.v1.sample.logits_processor
import
(
from
vllm.v1.sample.logits_processor
import
(
AdapterLogitsProcessor
,
AdapterLogitsProcessor
,
...
...
vllm/model_executor/models/deepseek_vl2.py
View file @
653591d5
...
@@ -41,13 +41,13 @@ from vllm.multimodal.processing import (
...
@@ -41,13 +41,13 @@ from vllm.multimodal.processing import (
)
)
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
from
vllm.sequence
import
IntermediateTensors
from
vllm.sequence
import
IntermediateTensors
from
vllm.tokenizers
import
cached_tokenizer_from_config
from
vllm.transformers_utils.configs.deepseek_vl2
import
(
from
vllm.transformers_utils.configs.deepseek_vl2
import
(
DeepseekVLV2Config
,
DeepseekVLV2Config
,
MlpProjectorConfig
,
MlpProjectorConfig
,
VisionEncoderConfig
,
VisionEncoderConfig
,
)
)
from
vllm.transformers_utils.processors.deepseek_vl2
import
DeepseekVLV2Processor
from
vllm.transformers_utils.processors.deepseek_vl2
import
DeepseekVLV2Processor
from
vllm.transformers_utils.tokenizer
import
cached_tokenizer_from_config
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
vllm.utils.torch_utils
import
set_default_torch_dtype
from
vllm.utils.torch_utils
import
set_default_torch_dtype
...
...
vllm/model_executor/models/granite_speech.py
View file @
653591d5
...
@@ -59,8 +59,8 @@ from vllm.multimodal.processing import (
...
@@ -59,8 +59,8 @@ from vllm.multimodal.processing import (
)
)
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
from
vllm.sequence
import
IntermediateTensors
from
vllm.sequence
import
IntermediateTensors
from
vllm.t
ransformers_utils.processor
import
cached_get_processor
from
vllm.t
okenizers
import
cached_tokenizer_from_config
from
vllm.transformers_utils.
tokenize
r
import
cached_
get_tokenizer
from
vllm.transformers_utils.
processo
r
import
cached_
processor_from_config
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
.blip2
import
Blip2QFormerModel
from
.blip2
import
Blip2QFormerModel
...
@@ -862,7 +862,7 @@ class GraniteSpeechForConditionalGeneration(
...
@@ -862,7 +862,7 @@ class GraniteSpeechForConditionalGeneration(
else
:
else
:
raise
ValueError
(
f
"Unsupported task type
{
task_type
}
"
)
raise
ValueError
(
f
"Unsupported task type
{
task_type
}
"
)
tokenizer
=
cached_
get_
tokenizer
(
model
_config
.
model
)
tokenizer
=
cached_tokenizer
_from
_config
(
model
_config
)
chat
=
[
dict
(
role
=
"user"
,
content
=
user_prompt
)]
chat
=
[
dict
(
role
=
"user"
,
content
=
user_prompt
)]
prompt
=
tokenizer
.
apply_chat_template
(
prompt
=
tokenizer
.
apply_chat_template
(
chat
,
chat
,
...
@@ -886,7 +886,7 @@ class GraniteSpeechForConditionalGeneration(
...
@@ -886,7 +886,7 @@ class GraniteSpeechForConditionalGeneration(
model_config
:
ModelConfig
,
model_config
:
ModelConfig
,
)
->
int
|
None
:
)
->
int
|
None
:
"""Get the number of audio tokens for an audio duration in sec."""
"""Get the number of audio tokens for an audio duration in sec."""
processor
=
cached_
get_
processor
(
model
_config
.
model
)
processor
=
cached_processor
_from
_config
(
model
_config
)
hop_length
=
processor
.
audio_processor
.
melspec_kwargs
[
"hop_length"
]
hop_length
=
processor
.
audio_processor
.
melspec_kwargs
[
"hop_length"
]
proj_win_size
=
processor
.
audio_processor
.
projector_window_size
proj_win_size
=
processor
.
audio_processor
.
projector_window_size
ds_rate
=
processor
.
audio_processor
.
projector_downsample_rate
ds_rate
=
processor
.
audio_processor
.
projector_downsample_rate
...
...
vllm/model_executor/models/gritlm.py
View file @
653591d5
...
@@ -19,7 +19,7 @@ from vllm.model_executor.layers.pooler import (
...
@@ -19,7 +19,7 @@ from vllm.model_executor.layers.pooler import (
)
)
from
vllm.model_executor.models.llama
import
LlamaForCausalLM
from
vllm.model_executor.models.llama
import
LlamaForCausalLM
from
vllm.tasks
import
PoolingTask
from
vllm.tasks
import
PoolingTask
from
vllm.
transformers_utils.
tokenizer
import
cached_tokenizer_from_config
from
vllm.tokenizer
s
import
cached_tokenizer_from_config
from
vllm.v1.outputs
import
PoolerOutput
from
vllm.v1.outputs
import
PoolerOutput
from
vllm.v1.pool.metadata
import
PoolingMetadata
from
vllm.v1.pool.metadata
import
PoolingMetadata
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment