Commit 653591d5 (unverified), authored by Cyrus Leung, committed by GitHub
Browse files

[Chore] Move tokenizer initialization methods (#29793)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent e2fbfc95
......@@ -6,7 +6,7 @@ import pytest
from vllm.entrypoints.openai.tool_parsers.deepseekv31_tool_parser import (
DeepSeekV31ToolParser,
)
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.tokenizers import get_tokenizer
MODEL = "deepseek-ai/DeepSeek-V3.1"
......
......@@ -14,9 +14,8 @@ from vllm.entrypoints.openai.protocol import (
ToolCall,
)
from vllm.entrypoints.openai.tool_parsers.ernie45_tool_parser import Ernie45ToolParser
from vllm.tokenizers import TokenizerLike
from vllm.tokenizers import TokenizerLike, get_tokenizer
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import get_tokenizer
# Use a common model that is likely to be available
MODEL = "baidu/ERNIE-4.5-21B-A3B-Thinking"
......
......@@ -10,7 +10,7 @@ from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
from vllm.entrypoints.openai.tool_parsers.glm4_moe_tool_parser import (
Glm4MoeModelToolParser,
)
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.tokenizers import get_tokenizer
pytestmark = pytest.mark.cpu_test
......
......@@ -10,9 +10,8 @@ from partial_json_parser.core.options import Allow
from vllm.entrypoints.openai.protocol import DeltaMessage, FunctionCall, ToolCall
from vllm.entrypoints.openai.tool_parsers.jamba_tool_parser import JambaToolParser
from vllm.tokenizers import TokenizerLike
from vllm.tokenizers import TokenizerLike, get_tokenizer
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import get_tokenizer
pytestmark = pytest.mark.cpu_test
......
......@@ -8,7 +8,7 @@ import pytest
from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
from vllm.entrypoints.openai.tool_parsers.kimi_k2_tool_parser import KimiK2ToolParser
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.tokenizers import get_tokenizer
pytestmark = pytest.mark.cpu_test
......
......@@ -13,7 +13,7 @@ from vllm.entrypoints.openai.protocol import (
ToolCall,
)
from vllm.entrypoints.openai.tool_parsers.minimax_tool_parser import MinimaxToolParser
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.tokenizers import get_tokenizer
pytestmark = pytest.mark.cpu_test
......
......@@ -16,7 +16,7 @@ from openai_harmony import (
from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
from vllm.entrypoints.openai.tool_parsers.openai_tool_parser import OpenAIToolParser
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.tokenizers import get_tokenizer
MODEL = "gpt2"
......
......@@ -17,9 +17,8 @@ from vllm.entrypoints.openai.tool_parsers.qwen3coder_tool_parser import (
Qwen3CoderToolParser,
)
from vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser
from vllm.tokenizers import TokenizerLike
from vllm.tokenizers import TokenizerLike, get_tokenizer
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import get_tokenizer
pytestmark = pytest.mark.cpu_test
......
......@@ -15,9 +15,8 @@ from vllm.entrypoints.openai.protocol import (
ToolCall,
)
from vllm.entrypoints.openai.tool_parsers.seed_oss_tool_parser import SeedOssToolParser
from vllm.tokenizers import TokenizerLike
from vllm.tokenizers import TokenizerLike, get_tokenizer
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import get_tokenizer
pytestmark = pytest.mark.cpu_test
......
......@@ -13,9 +13,8 @@ from vllm.entrypoints.openai.protocol import (
ToolCall,
)
from vllm.entrypoints.openai.tool_parsers.xlam_tool_parser import xLAMToolParser
from vllm.tokenizers import TokenizerLike
from vllm.tokenizers import TokenizerLike, get_tokenizer
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import get_tokenizer
pytestmark = pytest.mark.cpu_test
......
......@@ -6,8 +6,8 @@ only get the `eos_token_id` from the tokenizer as defined by
`vllm.LLMEngine._get_eos_token_id`.
"""
from vllm.tokenizers import get_tokenizer
from vllm.transformers_utils.config import try_get_generation_config
from vllm.transformers_utils.tokenizer import get_tokenizer
def test_get_llama3_eos_token():
......
......@@ -44,7 +44,7 @@ from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.entrypoints.cli.serve import ServeSubcommand
from vllm.model_executor.model_loader import get_model_loader
from vllm.platforms import current_platform
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.tokenizers import get_tokenizer
from vllm.utils.argparse_utils import FlexibleArgumentParser
from vllm.utils.mem_constants import GB_bytes
from vllm.utils.network_utils import get_open_port
......
......@@ -9,7 +9,7 @@ import regex as re
from openai import BadRequestError
from tests.utils import RemoteOpenAIServer
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.tokenizers import get_tokenizer
# any model with a chat template should work here
MODEL_NAME = "facebook/opt-125m"
......
......@@ -14,7 +14,7 @@ import pytest
from vllm.platforms import current_platform
from vllm.sampling_params import SamplingParams
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.tokenizers import get_tokenizer
if TYPE_CHECKING:
from tests.conftest import VllmRunner
......
......@@ -47,7 +47,7 @@ from vllm.benchmarks.lib.endpoint_request_func import (
)
from vllm.benchmarks.lib.ready_checker import wait_for_endpoint
from vllm.benchmarks.lib.utils import convert_to_pytorch_benchmark_format, write_to_json
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.tokenizers import get_tokenizer
from vllm.utils.gc_utils import freeze_gc_heap
from vllm.utils.network_utils import join_host_port
......
......@@ -444,7 +444,7 @@ def load_weights_using_from_2_way_softmax(
)
loaded_weights = pooling_model_cls.load_weights(model, weights, load_lm_head=True)
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.tokenizers import get_tokenizer
tokenizer = get_tokenizer(
model_config.tokenizer,
......@@ -498,7 +498,7 @@ def load_weights_no_post_processing(model, weights: Iterable[tuple[str, torch.Te
# Skip ModelForSequenceClassification in MRO to avoid infinite recursion
loaded_weights = type(model).__mro__[1].load_weights(model, weights)
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.tokenizers import get_tokenizer
tokenizer = get_tokenizer(
model_config.tokenizer,
......
......@@ -45,6 +45,7 @@ from vllm.multimodal.processing import (
from vllm.multimodal.profiling import BaseDummyInputsBuilder
from vllm.sampling_params import SamplingParams
from vllm.sequence import IntermediateTensors
from vllm.tokenizers import cached_tokenizer_from_config
from vllm.transformers_utils.configs.deepseek_vl2 import DeepseekVLV2Config
from vllm.transformers_utils.processors.deepseek_ocr import (
BASE_SIZE,
......@@ -53,7 +54,6 @@ from vllm.transformers_utils.processors.deepseek_ocr import (
DeepseekOCRProcessor,
count_tiles,
)
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
from vllm.utils.tensor_schema import TensorSchema, TensorShape
from vllm.v1.sample.logits_processor import (
AdapterLogitsProcessor,
......
......@@ -41,13 +41,13 @@ from vllm.multimodal.processing import (
)
from vllm.multimodal.profiling import BaseDummyInputsBuilder
from vllm.sequence import IntermediateTensors
from vllm.tokenizers import cached_tokenizer_from_config
from vllm.transformers_utils.configs.deepseek_vl2 import (
DeepseekVLV2Config,
MlpProjectorConfig,
VisionEncoderConfig,
)
from vllm.transformers_utils.processors.deepseek_vl2 import DeepseekVLV2Processor
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
from vllm.utils.tensor_schema import TensorSchema, TensorShape
from vllm.utils.torch_utils import set_default_torch_dtype
......
......@@ -59,8 +59,8 @@ from vllm.multimodal.processing import (
)
from vllm.multimodal.profiling import BaseDummyInputsBuilder
from vllm.sequence import IntermediateTensors
from vllm.transformers_utils.processor import cached_get_processor
from vllm.transformers_utils.tokenizer import cached_get_tokenizer
from vllm.tokenizers import cached_tokenizer_from_config
from vllm.transformers_utils.processor import cached_processor_from_config
from vllm.utils.tensor_schema import TensorSchema, TensorShape
from .blip2 import Blip2QFormerModel
......@@ -862,7 +862,7 @@ class GraniteSpeechForConditionalGeneration(
else:
raise ValueError(f"Unsupported task type {task_type}")
tokenizer = cached_get_tokenizer(model_config.model)
tokenizer = cached_tokenizer_from_config(model_config)
chat = [dict(role="user", content=user_prompt)]
prompt = tokenizer.apply_chat_template(
chat,
......@@ -886,7 +886,7 @@ class GraniteSpeechForConditionalGeneration(
model_config: ModelConfig,
) -> int | None:
"""Get the number of audio tokens for an audio duration in sec."""
processor = cached_get_processor(model_config.model)
processor = cached_processor_from_config(model_config)
hop_length = processor.audio_processor.melspec_kwargs["hop_length"]
proj_win_size = processor.audio_processor.projector_window_size
ds_rate = processor.audio_processor.projector_downsample_rate
......
......@@ -19,7 +19,7 @@ from vllm.model_executor.layers.pooler import (
)
from vllm.model_executor.models.llama import LlamaForCausalLM
from vllm.tasks import PoolingTask
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
from vllm.tokenizers import cached_tokenizer_from_config
from vllm.v1.outputs import PoolerOutput
from vllm.v1.pool.metadata import PoolingMetadata
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment