Commit 653591d5 (unverified), authored by Cyrus Leung, committed by GitHub
Browse files

[Chore] Move tokenizer initialization methods (#29793)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent e2fbfc95
...@@ -40,7 +40,7 @@ from vllm.engine.arg_utils import EngineArgs ...@@ -40,7 +40,7 @@ from vllm.engine.arg_utils import EngineArgs
from vllm.utils.argparse_utils import FlexibleArgumentParser from vllm.utils.argparse_utils import FlexibleArgumentParser
try: try:
from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.tokenizers import get_tokenizer
except ImportError: except ImportError:
from backend_request_func import get_tokenizer from backend_request_func import get_tokenizer
......
...@@ -46,7 +46,7 @@ from tqdm.asyncio import tqdm ...@@ -46,7 +46,7 @@ from tqdm.asyncio import tqdm
from transformers import PreTrainedTokenizerBase from transformers import PreTrainedTokenizerBase
try: try:
from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.tokenizers import get_tokenizer
except ImportError: except ImportError:
from backend_request_func import get_tokenizer from backend_request_func import get_tokenizer
......
...@@ -8,7 +8,7 @@ import torch ...@@ -8,7 +8,7 @@ import torch
from vllm import LLM, SamplingParams from vllm import LLM, SamplingParams
from vllm.config.compilation import CompilationMode, DynamicShapesType from vllm.config.compilation import CompilationMode, DynamicShapesType
from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.tokenizers import get_tokenizer
from vllm.utils.torch_utils import is_torch_equal_or_newer from vllm.utils.torch_utils import is_torch_equal_or_newer
......
...@@ -6,7 +6,7 @@ import pytest ...@@ -6,7 +6,7 @@ import pytest
from vllm.config import ModelConfig from vllm.config import ModelConfig
from vllm.entrypoints.chat_utils import apply_hf_chat_template, load_chat_template from vllm.entrypoints.chat_utils import apply_hf_chat_template, load_chat_template
from vllm.entrypoints.openai.protocol import ChatCompletionRequest from vllm.entrypoints.openai.protocol import ChatCompletionRequest
from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.tokenizers import get_tokenizer
from ...models.registry import HF_EXAMPLE_MODELS from ...models.registry import HF_EXAMPLE_MODELS
from ...utils import VLLM_PATH from ...utils import VLLM_PATH
......
...@@ -14,7 +14,7 @@ from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion ...@@ -14,7 +14,7 @@ from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.lora.resolver import LoRAResolver, LoRAResolverRegistry from vllm.lora.resolver import LoRAResolver, LoRAResolverRegistry
from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.tokenizers import get_tokenizer
from vllm.v1.engine.async_llm import AsyncLLM from vllm.v1.engine.async_llm import AsyncLLM
MODEL_NAME = "openai-community/gpt2" MODEL_NAME = "openai-community/gpt2"
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
import pytest import pytest
from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.tokenizers import get_tokenizer
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
import pytest import pytest
from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.tokenizers import get_tokenizer
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer
......
...@@ -14,7 +14,7 @@ from vllm.config.multimodal import MultiModalConfig ...@@ -14,7 +14,7 @@ from vllm.config.multimodal import MultiModalConfig
from vllm.entrypoints.openai.protocol import ChatCompletionRequest from vllm.entrypoints.openai.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.tokenizers import get_tokenizer
from vllm.v1.engine.async_llm import AsyncLLM from vllm.v1.engine.async_llm import AsyncLLM
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer
......
...@@ -7,7 +7,7 @@ import tempfile ...@@ -7,7 +7,7 @@ import tempfile
import pytest import pytest
from vllm.model_executor.model_loader.weight_utils import download_weights_from_hf from vllm.model_executor.model_loader.weight_utils import download_weights_from_hf
from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.tokenizers import get_tokenizer
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer
......
...@@ -5,7 +5,7 @@ import pytest ...@@ -5,7 +5,7 @@ import pytest
import pytest_asyncio import pytest_asyncio
import requests import requests
from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.tokenizers import get_tokenizer
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer
......
...@@ -271,7 +271,7 @@ async def test_streaming_product_tool_call(): ...@@ -271,7 +271,7 @@ async def test_streaming_product_tool_call():
@pytest.fixture @pytest.fixture
def qwen_tokenizer() -> TokenizerLike: def qwen_tokenizer() -> TokenizerLike:
from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.tokenizers import get_tokenizer
return get_tokenizer("Qwen/Qwen3-32B") return get_tokenizer("Qwen/Qwen3-32B")
......
...@@ -18,7 +18,7 @@ from tests.utils import RemoteOpenAIServer ...@@ -18,7 +18,7 @@ from tests.utils import RemoteOpenAIServer
from vllm.entrypoints.pooling.embed.protocol import EmbeddingResponse from vllm.entrypoints.pooling.embed.protocol import EmbeddingResponse
from vllm.entrypoints.pooling.pooling.protocol import PoolingResponse from vllm.entrypoints.pooling.pooling.protocol import PoolingResponse
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.tokenizers import get_tokenizer
from vllm.utils.serial_utils import ( from vllm.utils.serial_utils import (
EMBED_DTYPE_TO_TORCH_DTYPE, EMBED_DTYPE_TO_TORCH_DTYPE,
ENDIANNESS, ENDIANNESS,
......
...@@ -12,7 +12,7 @@ import torch ...@@ -12,7 +12,7 @@ import torch
from tests.models.utils import check_embeddings_close from tests.models.utils import check_embeddings_close
from tests.utils import RemoteOpenAIServer from tests.utils import RemoteOpenAIServer
from vllm.entrypoints.pooling.pooling.protocol import PoolingResponse from vllm.entrypoints.pooling.pooling.protocol import PoolingResponse
from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.tokenizers import get_tokenizer
from vllm.utils.serial_utils import ( from vllm.utils.serial_utils import (
EMBED_DTYPE_TO_TORCH_DTYPE, EMBED_DTYPE_TO_TORCH_DTYPE,
ENDIANNESS, ENDIANNESS,
......
...@@ -28,8 +28,7 @@ from vllm.multimodal.utils import ( ...@@ -28,8 +28,7 @@ from vllm.multimodal.utils import (
encode_image_base64, encode_image_base64,
encode_video_base64, encode_video_base64,
) )
from vllm.tokenizers import MistralTokenizer from vllm.tokenizers import MistralTokenizer, get_tokenizer
from vllm.transformers_utils.tokenizer import get_tokenizer
from ..models.registry import HF_EXAMPLE_MODELS from ..models.registry import HF_EXAMPLE_MODELS
from ..utils import VLLM_PATH from ..utils import VLLM_PATH
......
...@@ -22,11 +22,8 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalDataDict ...@@ -22,11 +22,8 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalDataDict
from vllm.multimodal.cache import MultiModalProcessorOnlyCache from vllm.multimodal.cache import MultiModalProcessorOnlyCache
from vllm.multimodal.inputs import MultiModalInputs from vllm.multimodal.inputs import MultiModalInputs
from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext
from vllm.tokenizers import MistralTokenizer from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config
from vllm.transformers_utils.tokenizer import ( from vllm.transformers_utils.tokenizer import encode_tokens
cached_tokenizer_from_config,
encode_tokens,
)
from ....multimodal.utils import random_audio, random_image, random_video from ....multimodal.utils import random_audio, random_image, random_video
from ...registry import ( from ...registry import (
......
...@@ -31,7 +31,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, BatchedTensorInputs ...@@ -31,7 +31,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, BatchedTensorInputs
from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext
from vllm.multimodal.utils import group_mm_kwargs_by_modality from vllm.multimodal.utils import group_mm_kwargs_by_modality
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config from vllm.tokenizers import cached_tokenizer_from_config
from vllm.utils.collection_utils import is_list_of from vllm.utils.collection_utils import is_list_of
from vllm.utils.torch_utils import set_default_torch_dtype from vllm.utils.torch_utils import set_default_torch_dtype
......
...@@ -13,7 +13,7 @@ from transformers import PretrainedConfig ...@@ -13,7 +13,7 @@ from transformers import PretrainedConfig
from vllm.config.model import ModelConfig, ModelDType, RunnerOption from vllm.config.model import ModelConfig, ModelDType, RunnerOption
from vllm.logprobs import Logprob, PromptLogprobs, SampleLogprobs from vllm.logprobs import Logprob, PromptLogprobs, SampleLogprobs
from vllm.multimodal.processing import InputProcessingContext from vllm.multimodal.processing import InputProcessingContext
from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config from vllm.tokenizers import cached_tokenizer_from_config
from .. import ci_envs from .. import ci_envs
from .registry import HF_EXAMPLE_MODELS from .registry import HF_EXAMPLE_MODELS
......
...@@ -7,7 +7,7 @@ from vllm.config import ModelConfig ...@@ -7,7 +7,7 @@ from vllm.config import ModelConfig
from vllm.inputs import zip_enc_dec_prompts from vllm.inputs import zip_enc_dec_prompts
from vllm.inputs.parse import parse_raw_prompts from vllm.inputs.parse import parse_raw_prompts
from vllm.inputs.preprocess import InputPreprocessor from vllm.inputs.preprocess import InputPreprocessor
from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs from vllm.tokenizers import init_tokenizer_from_config
pytestmark = pytest.mark.cpu_test pytestmark = pytest.mark.cpu_test
...@@ -108,7 +108,7 @@ def test_zip_enc_dec_prompts(mm_processor_kwargs, expected_mm_kwargs): ...@@ -108,7 +108,7 @@ def test_zip_enc_dec_prompts(mm_processor_kwargs, expected_mm_kwargs):
) )
def test_preprocessor_always_mm_code_path(model_id, prompt): def test_preprocessor_always_mm_code_path(model_id, prompt):
model_config = ModelConfig(model=model_id) model_config = ModelConfig(model=model_id)
tokenizer = init_tokenizer_from_configs(model_config) tokenizer = init_tokenizer_from_config(model_config)
input_preprocessor = InputPreprocessor(model_config, tokenizer) input_preprocessor = InputPreprocessor(model_config, tokenizer)
# HF processor adds sep token # HF processor adds sep token
......
...@@ -5,8 +5,7 @@ from typing import _get_protocol_attrs # type: ignore ...@@ -5,8 +5,7 @@ from typing import _get_protocol_attrs # type: ignore
import pytest import pytest
from transformers import PreTrainedTokenizerBase from transformers import PreTrainedTokenizerBase
from vllm.tokenizers import TokenizerLike from vllm.tokenizers import TokenizerLike, get_tokenizer
from vllm.transformers_utils.tokenizer import get_tokenizer
def _get_missing_attrs(obj: object, target: type): def _get_missing_attrs(obj: object, target: type):
......
...@@ -2,8 +2,7 @@ ...@@ -2,8 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from pathlib import Path from pathlib import Path
from vllm.tokenizers import TokenizerLike, TokenizerRegistry from vllm.tokenizers import TokenizerLike, TokenizerRegistry, get_tokenizer
from vllm.transformers_utils.tokenizer import get_tokenizer
class TestTokenizer(TokenizerLike): class TestTokenizer(TokenizerLike):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment