# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import os

import pytest
from transformers import AutoTokenizer, PreTrainedTokenizerBase

from ..utils import models_path_prefix
from vllm.transformers_utils.tokenizer_group import TokenizerGroup

# Environment hint kept from the original file (presumably a mirror for
# machines that cannot reach the Hugging Face hub directly -- confirm):
# export HF_ENDPOINT=https://hf-mirror.com


@pytest.mark.asyncio
async def test_tokenizer_group():
    """Compare TokenizerGroup against a plain AutoTokenizer for "gpt2".

    Verifies that the sync and async ``encode`` paths both return the same
    token ids as the reference tokenizer, that the tokenizer returned for a
    ``None`` LoRA request is a ``PreTrainedTokenizerBase``, and that the sync
    and async LoRA tokenizer getters return equal objects.
    """
    # The reference tokenizer is resolved by its hub id; the local-path
    # variant, os.path.join(models_path_prefix, "gpt2"), is left disabled
    # here exactly as in the original test.
    hf_tokenizer = AutoTokenizer.from_pretrained("gpt2")

    local_model_dir = os.path.join(models_path_prefix, "gpt2")
    group = TokenizerGroup(
        tokenizer_id=local_model_dir,
        enable_lora=False,
        max_num_seqs=1,
        max_input_length=None,
    )

    # Synchronous encode must match the reference tokenizer.
    expected_ids = hf_tokenizer.encode("prompt")
    sync_ids = group.encode(prompt="prompt", lora_request=None)
    assert expected_ids == sync_ids

    # Asynchronous encode must match the reference tokenizer as well.
    expected_ids = hf_tokenizer.encode("prompt")
    async_ids = await group.encode_async(prompt="prompt", lora_request=None)
    assert expected_ids == async_ids

    # With no LoRA request the group should hand back a HF tokenizer object.
    assert isinstance(group.get_lora_tokenizer(None), PreTrainedTokenizerBase)

    # Sync and async getters must agree for the no-LoRA case.
    sync_tokenizer = group.get_lora_tokenizer(None)
    async_tokenizer = await group.get_lora_tokenizer_async(None)
    assert sync_tokenizer == async_tokenizer