import os

import pytest
from transformers import PreTrainedTokenizerBase

from vllm.transformers_utils.tokenizer import get_tokenizer

from ..utils import models_path_prefix

# Alternative kept for reference: resolve the tokenizers relative to
# models_path_prefix instead of by bare hub id.
# TOKENIZER_NAMES = [
#     os.path.join(models_path_prefix, "facebook/opt-125m"),
#     os.path.join(models_path_prefix, "gpt2"),
# ]

# Environment hint kept from the original author:
#   export HF_ENDPOINT=https://hf-mirror.com
TOKENIZER_NAMES = [
    "facebook/opt-125m",
    "gpt2",
]


@pytest.mark.parametrize("tokenizer_name", TOKENIZER_NAMES)
def test_tokenizer_revision(tokenizer_name: str):
    """Check tokenizer loading and the error raised for an unknown revision.

    For each name in TOKENIZER_NAMES the tokenizer is loaded without an
    explicit revision and must be a ``PreTrainedTokenizerBase``. Requesting
    the revision "never" must raise ``OSError`` whose message matches
    'not a valid git identifier'.
    """
    # The explicit revision="main" variant is currently disabled:
    # tokenizer = get_tokenizer(tokenizer_name, revision="main")
    loaded = get_tokenizer(tokenizer_name)
    assert isinstance(loaded, PreTrainedTokenizerBase)

    # The "never" branch is assumed not to exist for any of the tokenizers.
    missing_revision = "never"
    expected_message = 'not a valid git identifier'
    with pytest.raises(OSError, match=expected_message):
        get_tokenizer(tokenizer_name, revision=missing_revision)