Fix Max Token ID for Qwen-VL-Chat (#11980)

Signed-off-by: Alex-Brooks <Alex.brooks@ibm.com>

Fix Max Token ID for Qwen-VL-Chat (#11980)
Signed-off-by: Alex-Brooks <Alex.brooks@ibm.com>
5340a30d · Alex Brooks · GitHub · 89ce62a3 · 5340a30d
Unverified Commit 5340a30d authored Jan 13, 2025 by Alex Brooks Committed by GitHub Jan 13, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 9 additions and 0 deletions

vllm/transformers_utils/tokenizer.py vllm/transformers_utils/tokenizer.py +9 -0

No files found.
--- a/vllm/transformers_utils/tokenizer.py
+++ b/vllm/transformers_utils/tokenizer.py
+import contextlib
 import os
 import warnings
 from pathlib import Path
@@ -67,7 +68,15 @@ def get_cached_tokenizer(tokenizer: AnyTokenizer) -> AnyTokenizer:
        tokenizer.all_special_tokens_extended)
    tokenizer_all_special_tokens = set(tokenizer.all_special_tokens)
    tokenizer_len = len(tokenizer)
+
    max_token_id = max(tokenizer.get_vocab().values())
+    # Some tokenizers (e.g., QwenTokenizer) add special tokens that are
+    # counted by their vocab_size property but are not returned by
+    # get_vocab(); if vocab_size is implemented, take the greater of
+    # the two values.
+    if hasattr(tokenizer, "vocab_size"):
+        with contextlib.suppress(NotImplementedError):
+            max_token_id = max(max_token_id, tokenizer.vocab_size)

    class CachedTokenizer(tokenizer.__class__):  # type: ignore