Update where `bytes_to_unicode` is imported from (#30771)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

Update where `bytes_to_unicode` is imported from (#30771)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
e1625498 · Harry Mellor · GitHub · 0b0acc75 · e1625498
Unverified commit e1625498, authored Dec 16, 2025 by Harry Mellor; committed by GitHub on Dec 16, 2025
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 4 additions and 6 deletions

vllm/v1/structured_output/utils.py vllm/v1/structured_output/utils.py +4 -6

No files found.
--- a/vllm/v1/structured_output/utils.py
+++ b/vllm/v1/structured_output/utils.py
@@ -21,8 +21,8 @@ from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput
 if TYPE_CHECKING:
    import outlines_core as oc
    import transformers.file_utils as file_utils
-    import transformers.models.gpt2.tokenization_gpt2 as tokenization_gpt2
    import xgrammar as xgr
+    from transformers.convert_slow_tokenizer import bytes_to_unicode
    from vllm.tokenizers import TokenizerLike
    from vllm.v1.worker.gpu_input_batch import InputBatch
@@ -30,10 +30,8 @@ else:
    xgr = LazyLoader("xgr", globals(), "xgrammar")
    oc = LazyLoader("oc", globals(), "outlines_core")
    file_utils = LazyLoader("file_utils", globals(), "transformers.file_utils")
-    tokenization_gpt2 = LazyLoader(
+    bytes_to_unicode = LazyLoader(
-        "tokenization_gpt2",
+        "bytes_to_unicode", globals(), "transformers.convert_slow_tokenizer"
-        globals(),
-        "transformers.models.gpt2.tokenization_gpt2",
    )
    TokenizerLike = object
@@ -204,7 +202,7 @@ def _reduced_vocabulary(
        A Dict of token string -> equivalent token ids
    """
-    unicode_to_bytes = {v: k for k, v in tokenization_gpt2.bytes_to_unicode().items()}
+    unicode_to_bytes = {v: k for k, v in bytes_to_unicode().items()}
    def convert_token_to_string(token: str) -> str:
        string = tokenizer.convert_tokens_to_string([token])