Unverified commit e1625498, authored by Harry Mellor, committed by GitHub
Browse files

Update where `bytes_to_unicode` is imported from (#30771)


Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent 0b0acc75
...@@ -21,8 +21,8 @@ from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput ...@@ -21,8 +21,8 @@ from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput
if TYPE_CHECKING: if TYPE_CHECKING:
import outlines_core as oc import outlines_core as oc
import transformers.file_utils as file_utils import transformers.file_utils as file_utils
import transformers.models.gpt2.tokenization_gpt2 as tokenization_gpt2
import xgrammar as xgr import xgrammar as xgr
from transformers.convert_slow_tokenizer import bytes_to_unicode
from vllm.tokenizers import TokenizerLike from vllm.tokenizers import TokenizerLike
from vllm.v1.worker.gpu_input_batch import InputBatch from vllm.v1.worker.gpu_input_batch import InputBatch
...@@ -30,10 +30,8 @@ else: ...@@ -30,10 +30,8 @@ else:
xgr = LazyLoader("xgr", globals(), "xgrammar") xgr = LazyLoader("xgr", globals(), "xgrammar")
oc = LazyLoader("oc", globals(), "outlines_core") oc = LazyLoader("oc", globals(), "outlines_core")
file_utils = LazyLoader("file_utils", globals(), "transformers.file_utils") file_utils = LazyLoader("file_utils", globals(), "transformers.file_utils")
tokenization_gpt2 = LazyLoader( bytes_to_unicode = LazyLoader(
"tokenization_gpt2", "bytes_to_unicode", globals(), "transformers.convert_slow_tokenizer"
globals(),
"transformers.models.gpt2.tokenization_gpt2",
) )
TokenizerLike = object TokenizerLike = object
...@@ -204,7 +202,7 @@ def _reduced_vocabulary( ...@@ -204,7 +202,7 @@ def _reduced_vocabulary(
A Dict of token string -> equivalent token ids A Dict of token string -> equivalent token ids
""" """
unicode_to_bytes = {v: k for k, v in tokenization_gpt2.bytes_to_unicode().items()} unicode_to_bytes = {v: k for k, v in bytes_to_unicode().items()}
def convert_token_to_string(token: str) -> str: def convert_token_to_string(token: str) -> str:
string = tokenizer.convert_tokens_to_string([token]) string = tokenizer.convert_tokens_to_string([token])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment