"tests/test_models_dir_flag.py" did not exist on "4b3cd459dffb08fd462cab9e4df34ca39f4aa7f9"
Unverified commit 2c301ee2, authored by Fanli Lin and committed by GitHub
Browse files

[Bugfix] Fix Incremental Detokenization with `tokenizers == 0.22.0` (#24159)


Signed-off-by: Fanli Lin <fanli.lin@intel.com>
Signed-off-by: Fanli Lin <fanli0116@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
parent 3efb9f4d
...@@ -234,7 +234,7 @@ class FastIncrementalDetokenizer(BaseIncrementalDetokenizer): ...@@ -234,7 +234,7 @@ class FastIncrementalDetokenizer(BaseIncrementalDetokenizer):
try: try:
token = self.stream.step(self.tokenizer, next_token_id) token = self.stream.step(self.tokenizer, next_token_id)
except Exception as e: except Exception as e:
if str(e) != INVALID_PREFIX_ERR_MSG: if not str(e).startswith(INVALID_PREFIX_ERR_MSG):
raise e raise e
# Recover from edge case where tokenizer can produce non-monotonic, # Recover from edge case where tokenizer can produce non-monotonic,
# invalid UTF-8 output, which breaks the internal state of # invalid UTF-8 output, which breaks the internal state of
...@@ -243,7 +243,8 @@ class FastIncrementalDetokenizer(BaseIncrementalDetokenizer): ...@@ -243,7 +243,8 @@ class FastIncrementalDetokenizer(BaseIncrementalDetokenizer):
logger.warning( logger.warning(
"Encountered invalid prefix detokenization error" "Encountered invalid prefix detokenization error"
" for request %s, resetting decode stream.", self.request_id) " for request %s, resetting decode stream.", self.request_id)
self.stream = DecodeStream(self.skip_special_tokens) self.stream = DecodeStream(
skip_special_tokens=self.skip_special_tokens)
token = self.stream.step(self.tokenizer, next_token_id) token = self.stream.step(self.tokenizer, next_token_id)
return token return token
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment