Commit 0aa1153f (unverified), authored by Nicolas Patry, committed by GitHub
Browse files

Revert error back into warning for byte fallback conversion. (#22607)

parent 1670be4b
......@@ -19,6 +19,7 @@ All the conversions are grouped here to gather SentencePiece dependencies outsid
allow to make our dependency on SentencePiece optional.
"""
+import warnings
from typing import Dict, List, Tuple
from tokenizers import AddedToken, Regex, Tokenizer, decoders, normalizers, pre_tokenizers, processors
......@@ -450,7 +451,7 @@ class SpmConverter(Converter):
if self.proto.trainer_spec.byte_fallback:
if not getattr(self, "handle_byte_fallback", None):
-raise RuntimeError(
+warnings.warn(
"The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option"
" which is not implemented in the fast tokenizers. In practice this means that the fast version of the"
" tokenizer can produce unknown tokens whereas the sentencepiece version would have converted these "
......
......@@ -24,10 +24,12 @@ class ConvertSlowTokenizerTest(unittest.TestCase):
original_tokenizer_with_bytefallback = FakeOriginalTokenizer(vocab_file=spm_model_file_with_bytefallback)
-with self.assertRaises(RuntimeError) as cm:
+with warnings.catch_warnings(record=True) as w:
_ = SpmConverter(original_tokenizer_with_bytefallback)
+self.assertEqual(len(w), 1)
self.assertIn(
"The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option"
" which is not implemented in the fast tokenizers.",
-str(cm.exception),
+str(w[0].message),
)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment