"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "c849a61e6569209f8199d8d4c56b2f171af9e32d"
Unverified Commit 588e6caa authored by NielsRogge's avatar NielsRogge Committed by GitHub
Browse files

Overwrite get_clean_sequence as this was causing a bottleneck (#13183)

parent 14373821
......@@ -15,6 +15,7 @@
import unittest
from typing import Tuple
from transformers import AddedToken, LukeTokenizer
from transformers.testing_utils import require_torch, slow
......@@ -81,6 +82,11 @@ class Luke(TokenizerTesterMixin, unittest.TestCase):
assert encoded_sentence == encoded_text_from_decode
assert encoded_pair == encoded_pair_from_decode
def get_clean_sequence(self, tokenizer, max_length=20) -> Tuple[str, list]:
    """Return a fixed sample sentence together with its token ids.

    The sentence is hard-coded and encoded with ``add_special_tokens=False``,
    so no per-call work beyond a single ``tokenizer.encode`` happens here.

    Args:
        tokenizer: Object exposing ``encode(text, add_special_tokens=...)``.
        max_length: Accepted for signature compatibility; not used by this
            implementation.

    Returns:
        A ``(text, ids)`` pair where ``ids`` is whatever ``tokenizer.encode``
        returned for ``text``.
    """
    sample_text = "Beyonce lives in Los Angeles"
    return sample_text, tokenizer.encode(sample_text, add_special_tokens=False)
def test_space_encoding(self):
tokenizer = self.get_tokenizer()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment