"...glm130b_fastertransformer.git" did not exist on "f8a481f890bd74375e57e5b4430e47696253ad96"
Commit dd2add9f authored by Pascal Voitot, committed by Lysandre Debut
Browse files

more tests

parent df160af7
......@@ -109,7 +109,7 @@ class BertTokenizationTest(CommonTestCases.CommonTokenizerTester):
decoded = tokenizer.decode(encoded)
self.assertEqual(
decoded.lower(),
(f"[CLS] {input.lower()} [SEP]").lower()
(f"[CLS] {input} [SEP]").lower()
)
......
......@@ -67,6 +67,20 @@ class GPT2TokenizationTest(CommonTestCases.CommonTokenizerTester):
self.assertListEqual(
tokenizer.convert_tokens_to_ids(input_tokens), input_bpe_tokens)
def test_encode_decode_with_spaces(self):
    """Check that text with added tokens survives an encode/decode round trip.

    Three tokens are added to the tokenizer, one of which ('GHI IHG')
    contains an internal space. The sample sentence uses the added tokens
    both surrounded by spaces and directly attached to a preceding word
    ("lower[DEF]"). The decoded text is compared with the original after
    lower-casing both sides, so the check is case-insensitive.
    """
    tokenizer = self.get_tokenizer()

    new_toks = ['[ABC]', '[DEF]', 'GHI IHG']
    tokenizer.add_tokens(new_toks)

    # Named `text` rather than `input` so the builtin `input()` is not
    # shadowed inside this test.
    text = "lower newer [ABC] [DEF] newer lower [ABC] GHI IHG newer lower[DEF]"
    encoded = tokenizer.encode(text)
    decoded = tokenizer.decode(encoded)

    self.assertEqual(decoded.lower(), text.lower())
# Run this module's tests only when the file is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    unittest.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment