Commit ca1a00a3 authored by thomwolf's avatar thomwolf
Browse files

fix for python2

parent 4e6a3172
@@ -17,6 +17,7 @@ from __future__ import absolute_import, division, print_function, unicode_literals
import os
import unittest
import json
from io import open
from pytorch_transformers.tokenization_gpt2 import GPT2Tokenizer, VOCAB_FILES_NAMES
@@ -55,8 +56,8 @@ class GPT2TokenizationTest(CommonTestCases.CommonTokenizerTester):
    def test_full_tokenizer(self):
        tokenizer = GPT2Tokenizer(self.vocab_file, self.merges_file, **self.special_tokens_map)
        text = "lower newer"
        bpe_tokens = ["\u0120low", "er", "\u0120newer"]
        tokens = tokenizer.tokenize(text)
        self.assertListEqual(tokens, bpe_tokens)
......
@@ -17,6 +17,7 @@ from __future__ import absolute_import, division, print_function, unicode_literals
import os
import json
import unittest
from io import open
from pytorch_transformers.tokenization_roberta import RobertaTokenizer, VOCAB_FILES_NAMES
from .tokenization_tests_commons import CommonTestCases
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment