Commit cc228089 authored by VictorSanh's avatar VictorSanh
Browse files

Small fix to ensure Python3 compatibility.

parent ebfffa0a
...@@ -30,13 +30,13 @@ class TokenizationTest(unittest.TestCase): ...@@ -30,13 +30,13 @@ class TokenizationTest(unittest.TestCase):
"[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn", "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn",
"##ing", "," "##ing", ","
] ]
with tempfile.NamedTemporaryFile(delete=False) as vocab_writer: with open("/tmp/bert_tokenizer_test.txt", "w") as vocab_writer:
vocab_writer.write("".join([x + "\n" for x in vocab_tokens])) vocab_writer.write("".join([x + "\n" for x in vocab_tokens]))
vocab_file = vocab_writer.name vocab_file = vocab_writer.name
tokenizer = tokenization.FullTokenizer(vocab_file) tokenizer = tokenization.FullTokenizer(vocab_file)
os.unlink(vocab_file) os.remove(vocab_file)
tokens = tokenizer.tokenize(u"UNwant\u00E9d,running") tokens = tokenizer.tokenize(u"UNwant\u00E9d,running")
self.assertListEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"]) self.assertListEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment