Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
ca1a00a3
Commit
ca1a00a3
authored
Aug 30, 2019
by
thomwolf
Browse files
fix for python2
parent
4e6a3172
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
4 additions
and
2 deletions
+4
-2
pytorch_transformers/tests/tokenization_gpt2_test.py
pytorch_transformers/tests/tokenization_gpt2_test.py
+3
-2
pytorch_transformers/tests/tokenization_roberta_test.py
pytorch_transformers/tests/tokenization_roberta_test.py
+1
-0
No files found.
pytorch_transformers/tests/tokenization_gpt2_test.py
View file @
ca1a00a3
...
@@ -17,6 +17,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera
...
@@ -17,6 +17,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera
import
os
import
os
import
unittest
import
unittest
import
json
import
json
from
io
import
open
from
pytorch_transformers.tokenization_gpt2
import
GPT2Tokenizer
,
VOCAB_FILES_NAMES
from
pytorch_transformers.tokenization_gpt2
import
GPT2Tokenizer
,
VOCAB_FILES_NAMES
...
@@ -55,8 +56,8 @@ class GPT2TokenizationTest(CommonTestCases.CommonTokenizerTester):
...
@@ -55,8 +56,8 @@ class GPT2TokenizationTest(CommonTestCases.CommonTokenizerTester):
def
test_full_tokenizer
(
self
):
def
test_full_tokenizer
(
self
):
tokenizer
=
GPT2Tokenizer
(
self
.
vocab_file
,
self
.
merges_file
,
**
self
.
special_tokens_map
)
tokenizer
=
GPT2Tokenizer
(
self
.
vocab_file
,
self
.
merges_file
,
**
self
.
special_tokens_map
)
text
=
"lower"
text
=
"lower
newer
"
bpe_tokens
=
[
"
\u0120
low"
,
"er"
]
bpe_tokens
=
[
"
\u0120
low"
,
"er"
,
"
\u0120
newer"
]
tokens
=
tokenizer
.
tokenize
(
text
)
tokens
=
tokenizer
.
tokenize
(
text
)
self
.
assertListEqual
(
tokens
,
bpe_tokens
)
self
.
assertListEqual
(
tokens
,
bpe_tokens
)
...
...
pytorch_transformers/tests/tokenization_roberta_test.py
View file @
ca1a00a3
...
@@ -17,6 +17,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera
...
@@ -17,6 +17,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera
import
os
import
os
import
json
import
json
import
unittest
import
unittest
from
io
import
open
from
pytorch_transformers.tokenization_roberta
import
RobertaTokenizer
,
VOCAB_FILES_NAMES
from
pytorch_transformers.tokenization_roberta
import
RobertaTokenizer
,
VOCAB_FILES_NAMES
from
.tokenization_tests_commons
import
CommonTestCases
from
.tokenization_tests_commons
import
CommonTestCases
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment