Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
ce50305e
Unverified
Commit
ce50305e
authored
Dec 22, 2019
by
Aymeric Augustin
Committed by
GitHub
Dec 22, 2019
Browse files
Merge pull request #2270 from aaugustin/remove-python-2
Remove support for Python 2
parents
b6ea0f43
1a948d70
Changes
155
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
20 additions
and
56 deletions
+20
-56
tests/test_tokenization_albert.py
tests/test_tokenization_albert.py
+1
-1
tests/test_tokenization_auto.py
tests/test_tokenization_auto.py
+1
-1
tests/test_tokenization_bert.py
tests/test_tokenization_bert.py
+1
-2
tests/test_tokenization_bert_japanese.py
tests/test_tokenization_bert_japanese.py
+1
-2
tests/test_tokenization_common.py
tests/test_tokenization_common.py
+5
-30
tests/test_tokenization_ctrl.py
tests/test_tokenization_ctrl.py
+1
-2
tests/test_tokenization_distilbert.py
tests/test_tokenization_distilbert.py
+1
-1
tests/test_tokenization_gpt2.py
tests/test_tokenization_gpt2.py
+1
-2
tests/test_tokenization_openai.py
tests/test_tokenization_openai.py
+1
-1
tests/test_tokenization_roberta.py
tests/test_tokenization_roberta.py
+1
-2
tests/test_tokenization_t5.py
tests/test_tokenization_t5.py
+1
-1
tests/test_tokenization_transfo_xl.py
tests/test_tokenization_transfo_xl.py
+1
-2
tests/test_tokenization_utils.py
tests/test_tokenization_utils.py
+2
-7
tests/test_tokenization_xlm.py
tests/test_tokenization_xlm.py
+1
-1
tests/test_tokenization_xlnet.py
tests/test_tokenization_xlnet.py
+1
-1
No files found.
tests/test_tokenization_albert.py
View file @
ce50305e
...
...
@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
,
division
,
print_function
,
unicode_literals
import
os
import
unittest
...
...
tests/test_tokenization_auto.py
View file @
ce50305e
...
...
@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
,
division
,
print_function
import
logging
import
unittest
...
...
tests/test_tokenization_bert.py
View file @
ce50305e
...
...
@@ -12,11 +12,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
,
division
,
print_function
,
unicode_literals
import
os
import
unittest
from
io
import
open
from
transformers.tokenization_bert
import
(
VOCAB_FILES_NAMES
,
...
...
tests/test_tokenization_bert_japanese.py
View file @
ce50305e
...
...
@@ -12,11 +12,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
,
division
,
print_function
,
unicode_literals
import
os
import
unittest
from
io
import
open
from
transformers.tokenization_bert
import
WordpieceTokenizer
from
transformers.tokenization_bert_japanese
import
(
...
...
tests/test_tokenization_common.py
View file @
ce50305e
...
...
@@ -12,34 +12,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
,
division
,
print_function
,
unicode_literals
import
os
import
pickle
import
shutil
import
sys
import
tempfile
from
io
import
open
if
sys
.
version_info
[
0
]
==
2
:
import
cPickle
as
pickle
class
TemporaryDirectory
(
object
):
"""Context manager for tempfile.mkdtemp() so it's usable with "with" statement."""
def
__enter__
(
self
):
self
.
name
=
tempfile
.
mkdtemp
()
return
self
.
name
def
__exit__
(
self
,
exc_type
,
exc_value
,
traceback
):
shutil
.
rmtree
(
self
.
name
)
else
:
import
pickle
TemporaryDirectory
=
tempfile
.
TemporaryDirectory
unicode
=
str
class
TokenizerTesterMixin
:
...
...
@@ -90,7 +68,7 @@ class TokenizerTesterMixin:
before_tokens
=
tokenizer
.
encode
(
"He is very happy, UNwant
\u00E9
d,running"
,
add_special_tokens
=
False
)
with
TemporaryDirectory
()
as
tmpdirname
:
with
tempfile
.
TemporaryDirectory
()
as
tmpdirname
:
tokenizer
.
save_pretrained
(
tmpdirname
)
tokenizer
=
self
.
tokenizer_class
.
from_pretrained
(
tmpdirname
)
...
...
@@ -108,7 +86,7 @@ class TokenizerTesterMixin:
text
=
"Munich and Berlin are nice cities"
subwords
=
tokenizer
.
tokenize
(
text
)
with
TemporaryDirectory
()
as
tmpdirname
:
with
tempfile
.
TemporaryDirectory
()
as
tmpdirname
:
filename
=
os
.
path
.
join
(
tmpdirname
,
"tokenizer.bin"
)
with
open
(
filename
,
"wb"
)
as
handle
:
...
...
@@ -246,7 +224,7 @@ class TokenizerTesterMixin:
self
.
assertEqual
(
text_2
,
output_text
)
self
.
assertNotEqual
(
len
(
tokens_2
),
0
)
self
.
assertIsInstance
(
text_2
,
(
str
,
unicode
)
)
self
.
assertIsInstance
(
text_2
,
str
)
def
test_encode_decode_with_spaces
(
self
):
tokenizer
=
self
.
get_tokenizer
()
...
...
@@ -268,9 +246,6 @@ class TokenizerTesterMixin:
self
.
assertListEqual
(
weights_list
,
weights_list_2
)
def
test_mask_output
(
self
):
if
sys
.
version_info
<=
(
3
,
0
):
return
tokenizer
=
self
.
get_tokenizer
()
if
tokenizer
.
build_inputs_with_special_tokens
.
__qualname__
.
split
(
"."
)[
0
]
!=
"PreTrainedTokenizer"
:
...
...
tests/test_tokenization_ctrl.py
View file @
ce50305e
...
...
@@ -11,12 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
,
division
,
print_function
,
unicode_literals
import
json
import
os
import
unittest
from
io
import
open
from
transformers.tokenization_ctrl
import
VOCAB_FILES_NAMES
,
CTRLTokenizer
...
...
tests/test_tokenization_distilbert.py
View file @
ce50305e
...
...
@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
,
division
,
print_function
,
unicode_literals
from
transformers.tokenization_distilbert
import
DistilBertTokenizer
...
...
tests/test_tokenization_gpt2.py
View file @
ce50305e
...
...
@@ -12,12 +12,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
,
division
,
print_function
,
unicode_literals
import
json
import
os
import
unittest
from
io
import
open
from
transformers.tokenization_gpt2
import
VOCAB_FILES_NAMES
,
GPT2Tokenizer
...
...
tests/test_tokenization_openai.py
View file @
ce50305e
...
...
@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
,
division
,
print_function
,
unicode_literals
import
json
import
os
...
...
tests/test_tokenization_roberta.py
View file @
ce50305e
...
...
@@ -12,12 +12,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
,
division
,
print_function
,
unicode_literals
import
json
import
os
import
unittest
from
io
import
open
from
transformers.tokenization_roberta
import
VOCAB_FILES_NAMES
,
RobertaTokenizer
...
...
tests/test_tokenization_t5.py
View file @
ce50305e
...
...
@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
,
division
,
print_function
,
unicode_literals
import
os
import
unittest
...
...
tests/test_tokenization_transfo_xl.py
View file @
ce50305e
...
...
@@ -12,11 +12,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
,
division
,
print_function
,
unicode_literals
import
os
import
unittest
from
io
import
open
from
transformers
import
is_torch_available
...
...
tests/test_tokenization_utils.py
View file @
ce50305e
...
...
@@ -12,11 +12,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
,
division
,
print_function
import
unittest
import
six
import
unittest
from
transformers
import
PreTrainedTokenizer
from
transformers.tokenization_gpt2
import
GPT2Tokenizer
...
...
@@ -34,10 +32,7 @@ class TokenizerUtilsTest(unittest.TestCase):
self
.
assertIsInstance
(
tokenizer
,
PreTrainedTokenizer
)
for
special_tok
in
tokenizer
.
all_special_tokens
:
if
six
.
PY2
:
self
.
assertIsInstance
(
special_tok
,
unicode
)
# noqa: F821
else
:
self
.
assertIsInstance
(
special_tok
,
str
)
self
.
assertIsInstance
(
special_tok
,
str
)
special_tok_id
=
tokenizer
.
convert_tokens_to_ids
(
special_tok
)
self
.
assertIsInstance
(
special_tok_id
,
int
)
...
...
tests/test_tokenization_xlm.py
View file @
ce50305e
...
...
@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
,
division
,
print_function
,
unicode_literals
import
json
import
os
...
...
tests/test_tokenization_xlnet.py
View file @
ce50305e
...
...
@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
,
division
,
print_function
,
unicode_literals
import
os
import
unittest
...
...
Prev
1
…
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment