Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
1d8c2323
Commit
1d8c2323
authored
Apr 03, 2019
by
thomwolf
Browse files
Fix #436
parent
846b1fd6
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
15 additions
and
5 deletions
+15
-5
pytorch_pretrained_bert/tokenization.py
pytorch_pretrained_bert/tokenization.py
+15
-5
No files found.
pytorch_pretrained_bert/tokenization.py
View file @
1d8c2323
...
...
@@ -105,8 +105,8 @@ class BertTokenizer(object):
self
.
max_len
=
max_len
if
max_len
is
not
None
else
int
(
1e12
)
def
tokenize
(
self
,
text
):
if
self
.
do_basic_tokenize
:
split_tokens
=
[]
if
self
.
do_basic_tokenize
:
for
token
in
self
.
basic_tokenizer
.
tokenize
(
text
):
for
sub_token
in
self
.
wordpiece_tokenizer
.
tokenize
(
token
):
split_tokens
.
append
(
sub_token
)
...
...
@@ -142,6 +142,16 @@ class BertTokenizer(object):
"""
if
pretrained_model_name_or_path
in
PRETRAINED_VOCAB_ARCHIVE_MAP
:
vocab_file
=
PRETRAINED_VOCAB_ARCHIVE_MAP
[
pretrained_model_name_or_path
]
if
'-cased'
in
pretrained_model_name_or_path
and
kwargs
.
get
(
'do_lower_case'
,
True
):
logger
.
warning
(
"The pre-trained model you are loading is a cased model but you have not set "
"`do_lower_case` to False. We are setting `do_lower_case=False` for you but "
"you may want to check this behavior."
)
kwargs
[
'do_lower_case'
]
=
False
elif
'-cased'
not
in
pretrained_model_name_or_path
and
not
kwargs
.
get
(
'do_lower_case'
,
True
):
logger
.
warning
(
"The pre-trained model you are loading is an uncased model but you have set "
"`do_lower_case` to False. We are setting `do_lower_case=True` for you "
"but you may want to check this behavior."
)
kwargs
[
'do_lower_case'
]
=
True
else
:
vocab_file
=
pretrained_model_name_or_path
if
os
.
path
.
isdir
(
vocab_file
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment