Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
27b0f86d
Commit 27b0f86d authored Jul 26, 2019 by thomwolf
Browse files
clean up pretrained
parent
57e54ec0
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing 1 changed file with 5 additions and 2 deletions
+5
-2
pytorch_transformers/tokenization_utils.py
pytorch_transformers/tokenization_utils.py
+5
-2
No files found.
pytorch_transformers/tokenization_utils.py
View file @
27b0f86d
...
@@ -152,11 +152,13 @@ class PreTrainedTokenizer(object):
...
@@ -152,11 +152,13 @@ class PreTrainedTokenizer(object):
@
classmethod
@
classmethod
def
_from_pretrained
(
cls
,
pretrained_model_name_or_path
,
cache_dir
=
None
,
*
inputs
,
**
kwargs
):
def
_from_pretrained
(
cls
,
pretrained_model_name_or_path
,
*
inputs
,
**
kwargs
):
"""
"""
Instantiate a PreTrainedTokenizer from pre-trained vocabulary files.
Instantiate a PreTrainedTokenizer from pre-trained vocabulary files.
Download and cache the vocabulary files if needed.
Download and cache the vocabulary files if needed.
"""
"""
cache_dir
=
kwargs
.
pop
(
'cache_dir'
,
None
)
s3_models
=
list
(
cls
.
max_model_input_sizes
.
keys
())
s3_models
=
list
(
cls
.
max_model_input_sizes
.
keys
())
vocab_files
=
{}
vocab_files
=
{}
if
pretrained_model_name_or_path
in
s3_models
:
if
pretrained_model_name_or_path
in
s3_models
:
...
@@ -308,7 +310,8 @@ class PreTrainedTokenizer(object):
...
@@ -308,7 +310,8 @@ class PreTrainedTokenizer(object):
to_add_tokens
=
[]
to_add_tokens
=
[]
for
token
in
new_tokens
:
for
token
in
new_tokens
:
if
self
.
convert_tokens_to_ids
(
token
)
==
self
.
convert_tokens_to_ids
(
self
.
unk_token
):
if
token
!=
self
.
unk_token
and
\
self
.
convert_tokens_to_ids
(
token
)
==
self
.
convert_tokens_to_ids
(
self
.
unk_token
):
to_add_tokens
.
append
(
token
)
to_add_tokens
.
append
(
token
)
logger
.
info
(
"Adding %s to the vocabulary"
,
token
)
logger
.
info
(
"Adding %s to the vocabulary"
,
token
)
...
...
Write
Preview
Markdown is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment.