Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
83dba0b6
Commit
83dba0b6
authored
Aug 15, 2019
by
LysandreJik
Browse files
Added RoBERTa tokenizer to AutoTokenizer
parent
e24e19ce
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
8 additions
and
3 deletions
+8
-3
pytorch_transformers/modeling_auto.py
pytorch_transformers/modeling_auto.py
+2
-2
pytorch_transformers/tokenization_auto.py
pytorch_transformers/tokenization_auto.py
+6
-1
No files found.
pytorch_transformers/modeling_auto.py
View file @
83dba0b6
...
...
@@ -127,7 +127,7 @@ class AutoConfig(object):
raise
ValueError
(
"Unrecognized model identifier in {}. Should contains one of "
"'bert', 'openai-gpt', 'gpt2', 'transfo-xl', 'xlnet', "
"'xlm'"
.
format
(
pretrained_model_name_or_path
))
"'xlm'
, 'roberta'
"
.
format
(
pretrained_model_name_or_path
))
class
AutoModel
(
object
):
...
...
@@ -242,4 +242,4 @@ class AutoModel(object):
raise
ValueError
(
"Unrecognized model identifier in {}. Should contains one of "
"'bert', 'openai-gpt', 'gpt2', 'transfo-xl', 'xlnet', "
"'xlm'"
.
format
(
pretrained_model_name_or_path
))
"'xlm'
, 'roberta'
"
.
format
(
pretrained_model_name_or_path
))
pytorch_transformers/tokenization_auto.py
View file @
83dba0b6
...
...
@@ -24,6 +24,7 @@ from .tokenization_gpt2 import GPT2Tokenizer
from
.tokenization_transfo_xl
import
TransfoXLTokenizer
from
.tokenization_xlnet
import
XLNetTokenizer
from
.tokenization_xlm
import
XLMTokenizer
from
.tokenization_roberta
import
RobertaTokenizer
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -44,6 +45,7 @@ class AutoTokenizer(object):
- contains `transfo-xl`: TransfoXLTokenizer (Transformer-XL model)
- contains `xlnet`: XLNetTokenizer (XLNet model)
- contains `xlm`: XLMTokenizer (XLM model)
- contains `roberta`: RobertaTokenizer (RoBERTa model)
This class cannot be instantiated using `__init__()` (throws an error).
"""
...
...
@@ -64,6 +66,7 @@ class AutoTokenizer(object):
- contains `transfo-xl`: TransfoXLTokenizer (Transformer-XL model)
- contains `xlnet`: XLNetTokenizer (XLNet model)
- contains `xlm`: XLMTokenizer (XLM model)
- contains `roberta`: RobertaTokenizer (RoBERTa model)
Params:
**pretrained_model_name_or_path**: either:
...
...
@@ -94,7 +97,9 @@ class AutoTokenizer(object):
return
XLNetTokenizer
.
from_pretrained
(
pretrained_model_name_or_path
,
*
inputs
,
**
kwargs
)
elif
'xlm'
in
pretrained_model_name_or_path
:
return
XLMTokenizer
.
from_pretrained
(
pretrained_model_name_or_path
,
*
inputs
,
**
kwargs
)
elif
'roberta'
in
pretrained_model_name_or_path
:
return
RobertaTokenizer
.
from_pretrained
(
pretrained_model_name_or_path
,
*
inputs
,
**
kwargs
)
raise
ValueError
(
"Unrecognized model identifier in {}. Should contains one of "
"'bert', 'openai-gpt', 'gpt2', 'transfo-xl', 'xlnet', "
"'xlm'"
.
format
(
pretrained_model_name_or_path
))
"'xlm'
, 'roberta'
"
.
format
(
pretrained_model_name_or_path
))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment