Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
83dba0b6
Commit
83dba0b6
authored
Aug 15, 2019
by
LysandreJik
Browse files
Added RoBERTa tokenizer to AutoTokenizer
parent
e24e19ce
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
8 additions
and
3 deletions
+8
-3
pytorch_transformers/modeling_auto.py
pytorch_transformers/modeling_auto.py
+2
-2
pytorch_transformers/tokenization_auto.py
pytorch_transformers/tokenization_auto.py
+6
-1
No files found.
pytorch_transformers/modeling_auto.py
View file @
83dba0b6
...
@@ -127,7 +127,7 @@ class AutoConfig(object):
...
@@ -127,7 +127,7 @@ class AutoConfig(object):
raise
ValueError
(
"Unrecognized model identifier in {}. Should contains one of "
raise
ValueError
(
"Unrecognized model identifier in {}. Should contains one of "
"'bert', 'openai-gpt', 'gpt2', 'transfo-xl', 'xlnet', "
"'bert', 'openai-gpt', 'gpt2', 'transfo-xl', 'xlnet', "
"'xlm'"
.
format
(
pretrained_model_name_or_path
))
"'xlm'
, 'roberta'
"
.
format
(
pretrained_model_name_or_path
))
class
AutoModel
(
object
):
class
AutoModel
(
object
):
...
@@ -242,4 +242,4 @@ class AutoModel(object):
...
@@ -242,4 +242,4 @@ class AutoModel(object):
raise
ValueError
(
"Unrecognized model identifier in {}. Should contains one of "
raise
ValueError
(
"Unrecognized model identifier in {}. Should contains one of "
"'bert', 'openai-gpt', 'gpt2', 'transfo-xl', 'xlnet', "
"'bert', 'openai-gpt', 'gpt2', 'transfo-xl', 'xlnet', "
"'xlm'"
.
format
(
pretrained_model_name_or_path
))
"'xlm'
, 'roberta'
"
.
format
(
pretrained_model_name_or_path
))
pytorch_transformers/tokenization_auto.py
View file @
83dba0b6
...
@@ -24,6 +24,7 @@ from .tokenization_gpt2 import GPT2Tokenizer
...
@@ -24,6 +24,7 @@ from .tokenization_gpt2 import GPT2Tokenizer
from
.tokenization_transfo_xl
import
TransfoXLTokenizer
from
.tokenization_transfo_xl
import
TransfoXLTokenizer
from
.tokenization_xlnet
import
XLNetTokenizer
from
.tokenization_xlnet
import
XLNetTokenizer
from
.tokenization_xlm
import
XLMTokenizer
from
.tokenization_xlm
import
XLMTokenizer
from
.tokenization_roberta
import
RobertaTokenizer
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -44,6 +45,7 @@ class AutoTokenizer(object):
...
@@ -44,6 +45,7 @@ class AutoTokenizer(object):
- contains `transfo-xl`: TransfoXLTokenizer (Transformer-XL model)
- contains `transfo-xl`: TransfoXLTokenizer (Transformer-XL model)
- contains `xlnet`: XLNetTokenizer (XLNet model)
- contains `xlnet`: XLNetTokenizer (XLNet model)
- contains `xlm`: XLMTokenizer (XLM model)
- contains `xlm`: XLMTokenizer (XLM model)
- contains `roberta`: RobertaTokenizer (RoBERTa model)
This class cannot be instantiated using `__init__()` (throw an error).
This class cannot be instantiated using `__init__()` (throw an error).
"""
"""
...
@@ -64,6 +66,7 @@ class AutoTokenizer(object):
...
@@ -64,6 +66,7 @@ class AutoTokenizer(object):
- contains `transfo-xl`: TransfoXLTokenizer (Transformer-XL model)
- contains `transfo-xl`: TransfoXLTokenizer (Transformer-XL model)
- contains `xlnet`: XLNetTokenizer (XLNet model)
- contains `xlnet`: XLNetTokenizer (XLNet model)
- contains `xlm`: XLMTokenizer (XLM model)
- contains `xlm`: XLMTokenizer (XLM model)
- contains `roberta`: RobertaTokenizer (RoBERTa model)
Params:
Params:
**pretrained_model_name_or_path**: either:
**pretrained_model_name_or_path**: either:
...
@@ -94,7 +97,9 @@ class AutoTokenizer(object):
...
@@ -94,7 +97,9 @@ class AutoTokenizer(object):
return
XLNetTokenizer
.
from_pretrained
(
pretrained_model_name_or_path
,
*
inputs
,
**
kwargs
)
return
XLNetTokenizer
.
from_pretrained
(
pretrained_model_name_or_path
,
*
inputs
,
**
kwargs
)
elif
'xlm'
in
pretrained_model_name_or_path
:
elif
'xlm'
in
pretrained_model_name_or_path
:
return
XLMTokenizer
.
from_pretrained
(
pretrained_model_name_or_path
,
*
inputs
,
**
kwargs
)
return
XLMTokenizer
.
from_pretrained
(
pretrained_model_name_or_path
,
*
inputs
,
**
kwargs
)
elif
'roberta'
in
pretrained_model_name_or_path
:
return
RobertaTokenizer
.
from_pretrained
(
pretrained_model_name_or_path
,
*
inputs
,
**
kwargs
)
raise
ValueError
(
"Unrecognized model identifier in {}. Should contains one of "
raise
ValueError
(
"Unrecognized model identifier in {}. Should contains one of "
"'bert', 'openai-gpt', 'gpt2', 'transfo-xl', 'xlnet', "
"'bert', 'openai-gpt', 'gpt2', 'transfo-xl', 'xlnet', "
"'xlm'"
.
format
(
pretrained_model_name_or_path
))
"'xlm'
, 'roberta'
"
.
format
(
pretrained_model_name_or_path
))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment