Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
51397336
"vscode:/vscode.git/clone" did not exist on "f383963f4a2ee4b9cb52abbd4a57270c7b0b68d9"
Unverified
Commit
51397336
authored
May 26, 2020
by
Suraj Patil
Committed by
GitHub
May 25, 2020
Browse files
LongformerTokenizerFast (#4547)
parent
c9c385c5
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
11 additions
and
2 deletions
+11
-2
src/transformers/__init__.py
src/transformers/__init__.py
+1
-1
src/transformers/tokenization_longformer.py
src/transformers/tokenization_longformer.py
+10
-1
No files found.
src/transformers/__init__.py
View file @
51397336
...
...
@@ -139,7 +139,7 @@ from .tokenization_distilbert import DistilBertTokenizer, DistilBertTokenizerFas
from
.tokenization_electra
import
ElectraTokenizer
,
ElectraTokenizerFast
from
.tokenization_flaubert
import
FlaubertTokenizer
from
.tokenization_gpt2
import
GPT2Tokenizer
,
GPT2TokenizerFast
from
.tokenization_longformer
import
LongformerTokenizer
from
.tokenization_longformer
import
LongformerTokenizer
,
LongformerTokenizerFast
from
.tokenization_openai
import
OpenAIGPTTokenizer
,
OpenAIGPTTokenizerFast
from
.tokenization_reformer
import
ReformerTokenizer
from
.tokenization_roberta
import
RobertaTokenizer
,
RobertaTokenizerFast
...
...
src/transformers/tokenization_longformer.py
View file @
51397336
...
...
@@ -15,7 +15,7 @@
import
logging
from
.tokenization_roberta
import
RobertaTokenizer
from
.tokenization_roberta
import
RobertaTokenizer
,
RobertaTokenizerFast
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -40,3 +40,12 @@ class LongformerTokenizer(RobertaTokenizer):
"vocab_file"
:
{
m
:
vocab_url
for
m
in
_all_longformer_models
},
"merges_file"
:
{
m
:
merges_url
for
m
in
_all_longformer_models
},
}
class LongformerTokenizerFast(RobertaTokenizerFast):
    """Fast (Rust-backed) tokenizer for Longformer.

    Longformer reuses RoBERTa's vocabulary and merges files unchanged, so this
    class only points every known Longformer checkpoint at the shared RoBERTa
    vocab/merges URLs and inherits all tokenization behavior from
    ``RobertaTokenizerFast``.
    """

    # Positional-embedding sizes are shared across all Longformer checkpoints.
    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
    # Every model name maps to the same (RoBERTa) vocab and merges files.
    pretrained_vocab_files_map = {
        "vocab_file": dict.fromkeys(_all_longformer_models, vocab_url),
        "merges_file": dict.fromkeys(_all_longformer_models, merges_url),
    }
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment