Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
9208f57b
Unverified
Commit
9208f57b
authored
Jun 14, 2020
by
Suraj Patil
Committed by
GitHub
Jun 14, 2020
Browse files
BartTokenizerFast (#4878)
parent
403d3098
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
11 additions
and
2 deletions
+11
-2
src/transformers/__init__.py
src/transformers/__init__.py
+1
-1
src/transformers/tokenization_bart.py
src/transformers/tokenization_bart.py
+10
-1
No files found.
src/transformers/__init__.py
View file @
9208f57b
...
...
@@ -118,7 +118,7 @@ from .pipelines import (
# Tokenizers
from
.tokenization_albert
import
AlbertTokenizer
from
.tokenization_auto
import
TOKENIZER_MAPPING
,
AutoTokenizer
from
.tokenization_bart
import
BartTokenizer
,
MBartTokenizer
from
.tokenization_bart
import
BartTokenizer
,
BartTokenizerFast
,
MBartTokenizer
from
.tokenization_bert
import
BasicTokenizer
,
BertTokenizer
,
BertTokenizerFast
,
WordpieceTokenizer
from
.tokenization_bert_japanese
import
BertJapaneseTokenizer
,
CharacterTokenizer
,
MecabTokenizer
from
.tokenization_camembert
import
CamembertTokenizer
...
...
src/transformers/tokenization_bart.py
View file @
9208f57b
...
...
@@ -16,7 +16,7 @@
import
logging
from
typing
import
List
,
Optional
from
.tokenization_roberta
import
RobertaTokenizer
from
.tokenization_roberta
import
RobertaTokenizer
,
RobertaTokenizerFast
from
.tokenization_utils
import
BatchEncoding
from
.tokenization_xlm_roberta
import
XLMRobertaTokenizer
...
...
@@ -44,6 +44,15 @@ class BartTokenizer(RobertaTokenizer):
}
class BartTokenizerFast(RobertaTokenizerFast):
    """BART flavour of ``RobertaTokenizerFast``.

    The subclass adds no methods; it only replaces two class-level lookup
    tables so that every model name listed in ``_all_bart_models`` is
    registered with a 1024 maximum input size and with the module's shared
    ``vocab_url`` / ``merges_url`` download locations.
    """

    # merges and vocab same as Roberta
    # Every listed BART model name maps to the same 1024 limit.
    max_model_input_sizes = dict.fromkeys(_all_bart_models, 1024)

    # One URL per file kind, shared by all listed model names.
    pretrained_vocab_files_map = {
        "vocab_file": dict.fromkeys(_all_bart_models, vocab_url),
        "merges_file": dict.fromkeys(_all_bart_models, merges_url),
    }
_all_mbart_models
=
[
"facebook/mbart-large-en-ro"
]
SPM_URL
=
"https://s3.amazonaws.com/models.huggingface.co/bert/facebook/mbart-large-en-ro/sentence.bpe.model"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment