chenpangpang / transformers · Commit 7ead04ce (unverified)
Authored Dec 26, 2019 by Anthony MOI
Parent: 1f82a5d9

FastPreTrainedTokenizer => PreTrainedTokenizerFast
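
Note for downstream code: this commit is a pure rename, so imports of the old class name break. A minimal migration sketch, assuming the class was imported directly from the module edited below:

    # Old name, gone after this commit:
    # from transformers.tokenization_utils import FastPreTrainedTokenizer
    # New name introduced by this commit:
    from transformers.tokenization_utils import PreTrainedTokenizerFast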
Showing 3 changed files with 6 additions and 6 deletions:
src/transformers/tokenization_bert.py   +2 -2
src/transformers/tokenization_gpt2.py   +2 -2
src/transformers/tokenization_utils.py  +2 -2
src/transformers/tokenization_bert.py

@@ -22,7 +22,7 @@ import unicodedata
 
 import tokenizers as tk
 
-from .tokenization_utils import FastPreTrainedTokenizer, PreTrainedTokenizer
+from .tokenization_utils import PreTrainedTokenizerFast, PreTrainedTokenizer
 
 
 logger = logging.getLogger(__name__)

@@ -529,7 +529,7 @@ def _is_punctuation(char):
     return False
 
 
-class BertTokenizerFast(FastPreTrainedTokenizer):
+class BertTokenizerFast(PreTrainedTokenizerFast):
     vocab_files_names = VOCAB_FILES_NAMES
     pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
     pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
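
A hypothetical usage sketch (not part of this commit): BertTokenizerFast does not override loading here, so it should be constructible like the slow tokenizer via the inherited from_pretrained; the checkpoint name and method behavior are assumptions:

    from transformers.tokenization_bert import BertTokenizerFast

    # Assumes from_pretrained and encode are inherited from PreTrainedTokenizer
    # and that the "bert-base-uncased" checkpoint is available.
    tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")
    ids = tokenizer.encode("Hello, world!")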
src/transformers/tokenization_gpt2.py

@@ -23,7 +23,7 @@ from functools import lru_cache
 import regex as re
 import tokenizers as tk
 
-from .tokenization_utils import FastPreTrainedTokenizer, PreTrainedTokenizer
+from .tokenization_utils import PreTrainedTokenizerFast, PreTrainedTokenizer
 
 
 logger = logging.getLogger(__name__)

@@ -249,7 +249,7 @@ class GPT2Tokenizer(PreTrainedTokenizer):
         return vocab_file, merge_file
 
 
-class GPT2TokenizerFast(FastPreTrainedTokenizer):
+class GPT2TokenizerFast(PreTrainedTokenizerFast):
     vocab_files_names = VOCAB_FILES_NAMES
     pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
     max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
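
Both fast tokenizers in this commit subclass the renamed base, so callers can detect a Rust-backed tokenizer with a single isinstance check; supports_fast_backend is a hypothetical helper, not part of this diff:

    from transformers.tokenization_utils import PreTrainedTokenizer, PreTrainedTokenizerFast

    def supports_fast_backend(tok: PreTrainedTokenizer) -> bool:
        # True for BertTokenizerFast, GPT2TokenizerFast, and any other
        # subclass of the renamed PreTrainedTokenizerFast base.
        return isinstance(tok, PreTrainedTokenizerFast)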
src/transformers/tokenization_utils.py

@@ -1412,9 +1412,9 @@ class PreTrainedTokenizer(object):
         return out_string
 
 
-class FastPreTrainedTokenizer(PreTrainedTokenizer):
+class PreTrainedTokenizerFast(PreTrainedTokenizer):
     def __init__(self, **kwargs):
-        super(FastPreTrainedTokenizer, self).__init__(**kwargs)
+        super(PreTrainedTokenizerFast, self).__init__(**kwargs)
 
     @property
     def tokenizer(self):
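
The diff truncates the body of the tokenizer property. A minimal sketch of the renamed class, with the property body filled in under the assumption that subclasses store the underlying tokenizers object on self._tokenizer (that attribute name is a guess, not shown in this diff):

    from transformers.tokenization_utils import PreTrainedTokenizer

    class PreTrainedTokenizerFast(PreTrainedTokenizer):
        def __init__(self, **kwargs):
            super(PreTrainedTokenizerFast, self).__init__(**kwargs)

        @property
        def tokenizer(self):
            # Assumption: subclasses assign a tokenizers.Tokenizer instance
            # to self._tokenizer; the real body is elided in the diff above.
            return self._tokenizer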