Unverified Commit 7ead04ce authored by Anthony MOI

FastPreTrainedTokenizer => PreTrainedTokenizerFast

parent 1f82a5d9
@@ -22,7 +22,7 @@ import unicodedata
 import tokenizers as tk

-from .tokenization_utils import FastPreTrainedTokenizer, PreTrainedTokenizer
+from .tokenization_utils import PreTrainedTokenizerFast, PreTrainedTokenizer

 logger = logging.getLogger(__name__)
@@ -529,7 +529,7 @@ def _is_punctuation(char):
     return False


-class BertTokenizerFast(FastPreTrainedTokenizer):
+class BertTokenizerFast(PreTrainedTokenizerFast):
     vocab_files_names = VOCAB_FILES_NAMES
     pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
     pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
...
@@ -23,7 +23,7 @@ from functools import lru_cache
 import regex as re
 import tokenizers as tk

-from .tokenization_utils import FastPreTrainedTokenizer, PreTrainedTokenizer
+from .tokenization_utils import PreTrainedTokenizerFast, PreTrainedTokenizer

 logger = logging.getLogger(__name__)
@@ -249,7 +249,7 @@ class GPT2Tokenizer(PreTrainedTokenizer):
         return vocab_file, merge_file


-class GPT2TokenizerFast(FastPreTrainedTokenizer):
+class GPT2TokenizerFast(PreTrainedTokenizerFast):
     vocab_files_names = VOCAB_FILES_NAMES
     pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
     max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
...
@@ -1412,9 +1412,9 @@ class PreTrainedTokenizer(object):
         return out_string


-class FastPreTrainedTokenizer(PreTrainedTokenizer):
+class PreTrainedTokenizerFast(PreTrainedTokenizer):
     def __init__(self, **kwargs):
-        super(FastPreTrainedTokenizer, self).__init__(**kwargs)
+        super(PreTrainedTokenizerFast, self).__init__(**kwargs)

     @property
     def tokenizer(self):
...
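For context, a minimal sketch of what a fast tokenizer subclass looks like under the new name. The class MyTokenizerFast and its attributes are hypothetical illustrations; only the base class PreTrainedTokenizerFast (renamed here from FastPreTrainedTokenizer) and the subclassing pattern come from the diff above.

# Hypothetical example; not part of this commit.
from transformers.tokenization_utils import PreTrainedTokenizerFast


class MyTokenizerFast(PreTrainedTokenizerFast):
    # The concrete subclasses touched by this commit (BertTokenizerFast,
    # GPT2TokenizerFast) set class attributes like this to model-specific
    # constants; the value below is a placeholder.
    vocab_files_names = {"vocab_file": "vocab.txt"}

    def __init__(self, vocab_file=None, **kwargs):
        super(MyTokenizerFast, self).__init__(**kwargs)
        # A real subclass would build its backing tokenizer object here,
        # as the diffed modules do via `import tokenizers as tk`.
        self.vocab_file = vocab_file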