Unverified Commit e46ad22c authored by Bill Schnurr's avatar Bill Schnurr Committed by GitHub
Browse files

Improve type checker performance (#13094)



* conditional declare `TOKENIZER_MAPPING_NAMES` within a `if TYPE_CHECKING` block so that type checkers dont need to evaluate the RHS of the assignment.

this improves performance of the pylance/pyright type checkers

* Update src/transformers/models/auto/tokenization_auto.py
Co-authored-by: default avatarSylvain Gugger <35901082+sgugger@users.noreply.github.com>

* adding missing import

* format
Co-authored-by: default avatarSylvain Gugger <35901082+sgugger@users.noreply.github.com>
parent b9962b86
...@@ -18,7 +18,7 @@ import importlib ...@@ -18,7 +18,7 @@ import importlib
import json import json
import os import os
from collections import OrderedDict from collections import OrderedDict
from typing import Dict, Optional, Union from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...file_utils import ( from ...file_utils import (
...@@ -43,8 +43,12 @@ from .configuration_auto import ( ...@@ -43,8 +43,12 @@ from .configuration_auto import (
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
if TYPE_CHECKING:
TOKENIZER_MAPPING_NAMES = OrderedDict( # This significantly improves completion suggestion performance when
# the transformers package is used with Microsoft's Pylance language server.
TOKENIZER_MAPPING_NAMES: OrderedDict[str, Tuple[Optional[str], Optional[str]]] = OrderedDict()
else:
TOKENIZER_MAPPING_NAMES = OrderedDict(
[ [
("retribert", ("RetriBertTokenizer", "RetriBertTokenizerFast" if is_tokenizers_available() else None)), ("retribert", ("RetriBertTokenizer", "RetriBertTokenizerFast" if is_tokenizers_available() else None)),
("roformer", ("RoFormerTokenizer", "RoFormerTokenizerFast" if is_tokenizers_available() else None)), ("roformer", ("RoFormerTokenizer", "RoFormerTokenizerFast" if is_tokenizers_available() else None)),
...@@ -118,9 +122,15 @@ TOKENIZER_MAPPING_NAMES = OrderedDict( ...@@ -118,9 +122,15 @@ TOKENIZER_MAPPING_NAMES = OrderedDict(
("layoutlm", ("LayoutLMTokenizer", "LayoutLMTokenizerFast" if is_tokenizers_available() else None)), ("layoutlm", ("LayoutLMTokenizer", "LayoutLMTokenizerFast" if is_tokenizers_available() else None)),
( (
"dpr", "dpr",
("DPRQuestionEncoderTokenizer", "DPRQuestionEncoderTokenizerFast" if is_tokenizers_available() else None), (
"DPRQuestionEncoderTokenizer",
"DPRQuestionEncoderTokenizerFast" if is_tokenizers_available() else None,
),
),
(
"squeezebert",
("SqueezeBertTokenizer", "SqueezeBertTokenizerFast" if is_tokenizers_available() else None),
), ),
("squeezebert", ("SqueezeBertTokenizer", "SqueezeBertTokenizerFast" if is_tokenizers_available() else None)),
("bert", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)), ("bert", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
("openai-gpt", ("OpenAIGPTTokenizer", "OpenAIGPTTokenizerFast" if is_tokenizers_available() else None)), ("openai-gpt", ("OpenAIGPTTokenizer", "OpenAIGPTTokenizerFast" if is_tokenizers_available() else None)),
("gpt2", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)), ("gpt2", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)),
...@@ -189,7 +199,7 @@ TOKENIZER_MAPPING_NAMES = OrderedDict( ...@@ -189,7 +199,7 @@ TOKENIZER_MAPPING_NAMES = OrderedDict(
), ),
), ),
] ]
) )
TOKENIZER_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TOKENIZER_MAPPING_NAMES) TOKENIZER_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TOKENIZER_MAPPING_NAMES)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment