Improve type checker performance (#13094)

* Conditionally declare `TOKENIZER_MAPPING_NAMES` within an `if TYPE_CHECKING` block so that type checkers don't need to evaluate the RHS of the assignment. This improves the performance of the Pylance/Pyright type checkers. * Update src/transformers/models/auto/tokenization_auto.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Add missing import * Format Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

Improve type checker performance (#13094)
* Conditionally declare `TOKENIZER_MAPPING_NAMES` within an `if TYPE_CHECKING` block so that type checkers don't need to evaluate the RHS of the assignment. This improves the performance of the Pylance/Pyright type checkers. * Update src/transformers/models/auto/tokenization_auto.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Add missing import * Format Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
e46ad22c · Bill Schnurr · GitHub · b9962b86 · e46ad22c
Unverified Commit e46ad22c authored Aug 12, 2021 by Bill Schnurr Committed by GitHub Aug 12, 2021
Show whitespace changes
Inline Side-by-side

Showing with 132 additions and 122 deletions

src/transformers/models/auto/tokenization_auto.py src/transformers/models/auto/tokenization_auto.py +132 -122

No files found.
--- a/src/transformers/models/auto/tokenization_auto.py
+++ b/src/transformers/models/auto/tokenization_auto.py
@@ -18,7 +18,7 @@ import importlib
 import json
 import os
 from collections import OrderedDict
-from typing import Dict, Optional, Union
+from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union
 from ...configuration_utils import PretrainedConfig
 from ...file_utils import (
@@ -43,8 +43,12 @@ from .configuration_auto import (
 logger = logging.get_logger(__name__)
+if TYPE_CHECKING:
-TOKENIZER_MAPPING_NAMES = OrderedDict(
+    # This significantly improves completion suggestion performance when
+    # the transformers package is used with Microsoft's Pylance language server.
+    TOKENIZER_MAPPING_NAMES: OrderedDict[str, Tuple[Optional[str], Optional[str]]] = OrderedDict()
+else:
+    TOKENIZER_MAPPING_NAMES = OrderedDict(
        [
            ("retribert", ("RetriBertTokenizer", "RetriBertTokenizerFast" if is_tokenizers_available() else None)),
            ("roformer", ("RoFormerTokenizer", "RoFormerTokenizerFast" if is_tokenizers_available() else None)),
@@ -118,9 +122,15 @@ TOKENIZER_MAPPING_NAMES = OrderedDict(
            ("layoutlm", ("LayoutLMTokenizer", "LayoutLMTokenizerFast" if is_tokenizers_available() else None)),
            (
                "dpr",
-            ("DPRQuestionEncoderTokenizer", "DPRQuestionEncoderTokenizerFast" if is_tokenizers_available() else None),
+                (
+                    "DPRQuestionEncoderTokenizer",
+                    "DPRQuestionEncoderTokenizerFast" if is_tokenizers_available() else None,
+                ),
+            ),
+            (
+                "squeezebert",
+                ("SqueezeBertTokenizer", "SqueezeBertTokenizerFast" if is_tokenizers_available() else None),
            ),
-        ("squeezebert", ("SqueezeBertTokenizer", "SqueezeBertTokenizerFast" if is_tokenizers_available() else None)),
            ("bert", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
            ("openai-gpt", ("OpenAIGPTTokenizer", "OpenAIGPTTokenizerFast" if is_tokenizers_available() else None)),
            ("gpt2", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)),
@@ -189,7 +199,7 @@ TOKENIZER_MAPPING_NAMES = OrderedDict(
                ),
            ),
        ]
-)
+    )
 TOKENIZER_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, TOKENIZER_MAPPING_NAMES)