Expose missing mappings (see #3415)

f8823bad · Julien Chaumond · d0c36a7b · f8823bad · f8823bad
Commit f8823bad authored Mar 24, 2020 by Julien Chaumond
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 10 additions and 6 deletions

examples/run_language_modeling.py +8 -4

src/transformers/__init__.py +2 -2

No files found.
--- a/examples/run_language_modeling.py
+++ b/examples/run_language_modeling.py
@@ -38,7 +38,6 @@ from torch.utils.data.distributed import DistributedSampler
 from tqdm import tqdm, trange

 from transformers import (
-    CONFIG_MAPPING,
    MODEL_WITH_LM_HEAD_MAPPING,
    WEIGHTS_NAME,
    AdamW,
@@ -679,7 +678,12 @@ def main():
    elif args.model_name_or_path:
        config = AutoConfig.from_pretrained(args.model_name_or_path, cache_dir=args.cache_dir)
    else:
-        config = CONFIG_MAPPING[args.model_type]()
+        # When we release a pip version exposing CONFIG_MAPPING,
+        # we can do `config = CONFIG_MAPPING[args.model_type]()`.
+        raise ValueError(
+            "You are instantiating a new config instance from scratch. This is not supported, but you can do it from another script, save it,"
+            "and load it from here, using --config_name"
+        )

    if args.tokenizer_name:
        tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name, cache_dir=args.cache_dir)
@@ -687,8 +691,8 @@ def main():
        tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path, cache_dir=args.cache_dir)
    else:
        raise ValueError(
-            "You are instantiating a new {} tokenizer. This is not supported, but you can do it from another script, save it,"
-            "and load it from here, using --tokenizer_name".format(AutoTokenizer.__name__)
+            "You are instantiating a new tokenizer from scratch. This is not supported, but you can do it from another script, save it,"
+            "and load it from here, using --tokenizer_name"
        )

    if args.block_size <= 0:

--- a/src/transformers/__init__.py
+++ b/src/transformers/__init__.py
@@ -32,7 +32,7 @@ from .benchmark_utils import (
    stop_memory_tracing,
 )
 from .configuration_albert import ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, AlbertConfig
-from .configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP, AutoConfig
+from .configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP, CONFIG_MAPPING, AutoConfig
 from .configuration_bart import BartConfig
 from .configuration_bert import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, BertConfig
 from .configuration_camembert import CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, CamembertConfig
@@ -119,7 +119,7 @@ from .pipelines import (
    pipeline,
 )
 from .tokenization_albert import AlbertTokenizer
-from .tokenization_auto import AutoTokenizer
+from .tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
 from .tokenization_bart import BartTokenizer
 from .tokenization_bert import BasicTokenizer, BertTokenizer, BertTokenizerFast, WordpieceTokenizer
 from .tokenization_bert_japanese import BertJapaneseTokenizer, CharacterTokenizer, MecabTokenizer