Unverified Commit d4c2cb40 authored by Julien Chaumond, committed by GitHub

Kill model archive maps (#4636)

* Kill model archive maps

* Fixup

* Also kill model_archive_map for MaskedBertPreTrainedModel

* Unhook config_archive_map

* Tokenizers: align with model id changes

* make style && make quality

* Fix CI
parent 47a551d1
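The net effect across the files below: model shortcut names are no longer looked up in hard-coded archive maps; any identifier that is not a local path or URL is resolved directly against huggingface.co. A minimal sketch of that resolution, using the hf_bucket_url and cached_path helpers that appear later in this diff (the model id and the printout are only illustrative):

# Sketch only, not part of the diff: locating a config file after this change.
# hf_bucket_url() builds the hub URL straight from the model id; no archive map is consulted.
from transformers.file_utils import CONFIG_NAME, cached_path, hf_bucket_url

model_id = "flaubert/flaubert_base_cased"                   # namespaced model id (example)
config_url = hf_bucket_url(model_id, filename=CONFIG_NAME)  # URL of the config on the hub
local_config_path = cached_path(config_url)                 # download, or reuse the local cache
print(local_config_path)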
@@ -36,5 +36,4 @@ class CamembertConfig(RobertaConfig):
superclass for the appropriate documentation alongside usage examples.
"""
- pretrained_config_archive_map = CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "camembert"
@@ -27,7 +27,7 @@ CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP = {"ctrl": "https://storage.googleapis.com/sf
class CTRLConfig(PretrainedConfig):
"""
- This is the configuration class to store the configuration of an :class:`~transformers.CTRLModel`.
+ This is the configuration class to store the configuration of a :class:`~transformers.CTRLModel`.
It is used to instantiate an CTRL model according to the specified arguments, defining the model
architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
the `ctrl <https://huggingface.co/ctrl>`__ architecture from SalesForce.
@@ -76,13 +76,8 @@ class CTRLConfig(PretrainedConfig):
# Accessing the model configuration
configuration = model.config
- Attributes:
- pretrained_config_archive_map (Dict[str, str]):
- A dictionary containing all the available pre-trained checkpoints.
"""
- pretrained_config_archive_map = CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "ctrl"
def __init__(
...
@@ -90,12 +90,7 @@ class DistilBertConfig(PretrainedConfig):
# Accessing the model configuration
configuration = model.config
- Attributes:
- pretrained_config_archive_map (Dict[str, str]):
- A dictionary containing all the available pre-trained checkpoints.
"""
- pretrained_config_archive_map = DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "distilbert"
def __init__(
...
@@ -89,12 +89,7 @@ class ElectraConfig(PretrainedConfig):
# Accessing the model configuration
configuration = model.config
- Attributes:
- pretrained_config_archive_map (Dict[str, str]):
- A dictionary containing all the available pre-trained checkpoints.
"""
- pretrained_config_archive_map = ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "electra"
def __init__(
...
@@ -23,10 +23,10 @@ from .configuration_xlm import XLMConfig
logger = logging.getLogger(__name__)
FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
- "flaubert-small-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_small_cased/config.json",
- "flaubert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_base_uncased/config.json",
- "flaubert-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_base_cased/config.json",
- "flaubert-large-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_large_cased/config.json",
+ "flaubert/flaubert_small_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_small_cased/config.json",
+ "flaubert/flaubert_base_uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_base_uncased/config.json",
+ "flaubert/flaubert_base_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_base_cased/config.json",
+ "flaubert/flaubert_large_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_large_cased/config.json",
}
@@ -142,7 +142,6 @@ class FlaubertConfig(XLMConfig):
text in a given language.
"""
- pretrained_config_archive_map = FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "flaubert"
def __init__(self, layerdrop=0.0, pre_norm=False, pad_token_id=2, bos_token_id=0, **kwargs):
...
@@ -110,13 +110,8 @@ class GPT2Config(PretrainedConfig):
# Accessing the model configuration
configuration = model.config
- Attributes:
- pretrained_config_archive_map (Dict[str, str]):
- A dictionary containing all the available pre-trained checkpoints.
"""
- pretrained_config_archive_map = GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "gpt2"
def __init__(
...
@@ -33,7 +33,7 @@ LONGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP = {
class LongformerConfig(RobertaConfig):
r"""
- This is the configuration class to store the configuration of an :class:`~transformers.LongformerModel`.
+ This is the configuration class to store the configuration of a :class:`~transformers.LongformerModel`.
It is used to instantiate an Longformer model according to the specified arguments, defining the model
architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
the RoBERTa `roberta-base <https://huggingface.co/roberta-base>`__ architecture with a sequence length 4,096.
@@ -59,12 +59,7 @@ class LongformerConfig(RobertaConfig):
# Accessing the model configuration
configuration = model.config
- Attributes:
- pretrained_config_archive_map (Dict[str, str]):
- A dictionary containing all the available pre-trained checkpoints.
"""
- pretrained_config_archive_map = LONGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "longformer"
def __init__(self, attention_window: Union[List[int], int] = 512, sep_token_id: int = 2, **kwargs):
...
@@ -18,10 +18,9 @@ from .configuration_bart import BartConfig
PRETRAINED_CONFIG_ARCHIVE_MAP = {
- "marian-en-de": "https://s3.amazonaws.com/models.huggingface.co/bert/Helsinki-NLP/opus-mt-en-de/config.json",
+ "Helsinki-NLP/opus-mt-en-de": "https://s3.amazonaws.com/models.huggingface.co/bert/Helsinki-NLP/opus-mt-en-de/config.json",
}
class MarianConfig(BartConfig):
model_type = "marian"
- pretrained_config_archive_map = PRETRAINED_CONFIG_ARCHIVE_MAP
@@ -30,7 +30,7 @@ OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
class OpenAIGPTConfig(PretrainedConfig):
"""
- This is the configuration class to store the configuration of an :class:`~transformers.OpenAIGPTModel`.
+ This is the configuration class to store the configuration of a :class:`~transformers.OpenAIGPTModel`.
It is used to instantiate an GPT model according to the specified arguments, defining the model
architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
the `GPT <https://huggingface.co/openai-gpt>`__ architecture from OpenAI.
@@ -108,13 +108,8 @@ class OpenAIGPTConfig(PretrainedConfig):
# Accessing the model configuration
configuration = model.config
- Attributes:
- pretrained_config_archive_map (Dict[str, str]):
- A dictionary containing all the available pre-trained checkpoints.
"""
- pretrained_config_archive_map = OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "openai-gpt"
def __init__(
...
@@ -135,12 +135,7 @@ class ReformerConfig(PretrainedConfig):
# Accessing the model configuration
configuration = model.config
- Attributes:
- pretrained_config_archive_map (Dict[str, str]):
- A dictionary containing all the available pre-trained checkpoints.
"""
- pretrained_config_archive_map = REFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "reformer"
def __init__(
...
@@ -35,7 +35,7 @@ ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP = {
class RobertaConfig(BertConfig):
r"""
- This is the configuration class to store the configuration of an :class:`~transformers.RobertaModel`.
+ This is the configuration class to store the configuration of a :class:`~transformers.RobertaModel`.
It is used to instantiate an RoBERTa model according to the specified arguments, defining the model
architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
the BERT `bert-base-uncased <https://huggingface.co/bert-base-uncased>`__ architecture.
@@ -59,12 +59,7 @@ class RobertaConfig(BertConfig):
# Accessing the model configuration
configuration = model.config
- Attributes:
- pretrained_config_archive_map (Dict[str, str]):
- A dictionary containing all the available pre-trained checkpoints.
"""
- pretrained_config_archive_map = ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "roberta"
def __init__(self, pad_token_id=1, bos_token_id=0, eos_token_id=2, **kwargs):
...
@@ -59,7 +59,6 @@ class T5Config(PretrainedConfig):
initializer_factor: A factor for initializing all weight matrices (should be kept to 1.0, used for initialization testing).
layer_norm_eps: The epsilon used by LayerNorm.
"""
- pretrained_config_archive_map = T5_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "t5"
def __init__(
...
@@ -30,7 +30,7 @@ TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP = {
class TransfoXLConfig(PretrainedConfig):
"""
- This is the configuration class to store the configuration of an :class:`~transformers.TransfoXLModel`.
+ This is the configuration class to store the configuration of a :class:`~transformers.TransfoXLModel`.
It is used to instantiate a Transformer XL model according to the specified arguments, defining the model
architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
the `Transformer XL <https://huggingface.co/transfo-xl-wt103>`__ architecture.
@@ -110,13 +110,8 @@ class TransfoXLConfig(PretrainedConfig):
# Accessing the model configuration
configuration = model.config
- Attributes:
- pretrained_config_archive_map (Dict[str, str]):
- A dictionary containing all the available pre-trained checkpoints.
"""
- pretrained_config_archive_map = TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "transfo-xl"
def __init__(
...
@@ -20,7 +20,7 @@ import copy
import json
import logging
import os
- from typing import Dict, Optional, Tuple
+ from typing import Dict, Tuple
from .file_utils import CONFIG_NAME, cached_path, hf_bucket_url, is_remote_url
@@ -37,7 +37,6 @@ class PretrainedConfig(object):
It only affects the model's configuration.
Class attributes (overridden by derived classes):
- - ``pretrained_config_archive_map``: a python ``dict`` with `shortcut names` (string) as keys and `url` (string) of associated pretrained model configurations as values.
- ``model_type``: a string that identifies the model type, that we serialize into the JSON file, and that we use to recreate the correct object in :class:`~transformers.AutoConfig`.
Args:
@@ -52,7 +51,6 @@ class PretrainedConfig(object):
torchscript (:obj:`bool`, `optional`, defaults to :obj:`False`):
Is the model used with Torchscript (for PyTorch models).
"""
- pretrained_config_archive_map: Dict[str, str] = {}
model_type: str = ""
def __init__(self, **kwargs):
@@ -204,9 +202,7 @@ class PretrainedConfig(object):
return cls.from_dict(config_dict, **kwargs)
@classmethod
- def get_config_dict(
- cls, pretrained_model_name_or_path: str, pretrained_config_archive_map: Optional[Dict] = None, **kwargs
- ) -> Tuple[Dict, Dict]:
+ def get_config_dict(cls, pretrained_model_name_or_path: str, **kwargs) -> Tuple[Dict, Dict]:
"""
From a `pretrained_model_name_or_path`, resolve to a dictionary of parameters, to be used
for instantiating a Config using `from_dict`.
@@ -214,8 +210,6 @@ class PretrainedConfig(object):
Parameters:
pretrained_model_name_or_path (:obj:`string`):
The identifier of the pre-trained checkpoint from which we want the dictionary of parameters.
- pretrained_config_archive_map: (:obj:`Dict[str, str]`, `optional`) Dict:
- A map of `shortcut names` to `url`. By default, will use the current class attribute.
Returns:
:obj:`Tuple[Dict, Dict]`: The dictionary that will be used to instantiate the configuration object.
@@ -227,12 +221,7 @@ class PretrainedConfig(object):
proxies = kwargs.pop("proxies", None)
local_files_only = kwargs.pop("local_files_only", False)
- if pretrained_config_archive_map is None:
- pretrained_config_archive_map = cls.pretrained_config_archive_map
- if pretrained_model_name_or_path in pretrained_config_archive_map:
- config_file = pretrained_config_archive_map[pretrained_model_name_or_path]
- elif os.path.isdir(pretrained_model_name_or_path):
+ if os.path.isdir(pretrained_model_name_or_path):
config_file = os.path.join(pretrained_model_name_or_path, CONFIG_NAME)
elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path):
config_file = pretrained_model_name_or_path
@@ -255,21 +244,11 @@ class PretrainedConfig(object):
config_dict = cls._dict_from_json_file(resolved_config_file)
except EnvironmentError:
- if pretrained_model_name_or_path in pretrained_config_archive_map:
- msg = "Couldn't reach server at '{}' to download pretrained model configuration file.".format(
- config_file
- )
- else:
- msg = (
- "Can't load '{}'. Make sure that:\n\n"
- "- '{}' is a correct model identifier listed on 'https://huggingface.co/models'\n\n"
- "- or '{}' is the correct path to a directory containing a '{}' file\n\n".format(
- pretrained_model_name_or_path,
- pretrained_model_name_or_path,
- pretrained_model_name_or_path,
- CONFIG_NAME,
- )
- )
+ msg = (
+ f"Can't load config for '{pretrained_model_name_or_path}'. Make sure that:\n\n"
+ f"- '{pretrained_model_name_or_path}' is a correct model identifier listed on 'https://huggingface.co/models'\n\n"
+ f"- or '{pretrained_model_name_or_path}' is the correct path to a directory containing a {CONFIG_NAME} file\n\n"
+ )
raise EnvironmentError(msg)
except json.JSONDecodeError:
...
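In plain terms, get_config_dict now takes only the model identifier plus keyword arguments: a string that is not a local directory, file, or URL is treated as a model id to resolve on huggingface.co. A hedged sketch of the resulting call path (the model id is illustrative):

# Sketch only, assuming the post-change signature shown in the hunk above:
# get_config_dict(pretrained_model_name_or_path, **kwargs) -> (config_dict, unused_kwargs)
from transformers import BertConfig

config_dict, unused_kwargs = BertConfig.get_config_dict("bert-base-uncased")
config = BertConfig.from_dict(config_dict)  # the same object from_pretrained() would build
print(config.model_type)                    # "bert"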
@@ -152,13 +152,8 @@ class XLMConfig(PretrainedConfig):
# Accessing the model configuration
configuration = model.config
- Attributes:
- pretrained_config_archive_map (Dict[str, str]):
- A dictionary containing all the available pre-trained checkpoints.
"""
- pretrained_config_archive_map = XLM_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "xlm"
def __init__(
...
@@ -39,5 +39,4 @@ class XLMRobertaConfig(RobertaConfig):
superclass for the appropriate documentation alongside usage examples.
"""
- pretrained_config_archive_map = XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "xlm-roberta"
@@ -122,13 +122,8 @@ class XLNetConfig(PretrainedConfig):
# Accessing the model configuration
configuration = model.config
- Attributes:
- pretrained_config_archive_map (Dict[str, str]):
- A dictionary containing all the available pre-trained checkpoints.
"""
- pretrained_config_archive_map = XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "xlnet"
def __init__(
...
@@ -32,6 +32,7 @@ from transformers import (
ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
T5_PRETRAINED_CONFIG_ARCHIVE_MAP,
TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP,
+ WEIGHTS_NAME,
XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,
XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,
@@ -70,6 +71,7 @@ from transformers import (
XLMRobertaConfig,
XLNetConfig,
cached_path,
+ hf_bucket_url,
is_torch_available,
load_pytorch_checkpoint_in_tf2_model,
)
@@ -82,261 +84,103 @@ if is_torch_available():
BertForPreTraining,
BertForQuestionAnswering,
BertForSequenceClassification,
- BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
GPT2LMHeadModel,
- GPT2_PRETRAINED_MODEL_ARCHIVE_MAP,
XLNetLMHeadModel,
- XLNET_PRETRAINED_MODEL_ARCHIVE_MAP,
XLMWithLMHeadModel,
- XLM_PRETRAINED_MODEL_ARCHIVE_MAP,
- XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
XLMRobertaForMaskedLM,
TransfoXLLMHeadModel,
- TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP,
OpenAIGPTLMHeadModel,
- OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP,
RobertaForMaskedLM,
RobertaForSequenceClassification,
- ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
CamembertForMaskedLM,
- CamembertForSequenceClassification,
- CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
FlaubertWithLMHeadModel,
DistilBertForMaskedLM,
DistilBertForQuestionAnswering,
- DistilBertForSequenceClassification,
- DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
CTRLLMHeadModel,
- CTRL_PRETRAINED_MODEL_ARCHIVE_MAP,
AlbertForPreTraining,
- ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
T5ForConditionalGeneration,
- T5_PRETRAINED_MODEL_ARCHIVE_MAP,
ElectraForPreTraining,
- ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP,
- )
- else:
- (
- BertForPreTraining,
- BertForQuestionAnswering,
- BertForSequenceClassification,
- BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- GPT2LMHeadModel,
- GPT2_PRETRAINED_MODEL_ARCHIVE_MAP,
- XLNetLMHeadModel,
- XLNET_PRETRAINED_MODEL_ARCHIVE_MAP,
- XLMWithLMHeadModel,
- XLM_PRETRAINED_MODEL_ARCHIVE_MAP,
- XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
- XLMRobertaForMaskedLM,
- TransfoXLLMHeadModel,
- TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP,
- OpenAIGPTLMHeadModel,
- OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP,
- RobertaForMaskedLM,
- RobertaForSequenceClassification,
- ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
- CamembertForMaskedLM,
- CamembertForSequenceClassification,
- CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- FlaubertWithLMHeadModel,
- DistilBertForMaskedLM,
- DistilBertForSequenceClassification,
- DistilBertForQuestionAnswering,
- DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- CTRLLMHeadModel,
- CTRL_PRETRAINED_MODEL_ARCHIVE_MAP,
- AlbertForPreTraining,
- ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- T5ForConditionalGeneration,
- T5_PRETRAINED_MODEL_ARCHIVE_MAP,
- ElectraForPreTraining,
- ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP,
- ) = (
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- )
)
logging.basicConfig(level=logging.INFO)
MODEL_CLASSES = {
- "bert": (
- BertConfig,
- TFBertForPreTraining,
- BertForPreTraining,
- BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
- ),
+ "bert": (BertConfig, TFBertForPreTraining, BertForPreTraining, BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,),
"bert-large-uncased-whole-word-masking-finetuned-squad": (
BertConfig,
TFBertForQuestionAnswering,
BertForQuestionAnswering,
- BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"bert-large-cased-whole-word-masking-finetuned-squad": (
BertConfig,
TFBertForQuestionAnswering,
BertForQuestionAnswering,
- BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"bert-base-cased-finetuned-mrpc": (
BertConfig,
TFBertForSequenceClassification,
BertForSequenceClassification,
- BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
- "gpt2": (
- GPT2Config,
- TFGPT2LMHeadModel,
- GPT2LMHeadModel,
- GPT2_PRETRAINED_MODEL_ARCHIVE_MAP,
- GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,
- ),
- "xlnet": (
- XLNetConfig,
- TFXLNetLMHeadModel,
- XLNetLMHeadModel,
- XLNET_PRETRAINED_MODEL_ARCHIVE_MAP,
- XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,
- ),
- "xlm": (
- XLMConfig,
- TFXLMWithLMHeadModel,
- XLMWithLMHeadModel,
- XLM_PRETRAINED_MODEL_ARCHIVE_MAP,
- XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,
- ),
+ "gpt2": (GPT2Config, TFGPT2LMHeadModel, GPT2LMHeadModel, GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,),
+ "xlnet": (XLNetConfig, TFXLNetLMHeadModel, XLNetLMHeadModel, XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,),
+ "xlm": (XLMConfig, TFXLMWithLMHeadModel, XLMWithLMHeadModel, XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,),
"xlm-roberta": (
XLMRobertaConfig,
TFXLMRobertaForMaskedLM,
XLMRobertaForMaskedLM,
- XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"transfo-xl": (
TransfoXLConfig,
TFTransfoXLLMHeadModel,
TransfoXLLMHeadModel,
- TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP,
TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"openai-gpt": (
OpenAIGPTConfig,
TFOpenAIGPTLMHeadModel,
OpenAIGPTLMHeadModel,
- OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP,
OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
- "roberta": (
- RobertaConfig,
- TFRobertaForMaskedLM,
- RobertaForMaskedLM,
- ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
- ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
- ),
+ "roberta": (RobertaConfig, TFRobertaForMaskedLM, RobertaForMaskedLM, ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,),
"roberta-large-mnli": (
RobertaConfig,
TFRobertaForSequenceClassification,
RobertaForSequenceClassification,
- ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"camembert": (
CamembertConfig,
TFCamembertForMaskedLM,
CamembertForMaskedLM,
- CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"flaubert": (
FlaubertConfig,
TFFlaubertWithLMHeadModel,
FlaubertWithLMHeadModel,
- FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"distilbert": (
DistilBertConfig,
TFDistilBertForMaskedLM,
DistilBertForMaskedLM,
- DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"distilbert-base-distilled-squad": (
DistilBertConfig,
TFDistilBertForQuestionAnswering,
DistilBertForQuestionAnswering,
- DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
- "ctrl": (
- CTRLConfig,
- TFCTRLLMHeadModel,
- CTRLLMHeadModel,
- CTRL_PRETRAINED_MODEL_ARCHIVE_MAP,
- CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP,
- ),
- "albert": (
- AlbertConfig,
- TFAlbertForPreTraining,
- AlbertForPreTraining,
- ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
- ),
- "t5": (
- T5Config,
- TFT5ForConditionalGeneration,
- T5ForConditionalGeneration,
- T5_PRETRAINED_MODEL_ARCHIVE_MAP,
- T5_PRETRAINED_CONFIG_ARCHIVE_MAP,
- ),
- "electra": (
- ElectraConfig,
- TFElectraForPreTraining,
- ElectraForPreTraining,
- ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP,
- ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP,
- ),
+ "ctrl": (CTRLConfig, TFCTRLLMHeadModel, CTRLLMHeadModel, CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP,),
+ "albert": (AlbertConfig, TFAlbertForPreTraining, AlbertForPreTraining, ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,),
+ "t5": (T5Config, TFT5ForConditionalGeneration, T5ForConditionalGeneration, T5_PRETRAINED_CONFIG_ARCHIVE_MAP,),
+ "electra": (ElectraConfig, TFElectraForPreTraining, ElectraForPreTraining, ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP,),
}
@@ -346,7 +190,7 @@ def convert_pt_checkpoint_to_tf(
if model_type not in MODEL_CLASSES:
raise ValueError("Unrecognized model type, should be one of {}.".format(list(MODEL_CLASSES.keys())))
- config_class, model_class, pt_model_class, aws_model_maps, aws_config_map = MODEL_CLASSES[model_type]
+ config_class, model_class, pt_model_class, aws_config_map = MODEL_CLASSES[model_type]
# Initialise TF model
if config_file in aws_config_map:
@@ -358,10 +202,9 @@ def convert_pt_checkpoint_to_tf(
tf_model = model_class(config)
# Load weights from tf checkpoint
- if pytorch_checkpoint_path in aws_model_maps:
- pytorch_checkpoint_path = cached_path(
- aws_model_maps[pytorch_checkpoint_path], force_download=not use_cached_models
- )
+ if pytorch_checkpoint_path in aws_config_map.keys():
+ pytorch_checkpoint_url = hf_bucket_url(pytorch_checkpoint_path, filename=WEIGHTS_NAME)
+ pytorch_checkpoint_path = cached_path(pytorch_checkpoint_url, force_download=not use_cached_models)
# Load PyTorch checkpoint in tf2 model:
tf_model = load_pytorch_checkpoint_in_tf2_model(tf_model, pytorch_checkpoint_path)
...
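The conversion script applies the same pattern to weights: instead of indexing an aws_model_maps dict, it builds the checkpoint URL from the model id. A small sketch of that step, using the names that appear in the hunk above (the model id is illustrative):

# Sketch only, mirroring the new weight-resolution step in the conversion script.
from transformers import WEIGHTS_NAME, cached_path, hf_bucket_url

pytorch_checkpoint_path = "bert-base-cased-finetuned-mrpc"  # example model id
pytorch_checkpoint_url = hf_bucket_url(pytorch_checkpoint_path, filename=WEIGHTS_NAME)
pytorch_checkpoint_path = cached_path(pytorch_checkpoint_url, force_download=False)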
@@ -31,16 +31,17 @@ from .modeling_utils import PreTrainedModel
logger = logging.getLogger(__name__)
- ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
- "albert-base-v1": "https://cdn.huggingface.co/albert-base-v1-pytorch_model.bin",
- "albert-large-v1": "https://cdn.huggingface.co/albert-large-v1-pytorch_model.bin",
- "albert-xlarge-v1": "https://cdn.huggingface.co/albert-xlarge-v1-pytorch_model.bin",
- "albert-xxlarge-v1": "https://cdn.huggingface.co/albert-xxlarge-v1-pytorch_model.bin",
- "albert-base-v2": "https://cdn.huggingface.co/albert-base-v2-pytorch_model.bin",
- "albert-large-v2": "https://cdn.huggingface.co/albert-large-v2-pytorch_model.bin",
- "albert-xlarge-v2": "https://cdn.huggingface.co/albert-xlarge-v2-pytorch_model.bin",
- "albert-xxlarge-v2": "https://cdn.huggingface.co/albert-xxlarge-v2-pytorch_model.bin",
- }
+ ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
+ "albert-base-v1",
+ "albert-large-v1",
+ "albert-xlarge-v1",
+ "albert-xxlarge-v1",
+ "albert-base-v2",
+ "albert-large-v2",
+ "albert-xlarge-v2",
+ "albert-xxlarge-v2",
+ # See all ALBERT models at https://huggingface.co/models?filter=albert
+ ]
def load_tf_weights_in_albert(model, config, tf_checkpoint_path):
@@ -365,7 +366,6 @@ class AlbertPreTrainedModel(PreTrainedModel):
"""
config_class = AlbertConfig
- pretrained_model_archive_map = ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP
base_model_prefix = "albert"
def _init_weights(self, module):
@@ -439,7 +439,6 @@ ALBERT_INPUTS_DOCSTRING = r"""
class AlbertModel(AlbertPreTrainedModel):
config_class = AlbertConfig
- pretrained_model_archive_map = ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP
load_tf_weights = load_tf_weights_in_albert
base_model_prefix = "albert"
...
@@ -43,7 +43,6 @@ from .configuration_auto import (
from .configuration_marian import MarianConfig
from .configuration_utils import PretrainedConfig
from .modeling_albert import (
- ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
AlbertForMaskedLM,
AlbertForPreTraining,
AlbertForQuestionAnswering,
@@ -51,14 +50,8 @@ from .modeling_albert import (
AlbertForTokenClassification,
AlbertModel,
)
- from .modeling_bart import (
- BART_PRETRAINED_MODEL_ARCHIVE_MAP,
- BartForConditionalGeneration,
- BartForSequenceClassification,
- BartModel,
- )
+ from .modeling_bart import BartForConditionalGeneration, BartForSequenceClassification, BartModel
from .modeling_bert import (
- BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
BertForMaskedLM,
BertForMultipleChoice,
BertForPreTraining,
@@ -68,16 +61,14 @@ from .modeling_bert import (
BertModel,
)
from .modeling_camembert import (
- CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
CamembertForMaskedLM,
CamembertForMultipleChoice,
CamembertForSequenceClassification,
CamembertForTokenClassification,
CamembertModel,
)
- from .modeling_ctrl import CTRL_PRETRAINED_MODEL_ARCHIVE_MAP, CTRLLMHeadModel, CTRLModel
+ from .modeling_ctrl import CTRLLMHeadModel, CTRLModel
from .modeling_distilbert import (
- DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
DistilBertForMaskedLM,
DistilBertForQuestionAnswering,
DistilBertForSequenceClassification,
@@ -85,7 +76,6 @@ from .modeling_distilbert import (
DistilBertModel,
)
from .modeling_electra import (
- ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP,
ElectraForMaskedLM,
ElectraForPreTraining,
ElectraForSequenceClassification,
@@ -94,15 +84,13 @@ from .modeling_electra import (
)
from .modeling_encoder_decoder import EncoderDecoderModel
from .modeling_flaubert import (
- FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
FlaubertForQuestionAnsweringSimple,
FlaubertForSequenceClassification,
FlaubertModel,
FlaubertWithLMHeadModel,
)
- from .modeling_gpt2 import GPT2_PRETRAINED_MODEL_ARCHIVE_MAP, GPT2LMHeadModel, GPT2Model
+ from .modeling_gpt2 import GPT2LMHeadModel, GPT2Model
from .modeling_longformer import (
- LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP,
LongformerForMaskedLM,
LongformerForMultipleChoice,
LongformerForQuestionAnswering,
@@ -111,10 +99,9 @@ from .modeling_longformer import (
LongformerModel,
)
from .modeling_marian import MarianMTModel
- from .modeling_openai import OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP, OpenAIGPTLMHeadModel, OpenAIGPTModel
+ from .modeling_openai import OpenAIGPTLMHeadModel, OpenAIGPTModel
from .modeling_reformer import ReformerModel, ReformerModelWithLMHead
from .modeling_roberta import (
- ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
RobertaForMaskedLM,
RobertaForMultipleChoice,
RobertaForQuestionAnswering,
@@ -122,10 +109,9 @@ from .modeling_roberta import (
RobertaForTokenClassification,
RobertaModel,
)
- from .modeling_t5 import T5_PRETRAINED_MODEL_ARCHIVE_MAP, T5ForConditionalGeneration, T5Model
+ from .modeling_t5 import T5ForConditionalGeneration, T5Model
- from .modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP, TransfoXLLMHeadModel, TransfoXLModel
+ from .modeling_transfo_xl import TransfoXLLMHeadModel, TransfoXLModel
from .modeling_xlm import (
- XLM_PRETRAINED_MODEL_ARCHIVE_MAP,
XLMForQuestionAnsweringSimple,
XLMForSequenceClassification,
XLMForTokenClassification,
@@ -133,7 +119,6 @@ from .modeling_xlm import (
XLMWithLMHeadModel,
)
from .modeling_xlm_roberta import (
- XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
XLMRobertaForMaskedLM,
XLMRobertaForMultipleChoice,
XLMRobertaForSequenceClassification,
@@ -141,7 +126,6 @@ from .modeling_xlm_roberta import (
XLMRobertaModel,
)
from .modeling_xlnet import (
- XLNET_PRETRAINED_MODEL_ARCHIVE_MAP,
XLNetForMultipleChoice,
XLNetForQuestionAnsweringSimple,
XLNetForSequenceClassification,
@@ -154,30 +138,6 @@ from .modeling_xlnet import (
logger = logging.getLogger(__name__)
- ALL_PRETRAINED_MODEL_ARCHIVE_MAP = dict(
- (key, value)
- for pretrained_map in [
- BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- BART_PRETRAINED_MODEL_ARCHIVE_MAP,
- OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP,
- TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP,
- GPT2_PRETRAINED_MODEL_ARCHIVE_MAP,
- CTRL_PRETRAINED_MODEL_ARCHIVE_MAP,
- XLNET_PRETRAINED_MODEL_ARCHIVE_MAP,
- XLM_PRETRAINED_MODEL_ARCHIVE_MAP,
- ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
- DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- T5_PRETRAINED_MODEL_ARCHIVE_MAP,
- FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
- ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP,
- LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP,
- ]
- for key, value, in pretrained_map.items()
- )
MODEL_MAPPING = OrderedDict(
[
(T5Config, T5Model),
@@ -372,29 +332,26 @@ class AutoModel:
The `from_pretrained()` method takes care of returning the correct model class instance
based on the `model_type` property of the config object, or when it's missing,
- falling back to using pattern matching on the `pretrained_model_name_or_path` string.
- The base model class to instantiate is selected as the first pattern matching
- in the `pretrained_model_name_or_path` string (in the following order):
- - contains `t5`: :class:`~transformers.T5Model` (T5 model)
- - contains `distilbert`: :class:`~transformers.DistilBertModel` (DistilBERT model)
- - contains `albert`: :class:`~transformers.AlbertModel` (ALBERT model)
- - contains `camembert`: :class:`~transformers.CamembertModel` (CamemBERT model)
- - contains `xlm-roberta`: :class:`~transformers.XLMRobertaModel` (XLM-RoBERTa model)
- - contains `longformer` :class:`~transformers.LongformerModel` (Longformer model)
- - contains `roberta`: :class:`~transformers.RobertaModel` (RoBERTa model)
- - contains `bert`: :class:`~transformers.BertModel` (Bert model)
- - contains `openai-gpt`: :class:`~transformers.OpenAIGPTModel` (OpenAI GPT model)
- - contains `gpt2`: :class:`~transformers.GPT2Model` (OpenAI GPT-2 model)
- - contains `transfo-xl`: :class:`~transformers.TransfoXLModel` (Transformer-XL model)
- - contains `xlnet`: :class:`~transformers.XLNetModel` (XLNet model)
- - contains `xlm`: :class:`~transformers.XLMModel` (XLM model)
- - contains `ctrl`: :class:`~transformers.CTRLModel` (Salesforce CTRL model)
- - contains `flaubert`: :class:`~transformers.FlaubertModel` (Flaubert model)
- - contains `electra`: :class:`~transformers.ElectraModel` (Electra model)
+ falling back to using pattern matching on the `pretrained_model_name_or_path` string:
+ - `t5`: :class:`~transformers.T5Model` (T5 model)
+ - `distilbert`: :class:`~transformers.DistilBertModel` (DistilBERT model)
+ - `albert`: :class:`~transformers.AlbertModel` (ALBERT model)
+ - `camembert`: :class:`~transformers.CamembertModel` (CamemBERT model)
+ - `xlm-roberta`: :class:`~transformers.XLMRobertaModel` (XLM-RoBERTa model)
+ - `longformer` :class:`~transformers.LongformerModel` (Longformer model)
+ - `roberta`: :class:`~transformers.RobertaModel` (RoBERTa model)
+ - `bert`: :class:`~transformers.BertModel` (Bert model)
+ - `openai-gpt`: :class:`~transformers.OpenAIGPTModel` (OpenAI GPT model)
+ - `gpt2`: :class:`~transformers.GPT2Model` (OpenAI GPT-2 model)
+ - `transfo-xl`: :class:`~transformers.TransfoXLModel` (Transformer-XL model)
+ - `xlnet`: :class:`~transformers.XLNetModel` (XLNet model)
+ - `xlm`: :class:`~transformers.XLMModel` (XLM model)
+ - `ctrl`: :class:`~transformers.CTRLModel` (Salesforce CTRL model)
+ - `flaubert`: :class:`~transformers.FlaubertModel` (Flaubert model)
+ - `electra`: :class:`~transformers.ElectraModel` (Electra model)
The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
To train the model, you should first set it back in training mode with `model.train()`
Args:
pretrained_model_name_or_path: either:
@@ -528,26 +485,23 @@ class AutoModelForPreTraining:
The `from_pretrained()` method takes care of returning the correct model class instance
based on the `model_type` property of the config object, or when it's missing,
- falling back to using pattern matching on the `pretrained_model_name_or_path` string.
- The model class to instantiate is selected as the first pattern matching
- in the `pretrained_model_name_or_path` string (in the following order):
- - contains `t5`: :class:`~transformers.T5ModelWithLMHead` (T5 model)
- - contains `distilbert`: :class:`~transformers.DistilBertForMaskedLM` (DistilBERT model)
- - contains `albert`: :class:`~transformers.AlbertForMaskedLM` (ALBERT model)
- - contains `camembert`: :class:`~transformers.CamembertForMaskedLM` (CamemBERT model)
- - contains `xlm-roberta`: :class:`~transformers.XLMRobertaForMaskedLM` (XLM-RoBERTa model)
- - contains `longformer`: :class:`~transformers.LongformerForMaskedLM` (Longformer model)
- - contains `roberta`: :class:`~transformers.RobertaForMaskedLM` (RoBERTa model)
- - contains `bert`: :class:`~transformers.BertForPreTraining` (Bert model)
- - contains `openai-gpt`: :class:`~transformers.OpenAIGPTLMHeadModel` (OpenAI GPT model)
- - contains `gpt2`: :class:`~transformers.GPT2LMHeadModel` (OpenAI GPT-2 model)
- - contains `transfo-xl`: :class:`~transformers.TransfoXLLMHeadModel` (Transformer-XL model)
- - contains `xlnet`: :class:`~transformers.XLNetLMHeadModel` (XLNet model)
- - contains `xlm`: :class:`~transformers.XLMWithLMHeadModel` (XLM model)
- - contains `ctrl`: :class:`~transformers.CTRLLMHeadModel` (Salesforce CTRL model)
- - contains `flaubert`: :class:`~transformers.FlaubertWithLMHeadModel` (Flaubert model)
- - contains `electra`: :class:`~transformers.ElectraForPreTraining` (Electra model)
+ falling back to using pattern matching on the `pretrained_model_name_or_path` string:
+ - `t5`: :class:`~transformers.T5ModelWithLMHead` (T5 model)
+ - `distilbert`: :class:`~transformers.DistilBertForMaskedLM` (DistilBERT model)
+ - `albert`: :class:`~transformers.AlbertForMaskedLM` (ALBERT model)
+ - `camembert`: :class:`~transformers.CamembertForMaskedLM` (CamemBERT model)
+ - `xlm-roberta`: :class:`~transformers.XLMRobertaForMaskedLM` (XLM-RoBERTa model)
+ - `longformer`: :class:`~transformers.LongformerForMaskedLM` (Longformer model)
+ - `roberta`: :class:`~transformers.RobertaForMaskedLM` (RoBERTa model)
+ - `bert`: :class:`~transformers.BertForPreTraining` (Bert model)
+ - `openai-gpt`: :class:`~transformers.OpenAIGPTLMHeadModel` (OpenAI GPT model)
+ - `gpt2`: :class:`~transformers.GPT2LMHeadModel` (OpenAI GPT-2 model)
+ - `transfo-xl`: :class:`~transformers.TransfoXLLMHeadModel` (Transformer-XL model)
+ - `xlnet`: :class:`~transformers.XLNetLMHeadModel` (XLNet model)
+ - `xlm`: :class:`~transformers.XLMWithLMHeadModel` (XLM model)
+ - `ctrl`: :class:`~transformers.CTRLLMHeadModel` (Salesforce CTRL model)
+ - `flaubert`: :class:`~transformers.FlaubertWithLMHeadModel` (Flaubert model)
+ - `electra`: :class:`~transformers.ElectraForPreTraining` (Electra model)
The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
To train the model, you should first set it back in training mode with `model.train()`
@@ -679,26 +633,23 @@ class AutoModelWithLMHead:
The `from_pretrained()` method takes care of returning the correct model class instance
based on the `model_type` property of the config object, or when it's missing,
- falling back to using pattern matching on the `pretrained_model_name_or_path` string.
- The model class to instantiate is selected as the first pattern matching
- in the `pretrained_model_name_or_path` string (in the following order):
- - contains `t5`: :class:`~transformers.T5ModelWithLMHead` (T5 model)
- - contains `distilbert`: :class:`~transformers.DistilBertForMaskedLM` (DistilBERT model)
- - contains `albert`: :class:`~transformers.AlbertForMaskedLM` (ALBERT model)
- - contains `camembert`: :class:`~transformers.CamembertForMaskedLM` (CamemBERT model)
- - contains `xlm-roberta`: :class:`~transformers.XLMRobertaForMaskedLM` (XLM-RoBERTa model)
- - contains `longformer`: :class:`~transformers.LongformerForMaskedLM` (Longformer model)
- - contains `roberta`: :class:`~transformers.RobertaForMaskedLM` (RoBERTa model)
- - contains `bert`: :class:`~transformers.BertForMaskedLM` (Bert model)
- - contains `openai-gpt`: :class:`~transformers.OpenAIGPTLMHeadModel` (OpenAI GPT model)
- - contains `gpt2`: :class:`~transformers.GPT2LMHeadModel` (OpenAI GPT-2 model)
- - contains `transfo-xl`: :class:`~transformers.TransfoXLLMHeadModel` (Transformer-XL model)
- - contains `xlnet`: :class:`~transformers.XLNetLMHeadModel` (XLNet model)
- - contains `xlm`: :class:`~transformers.XLMWithLMHeadModel` (XLM model)
- - contains `ctrl`: :class:`~transformers.CTRLLMHeadModel` (Salesforce CTRL model)
- - contains `flaubert`: :class:`~transformers.FlaubertWithLMHeadModel` (Flaubert model)
- - contains `electra`: :class:`~transformers.ElectraForMaskedLM` (Electra model)
+ falling back to using pattern matching on the `pretrained_model_name_or_path` string:
+ - `t5`: :class:`~transformers.T5ModelWithLMHead` (T5 model)
+ - `distilbert`: :class:`~transformers.DistilBertForMaskedLM` (DistilBERT model)
+ - `albert`: :class:`~transformers.AlbertForMaskedLM` (ALBERT model)
+ - `camembert`: :class:`~transformers.CamembertForMaskedLM` (CamemBERT model)
+ - `xlm-roberta`: :class:`~transformers.XLMRobertaForMaskedLM` (XLM-RoBERTa model)
+ - `longformer`: :class:`~transformers.LongformerForMaskedLM` (Longformer model)
+ - `roberta`: :class:`~transformers.RobertaForMaskedLM` (RoBERTa model)
+ - `bert`: :class:`~transformers.BertForMaskedLM` (Bert model)
+ - `openai-gpt`: :class:`~transformers.OpenAIGPTLMHeadModel` (OpenAI GPT model)
+ - `gpt2`: :class:`~transformers.GPT2LMHeadModel` (OpenAI GPT-2 model)
+ - `transfo-xl`: :class:`~transformers.TransfoXLLMHeadModel` (Transformer-XL model)
+ - `xlnet`: :class:`~transformers.XLNetLMHeadModel` (XLNet model)
+ - `xlm`: :class:`~transformers.XLMWithLMHeadModel` (XLM model)
+ - `ctrl`: :class:`~transformers.CTRLLMHeadModel` (Salesforce CTRL model)
+ - `flaubert`: :class:`~transformers.FlaubertWithLMHeadModel` (Flaubert model)
+ - `electra`: :class:`~transformers.ElectraForMaskedLM` (Electra model)
The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
To train the model, you should first set it back in training mode with `model.train()`
@@ -830,18 +781,15 @@ class AutoModelForSequenceClassification:
The `from_pretrained()` method takes care of returning the correct model class instance
based on the `model_type` property of the config object, or when it's missing,
- falling back to using pattern matching on the `pretrained_model_name_or_path` string.
- The model class to instantiate is selected as the first pattern matching
- in the `pretrained_model_name_or_path` string (in the following order):
- - contains `distilbert`: :class:`~transformers.DistilBertForSequenceClassification` (DistilBERT model)
- - contains `albert`: :class:`~transformers.AlbertForSequenceClassification` (ALBERT model)
- - contains `camembert`: :class:`~transformers.CamembertForSequenceClassification` (CamemBERT model)
- - contains `xlm-roberta`: :class:`~transformers.XLMRobertaForSequenceClassification` (XLM-RoBERTa model)
- - contains `roberta`: :class:`~transformers.RobertaForSequenceClassification` (RoBERTa model)
- - contains `bert`: :class:`~transformers.BertForSequenceClassification` (Bert model)
- - contains `xlnet`: :class:`~transformers.XLNetForSequenceClassification` (XLNet model)
- - contains `flaubert`: :class:`~transformers.FlaubertForSequenceClassification` (Flaubert model)
+ falling back to using pattern matching on the `pretrained_model_name_or_path` string:
+ - `distilbert`: :class:`~transformers.DistilBertForSequenceClassification` (DistilBERT model)
+ - `albert`: :class:`~transformers.AlbertForSequenceClassification` (ALBERT model)
+ - `camembert`: :class:`~transformers.CamembertForSequenceClassification` (CamemBERT model)
+ - `xlm-roberta`: :class:`~transformers.XLMRobertaForSequenceClassification` (XLM-RoBERTa model)
+ - `roberta`: :class:`~transformers.RobertaForSequenceClassification` (RoBERTa model)
+ - `bert`: :class:`~transformers.BertForSequenceClassification` (Bert model)
+ - `xlnet`: :class:`~transformers.XLNetForSequenceClassification` (XLNet model)
+ - `flaubert`: :class:`~transformers.FlaubertForSequenceClassification` (Flaubert model)
The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
To train the model, you should first set it back in training mode with `model.train()`
@@ -979,16 +927,13 @@ class AutoModelForQuestionAnswering:
The `from_pretrained()` method takes care of returning the correct model class instance
based on the `model_type` property of the config object, or when it's missing,
- falling back to using pattern matching on the `pretrained_model_name_or_path` string.
- The model class to instantiate is selected as the first pattern matching
- in the `pretrained_model_name_or_path` string (in the following order):
- - contains `distilbert`: :class:`~transformers.DistilBertForQuestionAnswering` (DistilBERT model)
- - contains `albert`: :class:`~transformers.AlbertForQuestionAnswering` (ALBERT model)
- - contains `bert`: :class:`~transformers.BertForQuestionAnswering` (Bert model)
- - contains `xlnet`: :class:`~transformers.XLNetForQuestionAnswering` (XLNet model)
- - contains `xlm`: :class:`~transformers.XLMForQuestionAnswering` (XLM model)
- - contains `flaubert`: :class:`~transformers.FlaubertForQuestionAnswering` (XLM model)
+ falling back to using pattern matching on the `pretrained_model_name_or_path` string:
+ - `distilbert`: :class:`~transformers.DistilBertForQuestionAnswering` (DistilBERT model)
+ - `albert`: :class:`~transformers.AlbertForQuestionAnswering` (ALBERT model)
+ - `bert`: :class:`~transformers.BertForQuestionAnswering` (Bert model)
+ - `xlnet`: :class:`~transformers.XLNetForQuestionAnswering` (XLNet model)
+ - `xlm`: :class:`~transformers.XLMForQuestionAnswering` (XLM model)
+ - `flaubert`: :class:`~transformers.FlaubertForQuestionAnswering` (XLM model)
The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
To train the model, you should first set it back in training mode with `model.train()`
@@ -1127,18 +1072,15 @@ class AutoModelForTokenClassification:
The `from_pretrained()` method takes care of returning the correct model class instance
based on the `model_type` property of the config object, or when it's missing,
- falling back to using pattern matching on the `pretrained_model_name_or_path` string.
- The model class to instantiate is selected as the first pattern matching
- in the `pretrained_model_name_or_path` string (in the following order):
- - contains `distilbert`: :class:`~transformers.DistilBertForTokenClassification` (DistilBERT model)
- - contains `xlm`: :class:`~transformers.XLMForTokenClassification` (XLM model)
- - contains `xlm-roberta`: :class:`~transformers.XLMRobertaForTokenClassification` (XLM-RoBERTa model)
- - contains `camembert`: :class:`~transformers.CamembertForTokenClassification` (Camembert model)
- - contains `bert`: :class:`~transformers.BertForTokenClassification` (Bert model)
- - contains `xlnet`: :class:`~transformers.XLNetForTokenClassification` (XLNet model)
- - contains `roberta`: :class:`~transformers.RobertaForTokenClassification` (Roberta model)
- - contains `electra`: :class:`~transformers.ElectraForTokenClassification` (Electra model)
+ falling back to using pattern matching on the `pretrained_model_name_or_path` string:
+ - `distilbert`: :class:`~transformers.DistilBertForTokenClassification` (DistilBERT model)
+ - `xlm`: :class:`~transformers.XLMForTokenClassification` (XLM model)
+ - `xlm-roberta`: :class:`~transformers.XLMRobertaForTokenClassification` (XLM-RoBERTa model)
+ - `camembert`: :class:`~transformers.CamembertForTokenClassification` (Camembert model)
+ - `bert`: :class:`~transformers.BertForTokenClassification` (Bert model)
+ - `xlnet`: :class:`~transformers.XLNetForTokenClassification` (XLNet model)
+ - `roberta`: :class:`~transformers.RobertaForTokenClassification` (Roberta model)
+ - `electra`: :class:`~transformers.ElectraForTokenClassification` (Electra model)
The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
To train the model, you should first set it back in training mode with `model.train()`
...
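For completeness, the dispatch these docstrings describe can be exercised directly: AutoModel picks the concrete class from the config's model_type, or from the patterns listed above when the config does not carry one. A brief usage sketch (the model id is illustrative):

# Sketch only: the AutoModel dispatch described in the docstrings above.
from transformers import AutoConfig, AutoModel

model = AutoModel.from_pretrained("distilbert-base-uncased")   # resolves to DistilBertModel
config = AutoConfig.from_pretrained("distilbert-base-uncased")
model = AutoModel.from_config(config)                          # same dispatch, freshly initialized weights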