Unverified Commit d4c2cb40 authored by Julien Chaumond, committed by GitHub

Kill model archive maps (#4636)

* Kill model archive maps

* Fixup

* Also kill model_archive_map for MaskedBertPreTrainedModel

* Unhook config_archive_map

* Tokenizers: align with model id changes

* make style && make quality

* Fix CI
parent 47a551d1
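
In short: every per-model `*_PRETRAINED_MODEL_ARCHIVE_MAP` dict, which mapped shortcut names to hard-coded CDN URLs, becomes a `*_PRETRAINED_MODEL_ARCHIVE_LIST` of canonical model ids, and the `pretrained_model_archive_map` class attribute is unhooked from the pretrained-model base classes. A minimal sketch of what this looks like from the caller's side (the model id is taken from the BART hunk below; the resolve-from-id behavior is the stated intent of the commit, not code shown in this diff):

```python
from transformers import BartModel, BartTokenizer

# Before: "bart-large" was a key in BART_PRETRAINED_MODEL_ARCHIVE_MAP pointing
# at a hard-coded https://cdn.huggingface.co/... URL.
# After: the canonical identifier is the namespaced model id, and the weights
# are resolved from the id itself rather than from a per-model dict.
model = BartModel.from_pretrained("facebook/bart-large")
tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
```
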
@@ -32,13 +32,15 @@ from .modeling_utils import PreTrainedModel, create_position_ids_from_input_ids
 logger = logging.getLogger(__name__)
 
-BART_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "bart-large": "https://cdn.huggingface.co/facebook/bart-large/pytorch_model.bin",
-    "bart-large-mnli": "https://cdn.huggingface.co/facebook/bart-large-mnli/pytorch_model.bin",
-    "bart-large-cnn": "https://cdn.huggingface.co/facebook/bart-large-cnn/pytorch_model.bin",
-    "bart-large-xsum": "https://cdn.huggingface.co/facebook/bart-large-xsum/pytorch_model.bin",
-    "mbart-large-en-ro": "https://cdn.huggingface.co/facebook/mbart-large-en-ro/pytorch_model.bin",
-}
+BART_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "facebook/bart-large",
+    "facebook/bart-large-mnli",
+    "facebook/bart-large-cnn",
+    "facebook/bart-large-xsum",
+    "facebook/mbart-large-en-ro",
+    # See all BART models at https://huggingface.co/models?filter=bart
+]
 
 BART_START_DOCSTRING = r"""
@@ -118,7 +120,6 @@ def _prepare_bart_decoder_inputs(
 class PretrainedBartModel(PreTrainedModel):
     config_class = BartConfig
     base_model_prefix = "model"
-    pretrained_model_archive_map = BART_PRETRAINED_MODEL_ARCHIVE_MAP
 
     def _init_weights(self, module):
         std = self.config.init_std
......
@@ -32,30 +32,31 @@ from .modeling_utils import PreTrainedModel, prune_linear_layer
 logger = logging.getLogger(__name__)
 
-BERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "bert-base-uncased": "https://cdn.huggingface.co/bert-base-uncased-pytorch_model.bin",
-    "bert-large-uncased": "https://cdn.huggingface.co/bert-large-uncased-pytorch_model.bin",
-    "bert-base-cased": "https://cdn.huggingface.co/bert-base-cased-pytorch_model.bin",
-    "bert-large-cased": "https://cdn.huggingface.co/bert-large-cased-pytorch_model.bin",
-    "bert-base-multilingual-uncased": "https://cdn.huggingface.co/bert-base-multilingual-uncased-pytorch_model.bin",
-    "bert-base-multilingual-cased": "https://cdn.huggingface.co/bert-base-multilingual-cased-pytorch_model.bin",
-    "bert-base-chinese": "https://cdn.huggingface.co/bert-base-chinese-pytorch_model.bin",
-    "bert-base-german-cased": "https://cdn.huggingface.co/bert-base-german-cased-pytorch_model.bin",
-    "bert-large-uncased-whole-word-masking": "https://cdn.huggingface.co/bert-large-uncased-whole-word-masking-pytorch_model.bin",
-    "bert-large-cased-whole-word-masking": "https://cdn.huggingface.co/bert-large-cased-whole-word-masking-pytorch_model.bin",
-    "bert-large-uncased-whole-word-masking-finetuned-squad": "https://cdn.huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad-pytorch_model.bin",
-    "bert-large-cased-whole-word-masking-finetuned-squad": "https://cdn.huggingface.co/bert-large-cased-whole-word-masking-finetuned-squad-pytorch_model.bin",
-    "bert-base-cased-finetuned-mrpc": "https://cdn.huggingface.co/bert-base-cased-finetuned-mrpc-pytorch_model.bin",
-    "bert-base-german-dbmdz-cased": "https://cdn.huggingface.co/bert-base-german-dbmdz-cased-pytorch_model.bin",
-    "bert-base-german-dbmdz-uncased": "https://cdn.huggingface.co/bert-base-german-dbmdz-uncased-pytorch_model.bin",
-    "bert-base-japanese": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese/pytorch_model.bin",
-    "bert-base-japanese-whole-word-masking": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese-whole-word-masking/pytorch_model.bin",
-    "bert-base-japanese-char": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese-char/pytorch_model.bin",
-    "bert-base-japanese-char-whole-word-masking": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese-char-whole-word-masking/pytorch_model.bin",
-    "bert-base-finnish-cased-v1": "https://cdn.huggingface.co/TurkuNLP/bert-base-finnish-cased-v1/pytorch_model.bin",
-    "bert-base-finnish-uncased-v1": "https://cdn.huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1/pytorch_model.bin",
-    "bert-base-dutch-cased": "https://cdn.huggingface.co/wietsedv/bert-base-dutch-cased/pytorch_model.bin",
-}
+BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "bert-base-uncased",
+    "bert-large-uncased",
+    "bert-base-cased",
+    "bert-large-cased",
+    "bert-base-multilingual-uncased",
+    "bert-base-multilingual-cased",
+    "bert-base-chinese",
+    "bert-base-german-cased",
+    "bert-large-uncased-whole-word-masking",
+    "bert-large-cased-whole-word-masking",
+    "bert-large-uncased-whole-word-masking-finetuned-squad",
+    "bert-large-cased-whole-word-masking-finetuned-squad",
+    "bert-base-cased-finetuned-mrpc",
+    "bert-base-german-dbmdz-cased",
+    "bert-base-german-dbmdz-uncased",
+    "cl-tohoku/bert-base-japanese",
+    "cl-tohoku/bert-base-japanese-whole-word-masking",
+    "cl-tohoku/bert-base-japanese-char",
+    "cl-tohoku/bert-base-japanese-char-whole-word-masking",
+    "TurkuNLP/bert-base-finnish-cased-v1",
+    "TurkuNLP/bert-base-finnish-uncased-v1",
+    "wietsedv/bert-base-dutch-cased",
+    # See all BERT models at https://huggingface.co/models?filter=bert
+]
 
 def load_tf_weights_in_bert(model, config, tf_checkpoint_path):
@@ -513,7 +514,6 @@ class BertPreTrainedModel(PreTrainedModel):
     """
     config_class = BertConfig
-    pretrained_model_archive_map = BERT_PRETRAINED_MODEL_ARCHIVE_MAP
     load_tf_weights = load_tf_weights_in_bert
     base_model_prefix = "bert"
......
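
The BERT hunk above shows the other half of the change: community checkpoints (the Japanese, Finnish, and Dutch models) lose their bare shortcut names and are addressed by namespaced ids, which is what the "Tokenizers: align with model id changes" bullet in the commit message refers to. A hedged sketch using one id from the new list:

```python
from transformers import AutoModel, AutoTokenizer

# The old shortcut "bert-base-finnish-cased-v1" is now the namespaced id below;
# model and tokenizer resolve from the same id.
model_id = "TurkuNLP/bert-base-finnish-cased-v1"
model = AutoModel.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
```
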
@@ -31,11 +31,12 @@ from .modeling_roberta import (
 logger = logging.getLogger(__name__)
 
-CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "camembert-base": "https://cdn.huggingface.co/camembert-base-pytorch_model.bin",
-    "umberto-commoncrawl-cased-v1": "https://cdn.huggingface.co/Musixmatch/umberto-commoncrawl-cased-v1/pytorch_model.bin",
-    "umberto-wikipedia-uncased-v1": "https://cdn.huggingface.co/Musixmatch/umberto-wikipedia-uncased-v1/pytorch_model.bin",
-}
+CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "camembert-base",
+    "Musixmatch/umberto-commoncrawl-cased-v1",
+    "Musixmatch/umberto-wikipedia-uncased-v1",
+    # See all CamemBERT models at https://huggingface.co/models?filter=camembert
+]
 
 CAMEMBERT_START_DOCSTRING = r"""
@@ -62,7 +63,6 @@ class CamembertModel(RobertaModel):
     """
     config_class = CamembertConfig
-    pretrained_model_archive_map = CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
 @add_start_docstrings(
@@ -75,7 +75,6 @@ class CamembertForMaskedLM(RobertaForMaskedLM):
     """
     config_class = CamembertConfig
-    pretrained_model_archive_map = CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
 @add_start_docstrings(
@@ -90,7 +89,6 @@ class CamembertForSequenceClassification(RobertaForSequenceClassification):
     """
     config_class = CamembertConfig
-    pretrained_model_archive_map = CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
 @add_start_docstrings(
@@ -105,7 +103,6 @@ class CamembertForMultipleChoice(RobertaForMultipleChoice):
     """
     config_class = CamembertConfig
-    pretrained_model_archive_map = CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
 @add_start_docstrings(
@@ -120,7 +117,6 @@ class CamembertForTokenClassification(RobertaForTokenClassification):
     """
     config_class = CamembertConfig
-    pretrained_model_archive_map = CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
 @add_start_docstrings(
@@ -135,4 +131,3 @@ class CamembertForQuestionAnswering(RobertaForQuestionAnswering):
     """
     config_class = CamembertConfig
-    pretrained_model_archive_map = CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
@@ -30,7 +30,10 @@ from .modeling_utils import Conv1D, PreTrainedModel
 logger = logging.getLogger(__name__)
 
-CTRL_PRETRAINED_MODEL_ARCHIVE_MAP = {"ctrl": "https://storage.googleapis.com/sf-ctrl/pytorch/seqlen256_v1.bin"}
+CTRL_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "ctrl"
+    # See all CTRL models at https://huggingface.co/models?filter=ctrl
+]
 
 def angle_defn(pos, i, d_model_size):
@@ -178,7 +181,6 @@ class CTRLPreTrainedModel(PreTrainedModel):
     """
     config_class = CTRLConfig
-    pretrained_model_archive_map = CTRL_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "transformer"
 
     def _init_weights(self, module):
......
@@ -36,15 +36,16 @@ from .modeling_utils import PreTrainedModel, prune_linear_layer
 logger = logging.getLogger(__name__)
 
-DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "distilbert-base-uncased": "https://cdn.huggingface.co/distilbert-base-uncased-pytorch_model.bin",
-    "distilbert-base-uncased-distilled-squad": "https://cdn.huggingface.co/distilbert-base-uncased-distilled-squad-pytorch_model.bin",
-    "distilbert-base-cased": "https://cdn.huggingface.co/distilbert-base-cased-pytorch_model.bin",
-    "distilbert-base-cased-distilled-squad": "https://cdn.huggingface.co/distilbert-base-cased-distilled-squad-pytorch_model.bin",
-    "distilbert-base-german-cased": "https://cdn.huggingface.co/distilbert-base-german-cased-pytorch_model.bin",
-    "distilbert-base-multilingual-cased": "https://cdn.huggingface.co/distilbert-base-multilingual-cased-pytorch_model.bin",
-    "distilbert-base-uncased-finetuned-sst-2-english": "https://cdn.huggingface.co/distilbert-base-uncased-finetuned-sst-2-english-pytorch_model.bin",
-}
+DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "distilbert-base-uncased",
+    "distilbert-base-uncased-distilled-squad",
+    "distilbert-base-cased",
+    "distilbert-base-cased-distilled-squad",
+    "distilbert-base-german-cased",
+    "distilbert-base-multilingual-cased",
+    "distilbert-base-uncased-finetuned-sst-2-english",
+    # See all DistilBERT models at https://huggingface.co/models?filter=distilbert
+]
 
 # UTILS AND BUILDING BLOCKS OF THE ARCHITECTURE #
@@ -327,7 +328,6 @@ class DistilBertPreTrainedModel(PreTrainedModel):
     """
     config_class = DistilBertConfig
-    pretrained_model_archive_map = DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP
     load_tf_weights = None
     base_model_prefix = "distilbert"
......
@@ -14,14 +14,15 @@ from .modeling_bert import BertEmbeddings, BertEncoder, BertLayerNorm, BertPreTr
 logger = logging.getLogger(__name__)
 
-ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "google/electra-small-generator": "https://cdn.huggingface.co/google/electra-small-generator/pytorch_model.bin",
-    "google/electra-base-generator": "https://cdn.huggingface.co/google/electra-base-generator/pytorch_model.bin",
-    "google/electra-large-generator": "https://cdn.huggingface.co/google/electra-large-generator/pytorch_model.bin",
-    "google/electra-small-discriminator": "https://cdn.huggingface.co/google/electra-small-discriminator/pytorch_model.bin",
-    "google/electra-base-discriminator": "https://cdn.huggingface.co/google/electra-base-discriminator/pytorch_model.bin",
-    "google/electra-large-discriminator": "https://cdn.huggingface.co/google/electra-large-discriminator/pytorch_model.bin",
-}
+ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "google/electra-small-generator",
+    "google/electra-base-generator",
+    "google/electra-large-generator",
+    "google/electra-small-discriminator",
+    "google/electra-base-discriminator",
+    "google/electra-large-discriminator",
+    # See all ELECTRA models at https://huggingface.co/models?filter=electra
+]
 
 def load_tf_weights_in_electra(model, config, tf_checkpoint_path, discriminator_or_generator="discriminator"):
@@ -160,7 +161,6 @@ class ElectraPreTrainedModel(BertPreTrainedModel):
     """
     config_class = ElectraConfig
-    pretrained_model_archive_map = ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP
     load_tf_weights = load_tf_weights_in_electra
     base_model_prefix = "electra"
......
@@ -35,12 +35,13 @@ from .modeling_xlm import (
 logger = logging.getLogger(__name__)
 
-FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "flaubert-small-cased": "https://cdn.huggingface.co/flaubert/flaubert_small_cased/pytorch_model.bin",
-    "flaubert-base-uncased": "https://cdn.huggingface.co/flaubert/flaubert_base_uncased/pytorch_model.bin",
-    "flaubert-base-cased": "https://cdn.huggingface.co/flaubert/flaubert_base_cased/pytorch_model.bin",
-    "flaubert-large-cased": "https://cdn.huggingface.co/flaubert/flaubert_large_cased/pytorch_model.bin",
-}
+FLAUBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "flaubert/flaubert_small_cased",
+    "flaubert/flaubert_base_uncased",
+    "flaubert/flaubert_base_cased",
+    "flaubert/flaubert_large_cased",
+    # See all Flaubert models at https://huggingface.co/models?filter=flaubert
+]
 
 FLAUBERT_START_DOCSTRING = r"""
@@ -109,7 +110,6 @@ FLAUBERT_INPUTS_DOCSTRING = r"""
 class FlaubertModel(XLMModel):
     config_class = FlaubertConfig
-    pretrained_model_archive_map = FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
     def __init__(self, config):  # , dico, is_encoder, with_output):
         super().__init__(config)
@@ -304,7 +304,6 @@ class FlaubertWithLMHeadModel(XLMWithLMHeadModel):
     """
     config_class = FlaubertConfig
-    pretrained_model_archive_map = FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
     def __init__(self, config):
         super().__init__(config)
@@ -324,7 +323,6 @@ class FlaubertForSequenceClassification(XLMForSequenceClassification):
     """
     config_class = FlaubertConfig
-    pretrained_model_archive_map = FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
     def __init__(self, config):
         super().__init__(config)
@@ -344,7 +342,6 @@ class FlaubertForQuestionAnsweringSimple(XLMForQuestionAnsweringSimple):
     """
     config_class = FlaubertConfig
-    pretrained_model_archive_map = FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
     def __init__(self, config):
         super().__init__(config)
@@ -364,7 +361,6 @@ class FlaubertForQuestionAnswering(XLMForQuestionAnswering):
     """
     config_class = FlaubertConfig
-    pretrained_model_archive_map = FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
     def __init__(self, config):
         super().__init__(config)
......
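
Note that the Flaubert ids change shape as well as namespace: `flaubert-base-cased` becomes `flaubert/flaubert_base_cased` (dashes to underscores, plus the `flaubert` organization, matching the URLs the old map already pointed at). A hedged usage sketch with an id from the new list:

```python
from transformers import FlaubertModel, FlaubertTokenizer

# Old shortcut: "flaubert-base-cased"; the canonical id is now namespaced
# and underscore-separated.
model_id = "flaubert/flaubert_base_cased"
model = FlaubertModel.from_pretrained(model_id)
tokenizer = FlaubertTokenizer.from_pretrained(model_id)
```
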
@@ -31,13 +31,14 @@ from .modeling_utils import Conv1D, PreTrainedModel, SequenceSummary, prune_conv
 logger = logging.getLogger(__name__)
 
-GPT2_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "gpt2": "https://cdn.huggingface.co/gpt2-pytorch_model.bin",
-    "gpt2-medium": "https://cdn.huggingface.co/gpt2-medium-pytorch_model.bin",
-    "gpt2-large": "https://cdn.huggingface.co/gpt2-large-pytorch_model.bin",
-    "gpt2-xl": "https://cdn.huggingface.co/gpt2-xl-pytorch_model.bin",
-    "distilgpt2": "https://cdn.huggingface.co/distilgpt2-pytorch_model.bin",
-}
+GPT2_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "gpt2",
+    "gpt2-medium",
+    "gpt2-large",
+    "gpt2-xl",
+    "distilgpt2",
+    # See all GPT-2 models at https://huggingface.co/models?filter=gpt2
+]
 
 def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
@@ -251,7 +252,6 @@ class GPT2PreTrainedModel(PreTrainedModel):
     """
     config_class = GPT2Config
-    pretrained_model_archive_map = GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
     load_tf_weights = load_tf_weights_in_gpt2
     base_model_prefix = "transformer"
......
@@ -30,13 +30,14 @@ from .modeling_roberta import RobertaLMHead, RobertaModel
 logger = logging.getLogger(__name__)
 
-LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "allenai/longformer-base-4096": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-base-4096/pytorch_model.bin",
-    "allenai/longformer-large-4096": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-large-4096/pytorch_model.bin",
-    "allenai/longformer-large-4096-finetuned-triviaqa": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-large-4096-finetuned-triviaqa/pytorch_model.bin",
-    "allenai/longformer-base-4096-extra.pos.embd.only": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-base-4096-extra.pos.embd.only/pytorch_model.bin",
-    "allenai/longformer-large-4096-extra.pos.embd.only": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-large-4096-extra.pos.embd.only/pytorch_model.bin",
-}
+LONGFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "allenai/longformer-base-4096",
+    "allenai/longformer-large-4096",
+    "allenai/longformer-large-4096-finetuned-triviaqa",
+    "allenai/longformer-base-4096-extra.pos.embd.only",
+    "allenai/longformer-large-4096-extra.pos.embd.only",
+    # See all Longformer models at https://huggingface.co/models?filter=longformer
+]
 
 def _get_question_end_index(input_ids, sep_token_id):
@@ -513,7 +514,6 @@ class LongformerModel(RobertaModel):
     """
     config_class = LongformerConfig
-    pretrained_model_archive_map = LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "longformer"
 
     def __init__(self, config):
@@ -685,7 +685,6 @@ class LongformerModel(RobertaModel):
 @add_start_docstrings("""Longformer Model with a `language modeling` head on top. """, LONGFORMER_START_DOCSTRING)
 class LongformerForMaskedLM(BertPreTrainedModel):
     config_class = LongformerConfig
-    pretrained_model_archive_map = LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "longformer"
 
     def __init__(self, config):
@@ -776,7 +775,6 @@ class LongformerForMaskedLM(BertPreTrainedModel):
 )
 class LongformerForSequenceClassification(BertPreTrainedModel):
     config_class = LongformerConfig
-    pretrained_model_archive_map = LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "longformer"
 
     def __init__(self, config):
@@ -893,7 +891,6 @@ class LongformerClassificationHead(nn.Module):
 )
 class LongformerForQuestionAnswering(BertPreTrainedModel):
     config_class = LongformerConfig
-    pretrained_model_archive_map = LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "longformer"
 
     def __init__(self, config):
@@ -1018,7 +1015,6 @@ class LongformerForQuestionAnswering(BertPreTrainedModel):
 )
 class LongformerForTokenClassification(BertPreTrainedModel):
     config_class = LongformerConfig
-    pretrained_model_archive_map = LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "longformer"
 
     def __init__(self, config):
@@ -1119,7 +1115,6 @@ class LongformerForTokenClassification(BertPreTrainedModel):
 )
 class LongformerForMultipleChoice(BertPreTrainedModel):
     config_class = LongformerConfig
-    pretrained_model_archive_map = LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "longformer"
 
     def __init__(self, config):
......
@@ -18,6 +18,11 @@
 from transformers.modeling_bart import BartForConditionalGeneration
 
+MARIAN_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    # See all Marian models at https://huggingface.co/models?search=Helsinki-NLP
+]
+
 
 class MarianMTModel(BartForConditionalGeneration):
     r"""
     PyTorch version of marian-nmt's transformer.h (c++). Designed for the OPUS-NMT translation checkpoints.
@@ -41,8 +46,6 @@ class MarianMTModel(BartForConditionalGeneration):
     """
-    pretrained_model_archive_map = {}  # see https://huggingface.co/models?search=Helsinki-NLP
-
     def prepare_logits_for_generation(self, logits, cur_len, max_length):
         logits[:, self.config.pad_token_id] = float("-inf")
         if cur_len == max_length - 1 and self.config.eos_token_id is not None:
......
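
Marian is the limiting case of the new convention: the archive list is left empty because the Helsinki-NLP checkpoints are too numerous to enumerate, so they are discovered on the model hub instead. A hedged sketch (the id below is a well-known hub checkpoint used as an example; it does not appear in this diff):

```python
from transformers import MarianMTModel, MarianTokenizer

# Any Helsinki-NLP/opus-mt-* checkpoint on the hub loads by id; none are
# enumerated in the library source.
model_id = "Helsinki-NLP/opus-mt-en-de"  # example id, not part of this diff
model = MarianMTModel.from_pretrained(model_id)
tokenizer = MarianTokenizer.from_pretrained(model_id)
```
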
@@ -33,7 +33,10 @@ from .modeling_utils import Conv1D, PreTrainedModel, SequenceSummary, prune_conv
 logger = logging.getLogger(__name__)
 
-OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP = {"openai-gpt": "https://cdn.huggingface.co/openai-gpt-pytorch_model.bin"}
+OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "openai-gpt",
+    # See all OpenAI GPT models at https://huggingface.co/models?filter=openai-gpt
+]
 
 def load_tf_weights_in_openai_gpt(model, config, openai_checkpoint_folder_path):
@@ -252,7 +255,6 @@ class OpenAIGPTPreTrainedModel(PreTrainedModel):
     """
     config_class = OpenAIGPTConfig
-    pretrained_model_archive_map = OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP
     load_tf_weights = load_tf_weights_in_openai_gpt
     base_model_prefix = "transformer"
......
@@ -35,10 +35,11 @@ from .modeling_utils import PreTrainedModel, apply_chunking_to_forward
 logger = logging.getLogger(__name__)
 
-REFORMER_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "google/reformer-crime-and-punishment": "https://cdn.huggingface.co/google/reformer-crime-and-punishment/pytorch_model.bin",
-    "google/reformer-enwik8": "https://cdn.huggingface.co/google/reformer-enwik8/pytorch_model.bin",
-}
+REFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "google/reformer-crime-and-punishment",
+    "google/reformer-enwik8",
+    # See all Reformer models at https://huggingface.co/models?filter=reformer
+]
 
 def mish(x):
@@ -1373,7 +1374,6 @@ class ReformerPreTrainedModel(PreTrainedModel):
     """
     config_class = ReformerConfig
-    pretrained_model_archive_map = REFORMER_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "reformer"
 
     @property
......
@@ -30,14 +30,15 @@ from .modeling_utils import create_position_ids_from_input_ids
 logger = logging.getLogger(__name__)
 
-ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "roberta-base": "https://cdn.huggingface.co/roberta-base-pytorch_model.bin",
-    "roberta-large": "https://cdn.huggingface.co/roberta-large-pytorch_model.bin",
-    "roberta-large-mnli": "https://cdn.huggingface.co/roberta-large-mnli-pytorch_model.bin",
-    "distilroberta-base": "https://cdn.huggingface.co/distilroberta-base-pytorch_model.bin",
-    "roberta-base-openai-detector": "https://cdn.huggingface.co/roberta-base-openai-detector-pytorch_model.bin",
-    "roberta-large-openai-detector": "https://cdn.huggingface.co/roberta-large-openai-detector-pytorch_model.bin",
-}
+ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "roberta-base",
+    "roberta-large",
+    "roberta-large-mnli",
+    "distilroberta-base",
+    "roberta-base-openai-detector",
+    "roberta-large-openai-detector",
+    # See all RoBERTa models at https://huggingface.co/models?filter=roberta
+]
 
 class RobertaEmbeddings(BertEmbeddings):
@@ -142,7 +143,6 @@ class RobertaModel(BertModel):
     """
     config_class = RobertaConfig
-    pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "roberta"
 
     def __init__(self, config):
@@ -161,7 +161,6 @@ class RobertaModel(BertModel):
 @add_start_docstrings("""RoBERTa Model with a `language modeling` head on top. """, ROBERTA_START_DOCSTRING)
 class RobertaForMaskedLM(BertPreTrainedModel):
     config_class = RobertaConfig
-    pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "roberta"
 
     def __init__(self, config):
@@ -276,7 +275,6 @@ class RobertaLMHead(nn.Module):
 )
 class RobertaForSequenceClassification(BertPreTrainedModel):
     config_class = RobertaConfig
-    pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "roberta"
 
     def __init__(self, config):
@@ -367,7 +365,6 @@ class RobertaForSequenceClassification(BertPreTrainedModel):
 )
 class RobertaForMultipleChoice(BertPreTrainedModel):
     config_class = RobertaConfig
-    pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "roberta"
 
     def __init__(self, config):
@@ -466,7 +463,6 @@ class RobertaForMultipleChoice(BertPreTrainedModel):
 )
 class RobertaForTokenClassification(BertPreTrainedModel):
     config_class = RobertaConfig
-    pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "roberta"
 
     def __init__(self, config):
@@ -586,7 +582,6 @@ class RobertaClassificationHead(nn.Module):
 )
 class RobertaForQuestionAnswering(BertPreTrainedModel):
     config_class = RobertaConfig
-    pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "roberta"
 
     def __init__(self, config):
......
@@ -36,13 +36,14 @@ logger = logging.getLogger(__name__)
 # This dict contains shortcut names and associated url
 # for the pretrained weights provided with the models
 ####################################################
-T5_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "t5-small": "https://cdn.huggingface.co/t5-small-pytorch_model.bin",
-    "t5-base": "https://cdn.huggingface.co/t5-base-pytorch_model.bin",
-    "t5-large": "https://cdn.huggingface.co/t5-large-pytorch_model.bin",
-    "t5-3b": "https://cdn.huggingface.co/t5-3b-pytorch_model.bin",
-    "t5-11b": "https://cdn.huggingface.co/t5-11b-pytorch_model.bin",
-}
+T5_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "t5-small",
+    "t5-base",
+    "t5-large",
+    "t5-3b",
+    "t5-11b",
+    # See all T5 models at https://huggingface.co/models?filter=t5
+]
 
 ####################################################
@@ -555,7 +556,6 @@ class T5PreTrainedModel(PreTrainedModel):
     """
     config_class = T5Config
-    pretrained_model_archive_map = T5_PRETRAINED_MODEL_ARCHIVE_MAP
     load_tf_weights = load_tf_weights_in_t5
     base_model_prefix = "transformer"
......
@@ -29,16 +29,17 @@ from .tokenization_utils import BatchEncoding
 logger = logging.getLogger(__name__)
 
-TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "albert-base-v1": "https://cdn.huggingface.co/albert-base-v1-with-prefix-tf_model.h5",
-    "albert-large-v1": "https://cdn.huggingface.co/albert-large-v1-with-prefix-tf_model.h5",
-    "albert-xlarge-v1": "https://cdn.huggingface.co/albert-xlarge-v1-with-prefix-tf_model.h5",
-    "albert-xxlarge-v1": "https://cdn.huggingface.co/albert-xxlarge-v1-with-prefix-tf_model.h5",
-    "albert-base-v2": "https://cdn.huggingface.co/albert-base-v2-with-prefix-tf_model.h5",
-    "albert-large-v2": "https://cdn.huggingface.co/albert-large-v2-with-prefix-tf_model.h5",
-    "albert-xlarge-v2": "https://cdn.huggingface.co/albert-xlarge-v2-with-prefix-tf_model.h5",
-    "albert-xxlarge-v2": "https://cdn.huggingface.co/albert-xxlarge-v2-with-prefix-tf_model.h5",
-}
+TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "albert-base-v1",
+    "albert-large-v1",
+    "albert-xlarge-v1",
+    "albert-xxlarge-v1",
+    "albert-base-v2",
+    "albert-large-v2",
+    "albert-xlarge-v2",
+    "albert-xxlarge-v2",
+    # See all ALBERT models at https://huggingface.co/models?filter=albert
+]
 
 class TFAlbertEmbeddings(tf.keras.layers.Layer):
@@ -440,7 +441,6 @@ class TFAlbertPreTrainedModel(TFPreTrainedModel):
     """
     config_class = AlbertConfig
-    pretrained_model_archive_map = TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "albert"
......
This diff is collapsed.
@@ -30,28 +30,29 @@ from .tokenization_utils import BatchEncoding
 logger = logging.getLogger(__name__)
 
-TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "bert-base-uncased": "https://cdn.huggingface.co/bert-base-uncased-tf_model.h5",
-    "bert-large-uncased": "https://cdn.huggingface.co/bert-large-uncased-tf_model.h5",
-    "bert-base-cased": "https://cdn.huggingface.co/bert-base-cased-tf_model.h5",
-    "bert-large-cased": "https://cdn.huggingface.co/bert-large-cased-tf_model.h5",
-    "bert-base-multilingual-uncased": "https://cdn.huggingface.co/bert-base-multilingual-uncased-tf_model.h5",
-    "bert-base-multilingual-cased": "https://cdn.huggingface.co/bert-base-multilingual-cased-tf_model.h5",
-    "bert-base-chinese": "https://cdn.huggingface.co/bert-base-chinese-tf_model.h5",
-    "bert-base-german-cased": "https://cdn.huggingface.co/bert-base-german-cased-tf_model.h5",
-    "bert-large-uncased-whole-word-masking": "https://cdn.huggingface.co/bert-large-uncased-whole-word-masking-tf_model.h5",
-    "bert-large-cased-whole-word-masking": "https://cdn.huggingface.co/bert-large-cased-whole-word-masking-tf_model.h5",
-    "bert-large-uncased-whole-word-masking-finetuned-squad": "https://cdn.huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad-tf_model.h5",
-    "bert-large-cased-whole-word-masking-finetuned-squad": "https://cdn.huggingface.co/bert-large-cased-whole-word-masking-finetuned-squad-tf_model.h5",
-    "bert-base-cased-finetuned-mrpc": "https://cdn.huggingface.co/bert-base-cased-finetuned-mrpc-tf_model.h5",
-    "bert-base-japanese": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese/tf_model.h5",
-    "bert-base-japanese-whole-word-masking": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese-whole-word-masking/tf_model.h5",
-    "bert-base-japanese-char": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese-char/tf_model.h5",
-    "bert-base-japanese-char-whole-word-masking": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese-char-whole-word-masking/tf_model.h5",
-    "bert-base-finnish-cased-v1": "https://cdn.huggingface.co/TurkuNLP/bert-base-finnish-cased-v1/tf_model.h5",
-    "bert-base-finnish-uncased-v1": "https://cdn.huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1/tf_model.h5",
-    "bert-base-dutch-cased": "https://cdn.huggingface.co/wietsedv/bert-base-dutch-cased/tf_model.h5",
-}
+TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "bert-base-uncased",
+    "bert-large-uncased",
+    "bert-base-cased",
+    "bert-large-cased",
+    "bert-base-multilingual-uncased",
+    "bert-base-multilingual-cased",
+    "bert-base-chinese",
+    "bert-base-german-cased",
+    "bert-large-uncased-whole-word-masking",
+    "bert-large-cased-whole-word-masking",
+    "bert-large-uncased-whole-word-masking-finetuned-squad",
+    "bert-large-cased-whole-word-masking-finetuned-squad",
+    "bert-base-cased-finetuned-mrpc",
+    "cl-tohoku/bert-base-japanese",
+    "cl-tohoku/bert-base-japanese-whole-word-masking",
+    "cl-tohoku/bert-base-japanese-char",
+    "cl-tohoku/bert-base-japanese-char-whole-word-masking",
+    "TurkuNLP/bert-base-finnish-cased-v1",
+    "TurkuNLP/bert-base-finnish-uncased-v1",
+    "wietsedv/bert-base-dutch-cased",
+    # See all BERT models at https://huggingface.co/models?filter=bert
+]
 
 def gelu(x):
@@ -585,7 +586,6 @@ class TFBertPreTrainedModel(TFPreTrainedModel):
     """
     config_class = BertConfig
-    pretrained_model_archive_map = TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "bert"
......
@@ -30,7 +30,9 @@ from .modeling_tf_roberta import (
 logger = logging.getLogger(__name__)
 
-TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {}
+TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    # See all CamemBERT models at https://huggingface.co/models?filter=camembert
+]
 
 CAMEMBERT_START_DOCSTRING = r"""
@@ -72,7 +74,6 @@ class TFCamembertModel(TFRobertaModel):
     """
     config_class = CamembertConfig
-    pretrained_model_archive_map = TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
 @add_start_docstrings(
@@ -85,7 +86,6 @@ class TFCamembertForMaskedLM(TFRobertaForMaskedLM):
     """
     config_class = CamembertConfig
-    pretrained_model_archive_map = TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
 @add_start_docstrings(
@@ -100,7 +100,6 @@ class TFCamembertForSequenceClassification(TFRobertaForSequenceClassification):
     """
     config_class = CamembertConfig
-    pretrained_model_archive_map = TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
 @add_start_docstrings(
@@ -115,4 +114,3 @@ class TFCamembertForTokenClassification(TFRobertaForTokenClassification):
     """
     config_class = CamembertConfig
-    pretrained_model_archive_map = TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
@@ -29,7 +29,10 @@ from .tokenization_utils import BatchEncoding
 logger = logging.getLogger(__name__)
 
-TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP = {"ctrl": "https://cdn.huggingface.co/ctrl-tf_model.h5"}
+TF_CTRL_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "ctrl"
+    # See all CTRL models at https://huggingface.co/models?filter=ctrl
+]
 
 def angle_defn(pos, i, d_model_size):
@@ -379,7 +382,6 @@ class TFCTRLPreTrainedModel(TFPreTrainedModel):
     """
     config_class = CTRLConfig
-    pretrained_model_archive_map = TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "transformer"
......
@@ -31,14 +31,15 @@ from .tokenization_utils import BatchEncoding
 logger = logging.getLogger(__name__)
 
-TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "distilbert-base-uncased": "https://cdn.huggingface.co/distilbert-base-uncased-tf_model.h5",
-    "distilbert-base-uncased-distilled-squad": "https://cdn.huggingface.co/distilbert-base-uncased-distilled-squad-tf_model.h5",
-    "distilbert-base-cased": "https://cdn.huggingface.co/distilbert-base-cased-tf_model.h5",
-    "distilbert-base-cased-distilled-squad": "https://cdn.huggingface.co/distilbert-base-cased-distilled-squad-tf_model.h5",
-    "distilbert-base-multilingual-cased": "https://cdn.huggingface.co/distilbert-base-multilingual-cased-tf_model.h5",
-    "distilbert-base-uncased-finetuned-sst-2-english": "https://cdn.huggingface.co/distilbert-base-uncased-finetuned-sst-2-english-tf_model.h5",
-}
+TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "distilbert-base-uncased",
+    "distilbert-base-uncased-distilled-squad",
+    "distilbert-base-cased",
+    "distilbert-base-cased-distilled-squad",
+    "distilbert-base-multilingual-cased",
+    "distilbert-base-uncased-finetuned-sst-2-english",
+    # See all DistilBERT models at https://huggingface.co/models?filter=distilbert
+]
 
 # UTILS AND BUILDING BLOCKS OF THE ARCHITECTURE #
@@ -467,7 +468,6 @@ class TFDistilBertPreTrainedModel(TFPreTrainedModel):
     """
     config_class = DistilBertConfig
-    pretrained_model_archive_map = TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "distilbert"
......