Unverified Commit d4c2cb40 authored by Julien Chaumond, committed by GitHub

Kill model archive maps (#4636)

* Kill model archive maps

* Fixup

* Also kill model_archive_map for MaskedBertPreTrainedModel

* Unhook config_archive_map

* Tokenizers: align with model id changes

* make style && make quality

* Fix CI
parent 47a551d1
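
In short: every per-model `*_PRETRAINED_MODEL_ARCHIVE_MAP` dict, which mapped shortcut names to hard-coded CDN URLs, becomes a `*_PRETRAINED_MODEL_ARCHIVE_LIST` of canonical model ids, and the `pretrained_model_archive_map` class attribute is unhooked from the pretrained-model base classes. A minimal sketch of what this looks like from the caller's side (the model id is taken from the BART hunk below; the resolve-from-id behavior is the stated intent of the commit, not code shown in this diff):

```python
from transformers import BartModel, BartTokenizer

# Before: "bart-large" was a key in BART_PRETRAINED_MODEL_ARCHIVE_MAP pointing
# at a hard-coded https://cdn.huggingface.co/... URL.
# After: the canonical identifier is the namespaced model id, and the weights
# are resolved from the id itself rather than from a per-model dict.
model = BartModel.from_pretrained("facebook/bart-large")
tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
```
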
@@ -32,13 +32,15 @@ from .modeling_utils import PreTrainedModel, create_position_ids_from_input_ids
 logger = logging.getLogger(__name__)
 
-BART_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "bart-large": "https://cdn.huggingface.co/facebook/bart-large/pytorch_model.bin",
-    "bart-large-mnli": "https://cdn.huggingface.co/facebook/bart-large-mnli/pytorch_model.bin",
-    "bart-large-cnn": "https://cdn.huggingface.co/facebook/bart-large-cnn/pytorch_model.bin",
-    "bart-large-xsum": "https://cdn.huggingface.co/facebook/bart-large-xsum/pytorch_model.bin",
-    "mbart-large-en-ro": "https://cdn.huggingface.co/facebook/mbart-large-en-ro/pytorch_model.bin",
-}
+BART_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "facebook/bart-large",
+    "facebook/bart-large-mnli",
+    "facebook/bart-large-cnn",
+    "facebook/bart-large-xsum",
+    "facebook/mbart-large-en-ro",
+    # See all BART models at https://huggingface.co/models?filter=bart
+]
 
 BART_START_DOCSTRING = r"""
@@ -118,7 +120,6 @@ def _prepare_bart_decoder_inputs(
 class PretrainedBartModel(PreTrainedModel):
     config_class = BartConfig
     base_model_prefix = "model"
-    pretrained_model_archive_map = BART_PRETRAINED_MODEL_ARCHIVE_MAP
 
     def _init_weights(self, module):
         std = self.config.init_std
......
@@ -32,30 +32,31 @@ from .modeling_utils import PreTrainedModel, prune_linear_layer
 logger = logging.getLogger(__name__)
 
-BERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "bert-base-uncased": "https://cdn.huggingface.co/bert-base-uncased-pytorch_model.bin",
-    "bert-large-uncased": "https://cdn.huggingface.co/bert-large-uncased-pytorch_model.bin",
-    "bert-base-cased": "https://cdn.huggingface.co/bert-base-cased-pytorch_model.bin",
-    "bert-large-cased": "https://cdn.huggingface.co/bert-large-cased-pytorch_model.bin",
-    "bert-base-multilingual-uncased": "https://cdn.huggingface.co/bert-base-multilingual-uncased-pytorch_model.bin",
-    "bert-base-multilingual-cased": "https://cdn.huggingface.co/bert-base-multilingual-cased-pytorch_model.bin",
-    "bert-base-chinese": "https://cdn.huggingface.co/bert-base-chinese-pytorch_model.bin",
-    "bert-base-german-cased": "https://cdn.huggingface.co/bert-base-german-cased-pytorch_model.bin",
-    "bert-large-uncased-whole-word-masking": "https://cdn.huggingface.co/bert-large-uncased-whole-word-masking-pytorch_model.bin",
-    "bert-large-cased-whole-word-masking": "https://cdn.huggingface.co/bert-large-cased-whole-word-masking-pytorch_model.bin",
-    "bert-large-uncased-whole-word-masking-finetuned-squad": "https://cdn.huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad-pytorch_model.bin",
-    "bert-large-cased-whole-word-masking-finetuned-squad": "https://cdn.huggingface.co/bert-large-cased-whole-word-masking-finetuned-squad-pytorch_model.bin",
-    "bert-base-cased-finetuned-mrpc": "https://cdn.huggingface.co/bert-base-cased-finetuned-mrpc-pytorch_model.bin",
-    "bert-base-german-dbmdz-cased": "https://cdn.huggingface.co/bert-base-german-dbmdz-cased-pytorch_model.bin",
-    "bert-base-german-dbmdz-uncased": "https://cdn.huggingface.co/bert-base-german-dbmdz-uncased-pytorch_model.bin",
-    "bert-base-japanese": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese/pytorch_model.bin",
-    "bert-base-japanese-whole-word-masking": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese-whole-word-masking/pytorch_model.bin",
-    "bert-base-japanese-char": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese-char/pytorch_model.bin",
-    "bert-base-japanese-char-whole-word-masking": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese-char-whole-word-masking/pytorch_model.bin",
-    "bert-base-finnish-cased-v1": "https://cdn.huggingface.co/TurkuNLP/bert-base-finnish-cased-v1/pytorch_model.bin",
-    "bert-base-finnish-uncased-v1": "https://cdn.huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1/pytorch_model.bin",
-    "bert-base-dutch-cased": "https://cdn.huggingface.co/wietsedv/bert-base-dutch-cased/pytorch_model.bin",
-}
+BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "bert-base-uncased",
+    "bert-large-uncased",
+    "bert-base-cased",
+    "bert-large-cased",
+    "bert-base-multilingual-uncased",
+    "bert-base-multilingual-cased",
+    "bert-base-chinese",
+    "bert-base-german-cased",
+    "bert-large-uncased-whole-word-masking",
+    "bert-large-cased-whole-word-masking",
+    "bert-large-uncased-whole-word-masking-finetuned-squad",
+    "bert-large-cased-whole-word-masking-finetuned-squad",
+    "bert-base-cased-finetuned-mrpc",
+    "bert-base-german-dbmdz-cased",
+    "bert-base-german-dbmdz-uncased",
+    "cl-tohoku/bert-base-japanese",
+    "cl-tohoku/bert-base-japanese-whole-word-masking",
+    "cl-tohoku/bert-base-japanese-char",
+    "cl-tohoku/bert-base-japanese-char-whole-word-masking",
+    "TurkuNLP/bert-base-finnish-cased-v1",
+    "TurkuNLP/bert-base-finnish-uncased-v1",
+    "wietsedv/bert-base-dutch-cased",
+    # See all BERT models at https://huggingface.co/models?filter=bert
+]
 
 def load_tf_weights_in_bert(model, config, tf_checkpoint_path):
@@ -513,7 +514,6 @@ class BertPreTrainedModel(PreTrainedModel):
     """
     config_class = BertConfig
-    pretrained_model_archive_map = BERT_PRETRAINED_MODEL_ARCHIVE_MAP
     load_tf_weights = load_tf_weights_in_bert
     base_model_prefix = "bert"
......
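
The BERT hunk above shows the other half of the change: community checkpoints (the Japanese, Finnish, and Dutch models) lose their bare shortcut names and are addressed by namespaced ids, which is what the "Tokenizers: align with model id changes" bullet in the commit message refers to. A hedged sketch using one id from the new list:

```python
from transformers import AutoModel, AutoTokenizer

# The old shortcut "bert-base-finnish-cased-v1" is now the namespaced id below;
# model and tokenizer resolve from the same id.
model_id = "TurkuNLP/bert-base-finnish-cased-v1"
model = AutoModel.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
```
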
@@ -31,11 +31,12 @@ from .modeling_roberta import (
 logger = logging.getLogger(__name__)
 
-CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "camembert-base": "https://cdn.huggingface.co/camembert-base-pytorch_model.bin",
-    "umberto-commoncrawl-cased-v1": "https://cdn.huggingface.co/Musixmatch/umberto-commoncrawl-cased-v1/pytorch_model.bin",
-    "umberto-wikipedia-uncased-v1": "https://cdn.huggingface.co/Musixmatch/umberto-wikipedia-uncased-v1/pytorch_model.bin",
-}
+CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "camembert-base",
+    "Musixmatch/umberto-commoncrawl-cased-v1",
+    "Musixmatch/umberto-wikipedia-uncased-v1",
+    # See all CamemBERT models at https://huggingface.co/models?filter=camembert
+]
 
 CAMEMBERT_START_DOCSTRING = r"""
@@ -62,7 +63,6 @@ class CamembertModel(RobertaModel):
     """
     config_class = CamembertConfig
-    pretrained_model_archive_map = CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
 @add_start_docstrings(
@@ -75,7 +75,6 @@ class CamembertForMaskedLM(RobertaForMaskedLM):
     """
     config_class = CamembertConfig
-    pretrained_model_archive_map = CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
 @add_start_docstrings(
@@ -90,7 +89,6 @@ class CamembertForSequenceClassification(RobertaForSequenceClassification):
     """
     config_class = CamembertConfig
-    pretrained_model_archive_map = CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
 @add_start_docstrings(
@@ -105,7 +103,6 @@ class CamembertForMultipleChoice(RobertaForMultipleChoice):
     """
     config_class = CamembertConfig
-    pretrained_model_archive_map = CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
 @add_start_docstrings(
@@ -120,7 +117,6 @@ class CamembertForTokenClassification(RobertaForTokenClassification):
     """
     config_class = CamembertConfig
-    pretrained_model_archive_map = CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
 @add_start_docstrings(
@@ -135,4 +131,3 @@ class CamembertForQuestionAnswering(RobertaForQuestionAnswering):
     """
     config_class = CamembertConfig
-    pretrained_model_archive_map = CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
@@ -30,7 +30,10 @@ from .modeling_utils import Conv1D, PreTrainedModel
 logger = logging.getLogger(__name__)
 
-CTRL_PRETRAINED_MODEL_ARCHIVE_MAP = {"ctrl": "https://storage.googleapis.com/sf-ctrl/pytorch/seqlen256_v1.bin"}
+CTRL_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "ctrl"
+    # See all CTRL models at https://huggingface.co/models?filter=ctrl
+]
 
 def angle_defn(pos, i, d_model_size):
@@ -178,7 +181,6 @@ class CTRLPreTrainedModel(PreTrainedModel):
     """
     config_class = CTRLConfig
-    pretrained_model_archive_map = CTRL_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "transformer"
 
     def _init_weights(self, module):
......
@@ -36,15 +36,16 @@ from .modeling_utils import PreTrainedModel, prune_linear_layer
 logger = logging.getLogger(__name__)
 
-DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "distilbert-base-uncased": "https://cdn.huggingface.co/distilbert-base-uncased-pytorch_model.bin",
-    "distilbert-base-uncased-distilled-squad": "https://cdn.huggingface.co/distilbert-base-uncased-distilled-squad-pytorch_model.bin",
-    "distilbert-base-cased": "https://cdn.huggingface.co/distilbert-base-cased-pytorch_model.bin",
-    "distilbert-base-cased-distilled-squad": "https://cdn.huggingface.co/distilbert-base-cased-distilled-squad-pytorch_model.bin",
-    "distilbert-base-german-cased": "https://cdn.huggingface.co/distilbert-base-german-cased-pytorch_model.bin",
-    "distilbert-base-multilingual-cased": "https://cdn.huggingface.co/distilbert-base-multilingual-cased-pytorch_model.bin",
-    "distilbert-base-uncased-finetuned-sst-2-english": "https://cdn.huggingface.co/distilbert-base-uncased-finetuned-sst-2-english-pytorch_model.bin",
-}
+DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "distilbert-base-uncased",
+    "distilbert-base-uncased-distilled-squad",
+    "distilbert-base-cased",
+    "distilbert-base-cased-distilled-squad",
+    "distilbert-base-german-cased",
+    "distilbert-base-multilingual-cased",
+    "distilbert-base-uncased-finetuned-sst-2-english",
+    # See all DistilBERT models at https://huggingface.co/models?filter=distilbert
+]
 
 # UTILS AND BUILDING BLOCKS OF THE ARCHITECTURE #
@@ -327,7 +328,6 @@ class DistilBertPreTrainedModel(PreTrainedModel):
     """
     config_class = DistilBertConfig
-    pretrained_model_archive_map = DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP
     load_tf_weights = None
     base_model_prefix = "distilbert"
......
@@ -14,14 +14,15 @@ from .modeling_bert import BertEmbeddings, BertEncoder, BertLayerNorm, BertPreTr
 logger = logging.getLogger(__name__)
 
-ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "google/electra-small-generator": "https://cdn.huggingface.co/google/electra-small-generator/pytorch_model.bin",
-    "google/electra-base-generator": "https://cdn.huggingface.co/google/electra-base-generator/pytorch_model.bin",
-    "google/electra-large-generator": "https://cdn.huggingface.co/google/electra-large-generator/pytorch_model.bin",
-    "google/electra-small-discriminator": "https://cdn.huggingface.co/google/electra-small-discriminator/pytorch_model.bin",
-    "google/electra-base-discriminator": "https://cdn.huggingface.co/google/electra-base-discriminator/pytorch_model.bin",
-    "google/electra-large-discriminator": "https://cdn.huggingface.co/google/electra-large-discriminator/pytorch_model.bin",
-}
+ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "google/electra-small-generator",
+    "google/electra-base-generator",
+    "google/electra-large-generator",
+    "google/electra-small-discriminator",
+    "google/electra-base-discriminator",
+    "google/electra-large-discriminator",
+    # See all ELECTRA models at https://huggingface.co/models?filter=electra
+]
 
 def load_tf_weights_in_electra(model, config, tf_checkpoint_path, discriminator_or_generator="discriminator"):
@@ -160,7 +161,6 @@ class ElectraPreTrainedModel(BertPreTrainedModel):
     """
     config_class = ElectraConfig
-    pretrained_model_archive_map = ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP
     load_tf_weights = load_tf_weights_in_electra
     base_model_prefix = "electra"
......
@@ -35,12 +35,13 @@ from .modeling_xlm import (
 logger = logging.getLogger(__name__)
 
-FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "flaubert-small-cased": "https://cdn.huggingface.co/flaubert/flaubert_small_cased/pytorch_model.bin",
-    "flaubert-base-uncased": "https://cdn.huggingface.co/flaubert/flaubert_base_uncased/pytorch_model.bin",
-    "flaubert-base-cased": "https://cdn.huggingface.co/flaubert/flaubert_base_cased/pytorch_model.bin",
-    "flaubert-large-cased": "https://cdn.huggingface.co/flaubert/flaubert_large_cased/pytorch_model.bin",
-}
+FLAUBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "flaubert/flaubert_small_cased",
+    "flaubert/flaubert_base_uncased",
+    "flaubert/flaubert_base_cased",
+    "flaubert/flaubert_large_cased",
+    # See all Flaubert models at https://huggingface.co/models?filter=flaubert
+]
 
 FLAUBERT_START_DOCSTRING = r"""
@@ -109,7 +110,6 @@ FLAUBERT_INPUTS_DOCSTRING = r"""
 class FlaubertModel(XLMModel):
     config_class = FlaubertConfig
-    pretrained_model_archive_map = FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
     def __init__(self, config):  # , dico, is_encoder, with_output):
         super().__init__(config)
@@ -304,7 +304,6 @@ class FlaubertWithLMHeadModel(XLMWithLMHeadModel):
     """
     config_class = FlaubertConfig
-    pretrained_model_archive_map = FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
     def __init__(self, config):
         super().__init__(config)
@@ -324,7 +323,6 @@ class FlaubertForSequenceClassification(XLMForSequenceClassification):
     """
     config_class = FlaubertConfig
-    pretrained_model_archive_map = FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
     def __init__(self, config):
         super().__init__(config)
@@ -344,7 +342,6 @@ class FlaubertForQuestionAnsweringSimple(XLMForQuestionAnsweringSimple):
     """
     config_class = FlaubertConfig
-    pretrained_model_archive_map = FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
     def __init__(self, config):
         super().__init__(config)
@@ -364,7 +361,6 @@ class FlaubertForQuestionAnswering(XLMForQuestionAnswering):
     """
     config_class = FlaubertConfig
-    pretrained_model_archive_map = FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
     def __init__(self, config):
         super().__init__(config)
......
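
Note that the Flaubert ids change shape as well as namespace: `flaubert-base-cased` becomes `flaubert/flaubert_base_cased` (dashes to underscores, plus the `flaubert` organization, matching the URLs the old map already pointed at). A hedged usage sketch with an id from the new list:

```python
from transformers import FlaubertModel, FlaubertTokenizer

# Old shortcut: "flaubert-base-cased"; the canonical id is now namespaced
# and underscore-separated.
model_id = "flaubert/flaubert_base_cased"
model = FlaubertModel.from_pretrained(model_id)
tokenizer = FlaubertTokenizer.from_pretrained(model_id)
```
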
@@ -31,13 +31,14 @@ from .modeling_utils import Conv1D, PreTrainedModel, SequenceSummary, prune_conv
 logger = logging.getLogger(__name__)
 
-GPT2_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "gpt2": "https://cdn.huggingface.co/gpt2-pytorch_model.bin",
-    "gpt2-medium": "https://cdn.huggingface.co/gpt2-medium-pytorch_model.bin",
-    "gpt2-large": "https://cdn.huggingface.co/gpt2-large-pytorch_model.bin",
-    "gpt2-xl": "https://cdn.huggingface.co/gpt2-xl-pytorch_model.bin",
-    "distilgpt2": "https://cdn.huggingface.co/distilgpt2-pytorch_model.bin",
-}
+GPT2_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "gpt2",
+    "gpt2-medium",
+    "gpt2-large",
+    "gpt2-xl",
+    "distilgpt2",
+    # See all GPT-2 models at https://huggingface.co/models?filter=gpt2
+]
 
 def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
@@ -251,7 +252,6 @@ class GPT2PreTrainedModel(PreTrainedModel):
     """
     config_class = GPT2Config
-    pretrained_model_archive_map = GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
     load_tf_weights = load_tf_weights_in_gpt2
     base_model_prefix = "transformer"
......
@@ -30,13 +30,14 @@ from .modeling_roberta import RobertaLMHead, RobertaModel
 logger = logging.getLogger(__name__)
 
-LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "allenai/longformer-base-4096": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-base-4096/pytorch_model.bin",
-    "allenai/longformer-large-4096": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-large-4096/pytorch_model.bin",
-    "allenai/longformer-large-4096-finetuned-triviaqa": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-large-4096-finetuned-triviaqa/pytorch_model.bin",
-    "allenai/longformer-base-4096-extra.pos.embd.only": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-base-4096-extra.pos.embd.only/pytorch_model.bin",
-    "allenai/longformer-large-4096-extra.pos.embd.only": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-large-4096-extra.pos.embd.only/pytorch_model.bin",
-}
+LONGFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "allenai/longformer-base-4096",
+    "allenai/longformer-large-4096",
+    "allenai/longformer-large-4096-finetuned-triviaqa",
+    "allenai/longformer-base-4096-extra.pos.embd.only",
+    "allenai/longformer-large-4096-extra.pos.embd.only",
+    # See all Longformer models at https://huggingface.co/models?filter=longformer
+]
 
 def _get_question_end_index(input_ids, sep_token_id):
@@ -513,7 +514,6 @@ class LongformerModel(RobertaModel):
     """
     config_class = LongformerConfig
-    pretrained_model_archive_map = LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "longformer"
 
     def __init__(self, config):
@@ -685,7 +685,6 @@ class LongformerModel(RobertaModel):
 @add_start_docstrings("""Longformer Model with a `language modeling` head on top. """, LONGFORMER_START_DOCSTRING)
 class LongformerForMaskedLM(BertPreTrainedModel):
     config_class = LongformerConfig
-    pretrained_model_archive_map = LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "longformer"
 
     def __init__(self, config):
@@ -776,7 +775,6 @@ class LongformerForMaskedLM(BertPreTrainedModel):
 )
 class LongformerForSequenceClassification(BertPreTrainedModel):
     config_class = LongformerConfig
-    pretrained_model_archive_map = LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "longformer"
 
     def __init__(self, config):
@@ -893,7 +891,6 @@ class LongformerClassificationHead(nn.Module):
 )
 class LongformerForQuestionAnswering(BertPreTrainedModel):
     config_class = LongformerConfig
-    pretrained_model_archive_map = LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "longformer"
 
     def __init__(self, config):
@@ -1018,7 +1015,6 @@ class LongformerForQuestionAnswering(BertPreTrainedModel):
 )
 class LongformerForTokenClassification(BertPreTrainedModel):
     config_class = LongformerConfig
-    pretrained_model_archive_map = LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "longformer"
 
     def __init__(self, config):
@@ -1119,7 +1115,6 @@ class LongformerForTokenClassification(BertPreTrainedModel):
 )
 class LongformerForMultipleChoice(BertPreTrainedModel):
     config_class = LongformerConfig
-    pretrained_model_archive_map = LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "longformer"
 
     def __init__(self, config):
......
@@ -18,6 +18,11 @@
 from transformers.modeling_bart import BartForConditionalGeneration
 
+MARIAN_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    # See all Marian models at https://huggingface.co/models?search=Helsinki-NLP
+]
+
 
 class MarianMTModel(BartForConditionalGeneration):
     r"""
     PyTorch version of marian-nmt's transformer.h (c++). Designed for the OPUS-NMT translation checkpoints.
@@ -41,8 +46,6 @@ class MarianMTModel(BartForConditionalGeneration):
     """
-    pretrained_model_archive_map = {}  # see https://huggingface.co/models?search=Helsinki-NLP
-
     def prepare_logits_for_generation(self, logits, cur_len, max_length):
         logits[:, self.config.pad_token_id] = float("-inf")
         if cur_len == max_length - 1 and self.config.eos_token_id is not None:
......
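
Marian is the limiting case of the new convention: the archive list is left empty because the Helsinki-NLP checkpoints are too numerous to enumerate, so they are discovered on the model hub instead. A hedged sketch (the id below is a well-known hub checkpoint used as an example; it does not appear in this diff):

```python
from transformers import MarianMTModel, MarianTokenizer

# Any Helsinki-NLP/opus-mt-* checkpoint on the hub loads by id; none are
# enumerated in the library source.
model_id = "Helsinki-NLP/opus-mt-en-de"  # example id, not part of this diff
model = MarianMTModel.from_pretrained(model_id)
tokenizer = MarianTokenizer.from_pretrained(model_id)
```
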
@@ -33,7 +33,10 @@ from .modeling_utils import Conv1D, PreTrainedModel, SequenceSummary, prune_conv
 logger = logging.getLogger(__name__)
 
-OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP = {"openai-gpt": "https://cdn.huggingface.co/openai-gpt-pytorch_model.bin"}
+OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "openai-gpt",
+    # See all OpenAI GPT models at https://huggingface.co/models?filter=openai-gpt
+]
 
 def load_tf_weights_in_openai_gpt(model, config, openai_checkpoint_folder_path):
@@ -252,7 +255,6 @@ class OpenAIGPTPreTrainedModel(PreTrainedModel):
     """
     config_class = OpenAIGPTConfig
-    pretrained_model_archive_map = OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP
     load_tf_weights = load_tf_weights_in_openai_gpt
     base_model_prefix = "transformer"
......
@@ -35,10 +35,11 @@ from .modeling_utils import PreTrainedModel, apply_chunking_to_forward
 logger = logging.getLogger(__name__)
 
-REFORMER_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "google/reformer-crime-and-punishment": "https://cdn.huggingface.co/google/reformer-crime-and-punishment/pytorch_model.bin",
-    "google/reformer-enwik8": "https://cdn.huggingface.co/google/reformer-enwik8/pytorch_model.bin",
-}
+REFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "google/reformer-crime-and-punishment",
+    "google/reformer-enwik8",
+    # See all Reformer models at https://huggingface.co/models?filter=reformer
+]
 
 def mish(x):
@@ -1373,7 +1374,6 @@ class ReformerPreTrainedModel(PreTrainedModel):
     """
     config_class = ReformerConfig
-    pretrained_model_archive_map = REFORMER_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "reformer"
 
     @property
......
@@ -30,14 +30,15 @@ from .modeling_utils import create_position_ids_from_input_ids
 logger = logging.getLogger(__name__)
 
-ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "roberta-base": "https://cdn.huggingface.co/roberta-base-pytorch_model.bin",
-    "roberta-large": "https://cdn.huggingface.co/roberta-large-pytorch_model.bin",
-    "roberta-large-mnli": "https://cdn.huggingface.co/roberta-large-mnli-pytorch_model.bin",
-    "distilroberta-base": "https://cdn.huggingface.co/distilroberta-base-pytorch_model.bin",
-    "roberta-base-openai-detector": "https://cdn.huggingface.co/roberta-base-openai-detector-pytorch_model.bin",
-    "roberta-large-openai-detector": "https://cdn.huggingface.co/roberta-large-openai-detector-pytorch_model.bin",
-}
+ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "roberta-base",
+    "roberta-large",
+    "roberta-large-mnli",
+    "distilroberta-base",
+    "roberta-base-openai-detector",
+    "roberta-large-openai-detector",
+    # See all RoBERTa models at https://huggingface.co/models?filter=roberta
+]
 
 class RobertaEmbeddings(BertEmbeddings):
@@ -142,7 +143,6 @@ class RobertaModel(BertModel):
     """
     config_class = RobertaConfig
-    pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "roberta"
 
     def __init__(self, config):
@@ -161,7 +161,6 @@ class RobertaModel(BertModel):
 @add_start_docstrings("""RoBERTa Model with a `language modeling` head on top. """, ROBERTA_START_DOCSTRING)
 class RobertaForMaskedLM(BertPreTrainedModel):
     config_class = RobertaConfig
-    pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "roberta"
 
     def __init__(self, config):
@@ -276,7 +275,6 @@ class RobertaLMHead(nn.Module):
 )
 class RobertaForSequenceClassification(BertPreTrainedModel):
     config_class = RobertaConfig
-    pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "roberta"
 
     def __init__(self, config):
@@ -367,7 +365,6 @@ class RobertaForSequenceClassification(BertPreTrainedModel):
 )
 class RobertaForMultipleChoice(BertPreTrainedModel):
     config_class = RobertaConfig
-    pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "roberta"
 
     def __init__(self, config):
@@ -466,7 +463,6 @@ class RobertaForMultipleChoice(BertPreTrainedModel):
 )
 class RobertaForTokenClassification(BertPreTrainedModel):
     config_class = RobertaConfig
-    pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "roberta"
 
     def __init__(self, config):
@@ -586,7 +582,6 @@ class RobertaClassificationHead(nn.Module):
 )
 class RobertaForQuestionAnswering(BertPreTrainedModel):
     config_class = RobertaConfig
-    pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "roberta"
 
     def __init__(self, config):
......
@@ -36,13 +36,14 @@ logger = logging.getLogger(__name__)
 # This dict contains shortcut names and associated url
 # for the pretrained weights provided with the models
 ####################################################
-T5_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "t5-small": "https://cdn.huggingface.co/t5-small-pytorch_model.bin",
-    "t5-base": "https://cdn.huggingface.co/t5-base-pytorch_model.bin",
-    "t5-large": "https://cdn.huggingface.co/t5-large-pytorch_model.bin",
-    "t5-3b": "https://cdn.huggingface.co/t5-3b-pytorch_model.bin",
-    "t5-11b": "https://cdn.huggingface.co/t5-11b-pytorch_model.bin",
-}
+T5_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "t5-small",
+    "t5-base",
+    "t5-large",
+    "t5-3b",
+    "t5-11b",
+    # See all T5 models at https://huggingface.co/models?filter=t5
+]
 
 ####################################################
@@ -555,7 +556,6 @@ class T5PreTrainedModel(PreTrainedModel):
     """
     config_class = T5Config
-    pretrained_model_archive_map = T5_PRETRAINED_MODEL_ARCHIVE_MAP
     load_tf_weights = load_tf_weights_in_t5
     base_model_prefix = "transformer"
......
@@ -29,16 +29,17 @@ from .tokenization_utils import BatchEncoding
 logger = logging.getLogger(__name__)
 
-TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "albert-base-v1": "https://cdn.huggingface.co/albert-base-v1-with-prefix-tf_model.h5",
-    "albert-large-v1": "https://cdn.huggingface.co/albert-large-v1-with-prefix-tf_model.h5",
-    "albert-xlarge-v1": "https://cdn.huggingface.co/albert-xlarge-v1-with-prefix-tf_model.h5",
-    "albert-xxlarge-v1": "https://cdn.huggingface.co/albert-xxlarge-v1-with-prefix-tf_model.h5",
-    "albert-base-v2": "https://cdn.huggingface.co/albert-base-v2-with-prefix-tf_model.h5",
-    "albert-large-v2": "https://cdn.huggingface.co/albert-large-v2-with-prefix-tf_model.h5",
-    "albert-xlarge-v2": "https://cdn.huggingface.co/albert-xlarge-v2-with-prefix-tf_model.h5",
-    "albert-xxlarge-v2": "https://cdn.huggingface.co/albert-xxlarge-v2-with-prefix-tf_model.h5",
-}
+TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "albert-base-v1",
+    "albert-large-v1",
+    "albert-xlarge-v1",
+    "albert-xxlarge-v1",
+    "albert-base-v2",
+    "albert-large-v2",
+    "albert-xlarge-v2",
+    "albert-xxlarge-v2",
+    # See all ALBERT models at https://huggingface.co/models?filter=albert
+]
 
 class TFAlbertEmbeddings(tf.keras.layers.Layer):
@@ -440,7 +441,6 @@ class TFAlbertPreTrainedModel(TFPreTrainedModel):
     """
     config_class = AlbertConfig
-    pretrained_model_archive_map = TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "albert"
......
This diff is collapsed.
@@ -30,28 +30,29 @@ from .tokenization_utils import BatchEncoding
 logger = logging.getLogger(__name__)
 
-TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "bert-base-uncased": "https://cdn.huggingface.co/bert-base-uncased-tf_model.h5",
-    "bert-large-uncased": "https://cdn.huggingface.co/bert-large-uncased-tf_model.h5",
-    "bert-base-cased": "https://cdn.huggingface.co/bert-base-cased-tf_model.h5",
-    "bert-large-cased": "https://cdn.huggingface.co/bert-large-cased-tf_model.h5",
-    "bert-base-multilingual-uncased": "https://cdn.huggingface.co/bert-base-multilingual-uncased-tf_model.h5",
-    "bert-base-multilingual-cased": "https://cdn.huggingface.co/bert-base-multilingual-cased-tf_model.h5",
-    "bert-base-chinese": "https://cdn.huggingface.co/bert-base-chinese-tf_model.h5",
-    "bert-base-german-cased": "https://cdn.huggingface.co/bert-base-german-cased-tf_model.h5",
-    "bert-large-uncased-whole-word-masking": "https://cdn.huggingface.co/bert-large-uncased-whole-word-masking-tf_model.h5",
-    "bert-large-cased-whole-word-masking": "https://cdn.huggingface.co/bert-large-cased-whole-word-masking-tf_model.h5",
-    "bert-large-uncased-whole-word-masking-finetuned-squad": "https://cdn.huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad-tf_model.h5",
-    "bert-large-cased-whole-word-masking-finetuned-squad": "https://cdn.huggingface.co/bert-large-cased-whole-word-masking-finetuned-squad-tf_model.h5",
-    "bert-base-cased-finetuned-mrpc": "https://cdn.huggingface.co/bert-base-cased-finetuned-mrpc-tf_model.h5",
-    "bert-base-japanese": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese/tf_model.h5",
-    "bert-base-japanese-whole-word-masking": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese-whole-word-masking/tf_model.h5",
-    "bert-base-japanese-char": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese-char/tf_model.h5",
-    "bert-base-japanese-char-whole-word-masking": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese-char-whole-word-masking/tf_model.h5",
-    "bert-base-finnish-cased-v1": "https://cdn.huggingface.co/TurkuNLP/bert-base-finnish-cased-v1/tf_model.h5",
-    "bert-base-finnish-uncased-v1": "https://cdn.huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1/tf_model.h5",
-    "bert-base-dutch-cased": "https://cdn.huggingface.co/wietsedv/bert-base-dutch-cased/tf_model.h5",
-}
+TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "bert-base-uncased",
+    "bert-large-uncased",
+    "bert-base-cased",
+    "bert-large-cased",
+    "bert-base-multilingual-uncased",
+    "bert-base-multilingual-cased",
+    "bert-base-chinese",
+    "bert-base-german-cased",
+    "bert-large-uncased-whole-word-masking",
+    "bert-large-cased-whole-word-masking",
+    "bert-large-uncased-whole-word-masking-finetuned-squad",
+    "bert-large-cased-whole-word-masking-finetuned-squad",
+    "bert-base-cased-finetuned-mrpc",
+    "cl-tohoku/bert-base-japanese",
+    "cl-tohoku/bert-base-japanese-whole-word-masking",
+    "cl-tohoku/bert-base-japanese-char",
+    "cl-tohoku/bert-base-japanese-char-whole-word-masking",
+    "TurkuNLP/bert-base-finnish-cased-v1",
+    "TurkuNLP/bert-base-finnish-uncased-v1",
+    "wietsedv/bert-base-dutch-cased",
+    # See all BERT models at https://huggingface.co/models?filter=bert
+]
 
 def gelu(x):
@@ -585,7 +586,6 @@ class TFBertPreTrainedModel(TFPreTrainedModel):
     """
     config_class = BertConfig
-    pretrained_model_archive_map = TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "bert"
......
@@ -30,7 +30,9 @@ from .modeling_tf_roberta import (
 logger = logging.getLogger(__name__)
 
-TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {}
+TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    # See all CamemBERT models at https://huggingface.co/models?filter=camembert
+]
 
 CAMEMBERT_START_DOCSTRING = r"""
@@ -72,7 +74,6 @@ class TFCamembertModel(TFRobertaModel):
     """
     config_class = CamembertConfig
-    pretrained_model_archive_map = TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
 @add_start_docstrings(
@@ -85,7 +86,6 @@ class TFCamembertForMaskedLM(TFRobertaForMaskedLM):
     """
     config_class = CamembertConfig
-    pretrained_model_archive_map = TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
 @add_start_docstrings(
@@ -100,7 +100,6 @@ class TFCamembertForSequenceClassification(TFRobertaForSequenceClassification):
     """
     config_class = CamembertConfig
-    pretrained_model_archive_map = TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 
 @add_start_docstrings(
@@ -115,4 +114,3 @@ class TFCamembertForTokenClassification(TFRobertaForTokenClassification):
     """
     config_class = CamembertConfig
-    pretrained_model_archive_map = TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
@@ -29,7 +29,10 @@ from .tokenization_utils import BatchEncoding
 logger = logging.getLogger(__name__)
 
-TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP = {"ctrl": "https://cdn.huggingface.co/ctrl-tf_model.h5"}
+TF_CTRL_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "ctrl"
+    # See all CTRL models at https://huggingface.co/models?filter=ctrl
+]
 
 def angle_defn(pos, i, d_model_size):
@@ -379,7 +382,6 @@ class TFCTRLPreTrainedModel(TFPreTrainedModel):
     """
     config_class = CTRLConfig
-    pretrained_model_archive_map = TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "transformer"
......
@@ -31,14 +31,15 @@ from .tokenization_utils import BatchEncoding
 logger = logging.getLogger(__name__)
 
-TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "distilbert-base-uncased": "https://cdn.huggingface.co/distilbert-base-uncased-tf_model.h5",
-    "distilbert-base-uncased-distilled-squad": "https://cdn.huggingface.co/distilbert-base-uncased-distilled-squad-tf_model.h5",
-    "distilbert-base-cased": "https://cdn.huggingface.co/distilbert-base-cased-tf_model.h5",
-    "distilbert-base-cased-distilled-squad": "https://cdn.huggingface.co/distilbert-base-cased-distilled-squad-tf_model.h5",
-    "distilbert-base-multilingual-cased": "https://cdn.huggingface.co/distilbert-base-multilingual-cased-tf_model.h5",
-    "distilbert-base-uncased-finetuned-sst-2-english": "https://cdn.huggingface.co/distilbert-base-uncased-finetuned-sst-2-english-tf_model.h5",
-}
+TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "distilbert-base-uncased",
+    "distilbert-base-uncased-distilled-squad",
+    "distilbert-base-cased",
+    "distilbert-base-cased-distilled-squad",
+    "distilbert-base-multilingual-cased",
+    "distilbert-base-uncased-finetuned-sst-2-english",
+    # See all DistilBERT models at https://huggingface.co/models?filter=distilbert
+]
 
 # UTILS AND BUILDING BLOCKS OF THE ARCHITECTURE #
@@ -467,7 +468,6 @@ class TFDistilBertPreTrainedModel(TFPreTrainedModel):
     """
     config_class = DistilBertConfig
-    pretrained_model_archive_map = TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP
     base_model_prefix = "distilbert"
......